1 #include "EXTERN.h"
2 #include "perl.h"
3 #include "XSUB.h"
4
5 #define NEED_newRV_noinc
6 #define NEED_sv_2pv_flags
7 #include "ppport.h"
8 #include "utils.h"
9
10 //#define DOM_GC_TRACE(msg, ...) fprintf(stderr, "[GC] " msg "\n", ##__VA_ARGS__);
11 #define DOM_GC_TRACE(...)
12
13 // HACK: sv_derived_from_pvn faster than sv_derived_from
14 #if PERL_BCDVERSION > 0x5015004
15 #undef sv_derived_from
16 #define sv_derived_from(sv, name) sv_derived_from_pvn(sv, name, sizeof(name) - 1, 0)
17 #else
18 #define sv_derived_from_pvn(sv, name, len) sv_derived_from(sv, name)
19 #endif
20
21 // HACK: support older perl <5.6 (why not :D)
22 #if PERL_BCDVERSION < 0x5006000
23 #define SvUTF8(x) (0)
24 #define SvUTF8_on(x)
25 #define SvUTF8_off(x)
26 #endif
27
/*
 * sub_croak(cv, msg, ...) - croak with the name of the sub behind `cv` prepended.
 *
 * The message is formatted as "Package::name(): msg" when the CV's GV has a
 * stash name, or "name(): msg" otherwise. `msg` must be a string literal
 * (it is concatenated with the prefix format at compile time).
 *
 * If the CV has no GV the plain message is still raised, so that callers
 * never continue executing past an error.
 *
 * NOTE: the expansion ends with a ';' after `while (0)`; it is kept as-is
 * for compatibility with existing call sites, which means the macro cannot be
 * used as the unbraced body of an `if` that is followed by `else`.
 */
#define sub_croak(cv, msg, ...) do { \
    const GV *const sub_croak_gv = CvGV(cv); \
    if (sub_croak_gv) { \
        const char *sub_croak_name = GvNAME(sub_croak_gv); \
        const HV *sub_croak_stash = GvSTASH(sub_croak_gv); \
        const char *sub_croak_pkg = sub_croak_stash ? HvNAME(sub_croak_stash) : NULL; \
        croak("%s%s%s(): " msg, sub_croak_pkg ? sub_croak_pkg : sub_croak_name, sub_croak_pkg ? "::" : "", sub_croak_pkg ? sub_croak_name : "", ##__VA_ARGS__); \
    } else { \
        croak(msg, ##__VA_ARGS__); \
    } \
} while (0);
37
38 typedef html5_dom_parser_t * HTML5__DOM;
39 typedef myhtml_collection_t * HTML5__DOM__Collection;
40 typedef myhtml_tree_node_t * HTML5__DOM__Node;
41 typedef myhtml_tree_node_t * HTML5__DOM__Element;
42 typedef myhtml_tree_node_t * HTML5__DOM__Text;
43 typedef myhtml_tree_node_t * HTML5__DOM__Comment;
44 typedef myhtml_tree_node_t * HTML5__DOM__Document;
45 typedef myhtml_tree_node_t * HTML5__DOM__Fragment;
46 typedef myhtml_tree_node_t * HTML5__DOM__DocType;
47 typedef html5_dom_tree_t * HTML5__DOM__Tree;
48 typedef html5_css_parser_t * HTML5__DOM__CSS;
49 typedef html5_css_selector_t * HTML5__DOM__CSS__Selector;
50 typedef html5_css_selector_entry_t * HTML5__DOM__CSS__Selector__Entry;
51 typedef html5_dom_async_result * HTML5__DOM__AsyncResult;
52
sv_serialization_callback(const char * data,size_t len,void * ctx)53 static mystatus_t sv_serialization_callback(const char *data, size_t len, void *ctx) {
54 sv_catpvn((SV *) ctx, data, len);
55 return MyCORE_STATUS_OK;
56 }
57
pack_pointer(const char * clazz,void * ptr)58 static inline SV *pack_pointer(const char *clazz, void *ptr) {
59 SV *sv = newSV(0);
60 sv_setref_pv(sv, clazz, ptr);
61 return sv;
62 }
63
/*
 * Appends to `sv` the text of every text node below `node`, in document order.
 * Text children are appended directly; element children are descended into.
 * Other node kinds are skipped.
 */
static void html5_dom_recursive_node_text(myhtml_tree_node_t *node, SV *sv) {
    myhtml_tree_node_t *child;

    for (child = myhtml_node_child(node); child; child = myhtml_node_next(child)) {
        if (child->tag_id == MyHTML_TAG__TEXT) {
            size_t length = 0;
            const char *chars = myhtml_node_text(child, &length);

            if (length)
                sv_catpvn(sv, chars, length);

            continue;
        }

        if (node_is_element(child))
            html5_dom_recursive_node_text(child, sv);
    }
}
78
/*
 * Returns a reference to the HTML5::DOM::Tree object bound to `tree`.
 *
 * If the tree already carries a context, a new reference to the existing
 * object is returned. Otherwise a context is allocated, stored in
 * tree->context, filled from the arguments, and wrapped into a blessed
 * reference. The refcount of `parent` is incremented when a new context
 * is created.
 */
static SV *create_tree_object(myhtml_tree_t *tree, SV *parent, html5_dom_parser_t *parser, bool used, bool utf8) {
    html5_dom_tree_t *existing = (html5_dom_tree_t *) tree->context;
    if (existing)
        return newRV(existing->sv);

    tree->context = safemalloc(sizeof(html5_dom_tree_t));

    html5_dom_tree_t *wrapper = (html5_dom_tree_t *) tree->context;
    wrapper->tree            = tree;
    wrapper->parent          = parent;
    wrapper->parser          = parser;
    wrapper->fragment_tag_id = MyHTML_TAG__UNDEF;
    wrapper->used            = used;
    wrapper->utf8            = utf8;

    // the tree context keeps its own reference to the parent
    SvREFCNT_inc(parent);

    SV *sv = pack_pointer("HTML5::DOM::Tree", wrapper);

    // remember the referent (not the RV) so later calls can make new references to it
    wrapper->sv = SvRV(sv);

    DOM_GC_TRACE("DOM::Tree::NEW (refcnt=%d)", SvREFCNT(sv));

    return sv;
}
104
/*
 * Picks the Perl package name used to bless `node`, based on its tag id:
 * Text, Comment, DocType, Fragment (tag id equal to the tree's non-zero
 * fragment_tag_id), Element for any other defined tag, and
 * Document / Node for nodes whose tag id is MyHTML_TAG__UNDEF.
 */
static inline const char *get_node_class(myhtml_tree_node_t *node) {
    html5_dom_tree_t *context = (html5_dom_tree_t *) node->tree->context;

    if (node->tag_id == MyHTML_TAG__UNDEF) {
        // Modest myhtml bug - document node has tag_id == MyHTML_TAG__UNDEF
        return node_is_document(node) ? "HTML5::DOM::Document" : "HTML5::DOM::Node";
    }

    if (node->tag_id == MyHTML_TAG__TEXT)
        return "HTML5::DOM::Text";

    if (node->tag_id == MyHTML_TAG__COMMENT)
        return "HTML5::DOM::Comment";

    if (node->tag_id == MyHTML_TAG__DOCTYPE)
        return "HTML5::DOM::DocType";

    if (context->fragment_tag_id && node->tag_id == context->fragment_tag_id)
        return "HTML5::DOM::Fragment";

    return "HTML5::DOM::Element";
}
127
newSVpv_utf8_auto(myhtml_tree_t * tree,const char * value,STRLEN length)128 static inline SV *newSVpv_utf8_auto(myhtml_tree_t *tree, const char *value, STRLEN length) {
129 html5_dom_tree_t *context = (html5_dom_tree_t *) tree->context;
130 if (!context || !context->utf8) {
131 return newSVpv(value, length);
132 } else {
133 SV *sv = newSVpv(value, length);
134 SvUTF8_on(sv);
135 return sv;
136 }
137 }
138
newSVpv_utf8_auto_css(html5_css_selector_t * selector,const char * value,STRLEN length)139 static inline SV *newSVpv_utf8_auto_css(html5_css_selector_t *selector, const char *value, STRLEN length) {
140 if (!selector || !selector->utf8) {
141 return newSVpv(value, length);
142 } else {
143 SV *sv = newSVpv(value, length);
144 SvUTF8_on(sv);
145 return sv;
146 }
147 }
148
tree_to_sv(myhtml_tree_t * tree)149 static SV *tree_to_sv(myhtml_tree_t *tree) {
150 html5_dom_tree_t *context = (html5_dom_tree_t *) tree->context;
151 return newRV(context->sv);
152 }
153
myhtml_to_sv(myhtml_tree_t * tree)154 static SV *myhtml_to_sv(myhtml_tree_t *tree) {
155 html5_dom_tree_t *context = (html5_dom_tree_t *) tree->context;
156 return newRV(context->parent);
157 }
158
node_to_sv(myhtml_tree_node_t * node)159 static SV *node_to_sv(myhtml_tree_node_t *node) {
160 if (!node)
161 return &PL_sv_undef;
162
163 SV *sv = (SV *) myhtml_node_get_data(node);
164 if (!sv) {
165 SV *node_ref = pack_pointer(get_node_class(node), (void *) node);
166 sv = SvRV(node_ref);
167 myhtml_node_set_data(node, (void *) sv);
168
169 DOM_GC_TRACE("DOM::Node::NEW (new refcnt=%d)", SvREFCNT(sv));
170
171 html5_dom_tree_t *tree = (html5_dom_tree_t *) node->tree->context;
172 SvREFCNT_inc(tree->sv);
173
174 return node_ref;
175 } else {
176 SV *node_ref = newRV(sv);
177 DOM_GC_TRACE("DOM::Node::NEW (reuse refcnt=%d)", SvREFCNT(sv));
178 return node_ref;
179 }
180 }
181
collection_to_blessed_array(myhtml_collection_t * collection)182 static SV *collection_to_blessed_array(myhtml_collection_t *collection) {
183 AV *arr = newAV();
184 if (collection) {
185 for (int i = 0; i < collection->length; ++i)
186 av_push(arr, node_to_sv(collection->list[i]));
187 }
188 return sv_bless(newRV_noinc((SV *) arr), gv_stashpv("HTML5::DOM::Collection", 0));
189 }
190
/*
 * Resolves overloaded stringification for blessed references.
 *
 * If `sv` is a reference to an object whose class provides the `("")`
 * overload method, that method is called and its result is returned.
 * In every other case `sv` itself is returned unchanged.
 *
 * The returned SV is never owned by the caller: it is either the argument
 * or a mortal copy of the method's return value.
 */
static SV *sv_stringify(SV *sv) {
    if (!SvROK(sv))
        return sv;

    SV *referent = SvRV(sv);
    if (!SvOBJECT(referent))
        return sv;

    HV *stash = SvSTASH(referent);

    // "\x28\x22\x22" is the overload method name: (""
    GV *to_string = gv_fetchmethod_autoload(stash, "\x28\x22\x22", 0);
    if (!to_string)
        return sv;

    dSP;
    ENTER; SAVETMPS; PUSHMARK(SP);
    XPUSHs(sv_bless(sv_2mortal(newRV_inc(referent)), stash));
    PUTBACK;

    int count = call_sv((SV *) GvCV(to_string), G_SCALAR);
    SPAGAIN;

    // The value on the stack is a temporary that FREETMPS below may free,
    // so take a private copy while it is still alive.
    SV *copy = newSVsv(count > 0 ? POPs : &PL_sv_undef);

    PUTBACK;
    FREETMPS; LEAVE;

    // Mortalize in the caller's scope (after LEAVE), so it outlives this function.
    return sv_2mortal(copy);
}
217
/*
 * Runs a CSS selector query against `scope`.
 *
 * cv         - calling XSUB, used only for error messages.
 * parser     - parser whose mycss entry is used to compile string selectors.
 * scope      - node the search starts from.
 * query      - either a plain string selector, or a reference to an
 *              HTML5::DOM::CSS::Selector / HTML5::DOM::CSS::Selector::Entry.
 * combinator - optional (may be NULL) combinator name; when non-empty it
 *              replaces the default descendant combinator.
 * one        - true: return the first match (or undef);
 *              false: return an HTML5::DOM::Collection of all matches.
 *
 * Croaks when `query` is a reference of another type, when the CSS engine
 * cannot be initialised, or when a string selector fails to compile.
 */
static SV *html5_node_find(CV *cv, html5_dom_parser_t *parser, myhtml_tree_node_t *scope, SV *query, SV *combinator, bool one) {
    mystatus_t status;
    mycss_selectors_entries_list_t *list = NULL;
    size_t list_size = 0;
    mycss_selectors_list_t *selector = NULL; // only set for selectors compiled here (and destroyed here)
    modest_finder_selector_combinator_f selector_func = modest_finder_node_combinator_descendant;
    SV *result = &PL_sv_undef;

    // Custom combinator as args
    if (combinator) {
        // stringify the combinator itself (the original stringified `query` here by mistake)
        combinator = sv_stringify(combinator);

        STRLEN combo_len;
        const char *combo = SvPV_const(combinator, combo_len);

        if (combo_len > 0)
            selector_func = html5_find_selector_func(combo, combo_len);
    }

    if (SvROK(query)) {
        if (sv_derived_from(query, "HTML5::DOM::CSS::Selector")) { // Precompiled selectors
            html5_css_selector_t *compiled = INT2PTR(html5_css_selector_t *, SvIV((SV*)SvRV(query)));
            list = compiled->list->entries_list;
            list_size = compiled->list->entries_list_length;
        } else if (sv_derived_from(query, "HTML5::DOM::CSS::Selector::Entry")) { // One precompiled selector
            html5_css_selector_entry_t *compiled_entry = INT2PTR(html5_css_selector_entry_t *, SvIV((SV*)SvRV(query)));
            list = compiled_entry->list;
            list_size = 1;
        } else {
            sub_croak(cv, "%s: %s is not of type %s or %s", "HTML5::DOM::Tree::find", "query", "HTML5::DOM::CSS::Selector", "HTML5::DOM::CSS::Selector::Entry");
        }
    } else {
        // String selector, compile it
        STRLEN query_len;
        const char *query_str = SvPV_const(query, query_len);

        status = html5_dom_init_css(parser);
        if (status)
            sub_croak(cv, "mycss_init failed: %d (%s)", status, modest_strerror(status));

        selector = html5_parse_selector(parser->mycss_entry, query_str, query_len, &status);

        if (!selector)
            sub_croak(cv, "bad selector: %s", query_str);

        list = selector->entries_list;
        list_size = selector->entries_list_length;
    }

    if (one) { // search one element
        myhtml_tree_node_t *node = (myhtml_tree_node_t *) html5_node_finder(parser, selector_func, scope, list, list_size, &status, 1);
        result = node_to_sv(node);
    } else { // search multiple elements
        myhtml_collection_t *collection = (myhtml_collection_t *) html5_node_finder(parser, selector_func, scope, list, list_size, &status, 0);
        result = collection_to_blessed_array(collection);
        if (collection)
            myhtml_collection_destroy(collection);
    }

    // destroy parsed selector
    if (selector)
        mycss_selectors_list_destroy(mycss_entry_selectors(parser->mycss_entry), selector, true);

    return result;
}
285
/*
 * Non-CSS lookups below `self`, selected by the XS alias index `ix`:
 *   0, 1 - by tag name            -> collection
 *   2, 3 - by class               -> collection
 *   4, 5 - by id                  -> first matching node, or undef
 *   6, 7 - by attribute           -> collection
 * For attribute lookups, `val` (optional) is the value to compare with and
 * `cmp` (optional) selects the comparison by its first character
 * ('=', '~', '^', '$', '*', '|'; default '='); `icase` requests a
 * case-insensitive comparison. Without `val` only the presence of the
 * attribute `key` is tested.
 *
 * A NULL `self` yields an empty collection; an unhandled `ix` yields undef.
 */
static SV *html5_node_simple_find(CV *cv, myhtml_tree_node_t *self, SV *key, SV *val, SV *cmp, bool icase, int ix) {
    if (!self)
        return collection_to_blessed_array(NULL);

    SV *result = &PL_sv_undef;
    myhtml_collection_t *found = NULL;

    key = sv_stringify(key);

    STRLEN key_len;
    const char *key_str = SvPV_const(key, key_len);

    switch (ix) {
        case 0: case 1: // tag name
            found = myhtml_get_nodes_by_name_in_scope(self->tree, NULL, self, key_str, key_len, NULL);
            result = collection_to_blessed_array(found);
            break;

        case 2: case 3: // class
            found = myhtml_get_nodes_by_attribute_value_whitespace_separated(self->tree, NULL, self, false, "class", 5, key_str, key_len, NULL);
            result = collection_to_blessed_array(found);
            break;

        case 4: case 5: // id (first)
            found = myhtml_get_nodes_by_attribute_value(self->tree, NULL, self, false, "id", 2, key_str, key_len, NULL);
            if (found && found->length)
                result = node_to_sv(found->list[0]);
            break;

        case 6: case 7: // attribute
        {
            if (!val) {
                // [key]
                found = myhtml_get_nodes_by_attribute_key(self->tree, NULL, self, key_str, key_len, NULL);
            } else {
                STRLEN val_len;
                const char *val_str = SvPV_const(val, val_len);

                // comparison operator: first character of `cmp`, '=' when absent or empty
                char cmp_type = '=';
                if (cmp) {
                    cmp = sv_stringify(cmp);

                    STRLEN cmp_len;
                    const char *cmp_str = SvPV_const(cmp, cmp_len);

                    if (cmp_len)
                        cmp_type = cmp_str[0];
                }

                switch (cmp_type) {
                    case '=': // [key=val]
                        found = myhtml_get_nodes_by_attribute_value(self->tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
                        break;
                    case '~': // [key~=val]
                        found = myhtml_get_nodes_by_attribute_value_whitespace_separated(self->tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
                        break;
                    case '^': // [key^=val]
                        found = myhtml_get_nodes_by_attribute_value_begin(self->tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
                        break;
                    case '$': // [key$=val]
                        found = myhtml_get_nodes_by_attribute_value_end(self->tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
                        break;
                    case '*': // [key*=val]
                        found = myhtml_get_nodes_by_attribute_value_contain(self->tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
                        break;
                    case '|': // [key|=val]
                        found = myhtml_get_nodes_by_attribute_value_hyphen_separated(self->tree, NULL, self, icase, key_str, key_len, val_str, val_len, NULL);
                        break;
                    default:
                        sub_croak(cv, "unknown cmp type: %c", cmp_type);
                        break;
                }
            }

            result = collection_to_blessed_array(found);
            break;
        }
    }

    if (found)
        myhtml_collection_destroy(found);

    return result;
}
360
/*
 * Reads hash entry `key` (of `length` bytes) from `hv` as an integer.
 * Returns `def` when `hv` is NULL or the entry is missing.
 */
static long hv_get_int_value(HV *hv, const char *key, int length, long def) {
    if (!hv)
        return def;

    SV **slot = hv_fetch(hv, key, length, 0);
    if (!slot || !*slot)
        return def;

    return SvIV(*slot);
}
369
/*
 * Reads hash entry `key` from `hv` and interprets it as an encoding.
 *
 * Returns `def` when `hv` is NULL, the entry is missing, or its string value
 * is empty. Otherwise:
 *   - a value starting with a digit is taken as a numeric encoding id; the
 *     special ids AUTO / DEFAULT / NOT_DETERMINED are returned as-is, any
 *     other id must be known to myencoding_name_by_id();
 *   - any other value is looked up by name, with "auto" and "default"
 *     (case-insensitive) accepted as aliases for MyENCODING_AUTO and
 *     MyENCODING_DEFAULT.
 * Unrecognised values yield MyENCODING_NOT_DETERMINED.
 */
static myencoding_t hv_get_encoding_value(HV *hv, const char *key, int length, myencoding_t def) {
    if (!hv)
        return def;

    SV **slot = hv_fetch(hv, key, length, 0);
    if (!slot || !*slot)
        return def;

    SV *encoding = sv_stringify(*slot);

    STRLEN enc_length;
    const char *enc_str = SvPV_const(encoding, enc_length);

    if (enc_length == 0)
        return def;

    myencoding_t enc_id;

    // cast: passing a negative char to isdigit() is undefined behaviour
    if (isdigit((unsigned char) enc_str[0])) { // May be encoding id
        enc_id = SvIV(encoding);

        if (enc_id == MyENCODING_AUTO || enc_id == MyENCODING_DEFAULT || enc_id == MyENCODING_NOT_DETERMINED)
            return enc_id;

        if (!myencoding_name_by_id(enc_id, NULL))
            return MyENCODING_NOT_DETERMINED;

        return enc_id;
    }

    // May be encoding name
    if (myencoding_by_name(enc_str, enc_length, &enc_id))
        return enc_id;

    if (enc_length == 4 && strcasecmp(enc_str, "auto") == 0)
        return MyENCODING_AUTO;

    if (enc_length == 7 && strcasecmp(enc_str, "default") == 0)
        return MyENCODING_DEFAULT;

    return MyENCODING_NOT_DETERMINED;
}
402
/*
 * Reads hash entry `key` from `hv` as the tri-state "utf8" option.
 *
 * Returns:
 *   def - `hv` is NULL, the entry is missing, or its string value is empty;
 *   0/1 - the value starts with a digit: 0 when it is numerically zero, else 1;
 *   2   - the value is "auto" (case-insensitive);
 *   1   - any other non-empty value.
 */
static int hv_get_utf8_value(HV *hv, const char *key, int length, int def) {
    if (!hv)
        return def;

    SV **slot = hv_fetch(hv, key, length, 0);
    if (!slot || !*slot)
        return def;

    SV *encoding = sv_stringify(*slot);

    STRLEN enc_length;
    const char *enc_str = SvPV_const(encoding, enc_length);

    if (enc_length == 0)
        return def;

    // cast: passing a negative char to isdigit() is undefined behaviour
    if (isdigit((unsigned char) enc_str[0]))
        return SvIV(encoding) != 0;

    // compare the length of the VALUE (the original tested the key length by mistake)
    if (enc_length == 4 && strcasecmp(enc_str, "auto") == 0)
        return 2;

    // any other non-empty string counts as true
    return 1;
}
424
/*
 * Fills `opts` from the Perl options hash `options`.
 *
 * For every option that is missing from the hash (or when `options` is NULL)
 * the value is taken from `extend` if it is non-NULL, otherwise from the
 * built-in default written next to each option below.
 * Boolean options are true when their integer value is > 0.
 * When built with MyCORE_BUILD_WITHOUT_THREADS, `threads` is forced to 0.
 */
static void html5_dom_parse_options(html5_dom_options_t *opts, html5_dom_options_t *extend, HV *options) {
    // inherited value when `extend` is given, built-in default otherwise
    #define HTML5_DOM_OPT_FALLBACK(field, builtin) (extend ? extend->field : builtin)

    opts->threads = hv_get_int_value(options, "threads", 7, HTML5_DOM_OPT_FALLBACK(threads, 0));

    // flags
    opts->ignore_whitespace = hv_get_int_value(options, "ignore_whitespace", 17, HTML5_DOM_OPT_FALLBACK(ignore_whitespace, 0)) > 0;
    opts->ignore_doctype    = hv_get_int_value(options, "ignore_doctype", 14, HTML5_DOM_OPT_FALLBACK(ignore_doctype, 0)) > 0;
    opts->scripts           = hv_get_int_value(options, "scripts", 7, HTML5_DOM_OPT_FALLBACK(scripts, 0)) > 0;

    // encoding selection and detection
    opts->encoding               = hv_get_encoding_value(options, "encoding", 8, HTML5_DOM_OPT_FALLBACK(encoding, MyENCODING_AUTO));
    opts->default_encoding       = hv_get_encoding_value(options, "default_encoding", 16, HTML5_DOM_OPT_FALLBACK(default_encoding, MyENCODING_DEFAULT));
    opts->encoding_use_meta      = hv_get_int_value(options, "encoding_use_meta", 17, HTML5_DOM_OPT_FALLBACK(encoding_use_meta, 1)) > 0;
    opts->encoding_use_bom       = hv_get_int_value(options, "encoding_use_bom", 16, HTML5_DOM_OPT_FALLBACK(encoding_use_bom, 1)) > 0;
    opts->encoding_prescan_limit = hv_get_int_value(options, "encoding_prescan_limit", 22, HTML5_DOM_OPT_FALLBACK(encoding_prescan_limit, 1024));

    // 0 / 1 / 2 as returned by hv_get_utf8_value(); built-in default is 2
    opts->utf8 = hv_get_utf8_value(options, "utf8", 4, HTML5_DOM_OPT_FALLBACK(utf8, 2));

    #undef HTML5_DOM_OPT_FALLBACK

    #ifdef MyCORE_BUILD_WITHOUT_THREADS
    opts->threads = 0;
    #endif
}
441
/*
 * Validates parsed options and croaks (through sub_croak, in the name of
 * `cv`) on the first invalid one:
 *   - encoding must be determined;
 *   - default_encoding must be determined and must not be "auto";
 *   - threads and encoding_prescan_limit must not be negative.
 */
static void html5_dom_check_options(CV *cv, html5_dom_options_t *opts) {
    const bool encoding_unknown = (opts->encoding == MyENCODING_NOT_DETERMINED);
    const bool default_encoding_unusable =
        (opts->default_encoding == MyENCODING_NOT_DETERMINED) ||
        (opts->default_encoding == MyENCODING_AUTO);

    if (encoding_unknown) {
        sub_croak(cv, "invalid encoding value");
    }

    if (default_encoding_unusable) {
        sub_croak(cv, "invalid default_encoding value");
    }

    if (opts->threads < 0) {
        sub_croak(cv, "invalid threads count");
    }

    if (opts->encoding_prescan_limit < 0) {
        sub_croak(cv, "invalid encoding_prescan_limit value");
    }
}
452
453 // selectors to AST serialization
454 static void html5_dom_css_serialize_entry(html5_css_selector_t *self, mycss_selectors_list_t *selector, mycss_selectors_entry_t *entry, AV *result);
455
/*
 * Serializes a linked list of selector lists into `result`.
 * For every entries-list of every selector in the chain, one array reference
 * (the serialized entry chain) is pushed onto `result`.
 */
static void html5_dom_css_serialize_selector(html5_css_selector_t *self, mycss_selectors_list_t *selector, AV *result) {
    mycss_selectors_list_t *current;

    for (current = selector; current; current = current->next) {
        size_t index = 0;

        while (index < current->entries_list_length) {
            AV *chain = newAV();

            html5_dom_css_serialize_entry(self, current, current->entries_list[index].entry, chain);
            av_push(result, newRV_noinc((SV *) chain));

            ++index;
        }
    }
}
467
html5_dom_css_serialize_entry(html5_css_selector_t * self,mycss_selectors_list_t * selector,mycss_selectors_entry_t * entry,AV * result)468 static void html5_dom_css_serialize_entry(html5_css_selector_t *self, mycss_selectors_list_t *selector, mycss_selectors_entry_t *entry, AV *result) {
469 // combinators names
470 static const struct {
471 const char name[16];
472 size_t len;
473 } combinators[] = {
474 {"", 0},
475 {"descendant", 10}, // >>
476 {"child", 5}, // >
477 {"sibling", 7}, // +
478 {"adjacent", 8}, // ~
479 {"column", 6} // ||
480 };
481
482 // attribute eq names
483 static const struct {
484 const char name[16];
485 size_t len;
486 } attr_match_names[] = {
487 {"equal", 5}, // =
488 {"include", 7}, // ~=
489 {"dash", 4}, // |=
490 {"prefix", 6}, // ^=
491 {"suffix", 6}, // $=
492 {"substring", 9} // *=
493 };
494
495 while (entry) {
496 if (entry->combinator != MyCSS_SELECTORS_COMBINATOR_UNDEF) {
497 HV *data = newHV();
498 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "combinator", 10), 0);
499 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, combinators[entry->combinator].name, combinators[entry->combinator].len), 0);
500 av_push(result, newRV_noinc((SV *) data));
501 }
502
503 HV *data = newHV();
504
505 if ((selector->flags) & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD)
506 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "invalid", 7)), newSViv(1), 0);
507
508 switch (entry->type) {
509 case MyCSS_SELECTORS_TYPE_ID:
510 case MyCSS_SELECTORS_TYPE_CLASS:
511 case MyCSS_SELECTORS_TYPE_ELEMENT:
512 case MyCSS_SELECTORS_TYPE_PSEUDO_CLASS:
513 case MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT:
514 {
515 switch (entry->type) {
516 case MyCSS_SELECTORS_TYPE_ELEMENT:
517 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "tag", 3), 0);
518 break;
519 case MyCSS_SELECTORS_TYPE_ID:
520 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "id", 2), 0);
521 break;
522 case MyCSS_SELECTORS_TYPE_CLASS:
523 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "class", 5), 0);
524 break;
525 case MyCSS_SELECTORS_TYPE_PSEUDO_CLASS:
526 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "pseudo-class", 12), 0);
527 break;
528 case MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT:
529 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "pseudo-element", 14), 0);
530 break;
531 }
532
533 if (entry->key)
534 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, entry->key->length ? entry->key->data : "", entry->key->length), 0);
535 }
536 break;
537 case MyCSS_SELECTORS_TYPE_ATTRIBUTE:
538 {
539 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "attribute", 9), 0);
540
541 /* key */
542 if (entry->key)
543 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, entry->key->length ? entry->key->data : "", entry->key->length), 0);
544
545 /* value */
546 if (mycss_selector_value_attribute(entry->value)->value) {
547 mycore_string_t *str_value = mycss_selector_value_attribute(entry->value)->value;
548 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, str_value->length ? str_value->data : "", str_value->length), 0);
549 } else {
550 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, "", 0), 0);
551 }
552
553 /* match */
554 int match = mycss_selector_value_attribute(entry->value)->match;
555 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "match", 5)), newSVpv_utf8_auto_css(self, attr_match_names[match].name, attr_match_names[match].len), 0);
556
557 /* modificator */
558 if (mycss_selector_value_attribute(entry->value)->mod & MyCSS_SELECTORS_MOD_I) {
559 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "ignoreCase", 10)), newSViv(1), 0);
560 } else {
561 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "ignoreCase", 10)), newSViv(0), 0);
562 }
563 }
564 break;
565 case MyCSS_SELECTORS_TYPE_PSEUDO_CLASS_FUNCTION:
566 {
567 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "function", 8), 0);
568
569 switch (entry->sub_type) {
570 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CONTAINS:
571 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_HAS:
572 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NOT:
573 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_MATCHES:
574 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CURRENT:
575 {
576 switch (entry->sub_type) {
577 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CONTAINS:
578 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "contains", 8), 0);
579 break;
580 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_HAS:
581 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "has", 3), 0);
582 break;
583 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NOT:
584 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "not", 3), 0);
585 break;
586 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_MATCHES:
587 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "matches", 7), 0);
588 break;
589 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_CURRENT:
590 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "current", 7), 0);
591 break;
592 }
593
594 AV *value = newAV();
595 html5_dom_css_serialize_selector(self, entry->value, value);
596 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newRV_noinc((SV *) value), 0);
597 }
598 break;
599
600 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_CHILD:
601 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_LAST_CHILD:
602 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_COLUMN:
603 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_LAST_COLUMN:
604 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_OF_TYPE:
605 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_NTH_LAST_OF_TYPE:
606 {
607 mycss_an_plus_b_entry_t *a_plus_b = mycss_selector_value_an_plus_b(entry->value);
608 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "nth-child", 9), 0);
609 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "a", 1)), newSViv(a_plus_b->a), 0);
610 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "b", 1)), newSViv(a_plus_b->b), 0);
611
612 if (a_plus_b->of) {
613 AV *of = newAV();
614 html5_dom_css_serialize_selector(self, a_plus_b->of, of);
615 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "of", 2)), newRV_noinc((SV *) of), 0);
616 }
617 }
618 break;
619
620 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_DIR:
621 {
622 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "dir", 3), 0);
623 if (entry->value) {
624 mycore_string_t *str_fname = mycss_selector_value_string(entry->value);
625 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, str_fname->length ? str_fname->data : "", str_fname->length), 0);
626 } else {
627 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newSVpv_utf8_auto_css(self, "", 0), 0);
628 }
629 }
630 break;
631
632 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_DROP:
633 {
634 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "drop", 4), 0);
635 mycss_selectors_function_drop_type_t drop_val = mycss_selector_value_drop(entry->value);
636
637 AV *langs = newAV();
638 if (drop_val & MyCSS_SELECTORS_FUNCTION_DROP_TYPE_ACTIVE)
639 av_push(langs, newSVpv_utf8_auto_css(self, "active", 6));
640 if (drop_val & MyCSS_SELECTORS_FUNCTION_DROP_TYPE_VALID)
641 av_push(langs, newSVpv_utf8_auto_css(self, "valid", 5));
642 if (drop_val & MyCSS_SELECTORS_FUNCTION_DROP_TYPE_INVALID)
643 av_push(langs, newSVpv_utf8_auto_css(self, "invalid", 7));
644 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newRV_noinc((SV *) langs), 0);
645 }
646 break;
647
648 case MyCSS_SELECTORS_SUB_TYPE_PSEUDO_CLASS_FUNCTION_LANG:
649 {
650 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "lang", 4), 0);
651 AV *langs = newAV();
652 if (entry->value) {
653 mycss_selectors_value_lang_t *lang = mycss_selector_value_lang(entry->value);
654 while (lang) {
655 av_push(langs, newSVpv_utf8_auto_css(self, lang->str.length ? lang->str.data : "", lang->str.length));
656 lang = lang->next;
657 }
658 }
659 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "value", 5)), newRV_noinc((SV *) langs), 0);
660 }
661 break;
662
663 default:
664 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "name", 4)), newSVpv_utf8_auto_css(self, "unknown", 7), 0);
665 break;
666 }
667 }
668 break;
669
670 default:
671 hv_store_ent(data, sv_2mortal(newSVpv_utf8_auto_css(self, "type", 4)), newSVpv_utf8_auto_css(self, "unknown", 7), 0);
672 break;
673 }
674
675 av_push(result, newRV_noinc((SV *) data));
676
677 entry = entry->next;
678 }
679 }
680
html5_dom_async_parse_init(CV * cv,html5_dom_parser_t * self,SV * html,HV * options,int ev_fd)681 static html5_dom_async_result *html5_dom_async_parse_init(CV *cv, html5_dom_parser_t *self, SV *html, HV *options, int ev_fd) {
682 html5_dom_async_result *result = (html5_dom_async_result *) safemalloc(sizeof(html5_dom_async_result));
683 memset(result, 0, sizeof(html5_dom_async_result));
684
685 result->fd = ev_fd;
686
687 // extends options
688 html5_dom_parse_options(&result->opts, &self->opts, options);
689 html5_dom_check_options(cv, &result->opts);
690
691 // Auto detect UTF8 flag
692 if (result->opts.utf8 == 2)
693 result->opts.utf8 = SvUTF8(html) ? 1 : 0;
694
695 mystatus_t status;
696
697 STRLEN html_len;
698 const char *html_str = SvPV_const(html, html_len);
699
700 // copy html source
701 result->html = safemalloc(html_len);
702 result->length = html_len;
703 memcpy(result->html, html_str, html_len);
704
705 #ifndef MyCORE_BUILD_WITHOUT_THREADS
706 // create parsing thread
707 result->thread = mythread_create();
708 status = mythread_init(result->thread, MyTHREAD_TYPE_STREAM, 1, 0);
709
710 if (status) {
711 mythread_destroy(result->thread, NULL, NULL, true);
712 safefree(result->html);
713 safefree(result);
714 sub_croak(cv, "mythread_init failed: %d (%s)", status, modest_strerror(status));
715 return NULL;
716 }
717
718 result->thread->context = result;
719
720 status = myhread_entry_create(result->thread, html5_dom_mythread_function, html5_dom_async_parse_worker, MyTHREAD_OPT_STOP);
721 mythread_option_set(result->thread, MyTHREAD_OPT_QUIT);
722
723 if (status) {
724 mythread_destroy(result->thread, NULL, NULL, true);
725 safefree(result->html);
726 safefree(result);
727 sub_croak(cv, "myhread_entry_create failed: %d (%s)", status, modest_strerror(status));
728 return NULL;
729 }
730
731 // start parsing thread
732 status = mythread_resume(result->thread, MyTHREAD_OPT_UNDEF);
733
734 if (status) {
735 mythread_destroy(result->thread, NULL, NULL, true);
736 safefree(result->html);
737 safefree(result);
738 sub_croak(cv, "mythread_resume failed: %d (%s)", status, modest_strerror(status));
739 return NULL;
740 }
741 #else
742 // sync fallback
743 html5_dom_async_parse(result);
744 #endif
745
746 return result;
747 }
748
html5_dom_async_parse_done(CV * cv,html5_dom_async_result * result,bool wait)749 static SV *html5_dom_async_parse_done(CV *cv, html5_dom_async_result *result, bool wait) {
750 if (!wait && !result->done)
751 return NULL;
752
753 #ifndef MyCORE_BUILD_WITHOUT_THREADS
754 if (result->thread)
755 result->thread = mythread_destroy(result->thread, NULL, NULL, true);
756 #endif
757
758 if (result->html) {
759 result->html = NULL;
760 safefree(result->html);
761 }
762
763 if (result->status) {
764 sub_croak(cv, "parse failed: %d (%s)", result->status, modest_strerror(result->status));
765 return NULL;
766 }
767
768 if (result->tree) {
769 DOM_GC_TRACE("DOM::new");
770 SV *myhtml_sv = pack_pointer("HTML5::DOM", result->parser);
771 result->tree_sv = (void *) create_tree_object(result->tree, SvRV(myhtml_sv), result->parser, false, result->opts.utf8);
772 result->tree = NULL;
773 SvREFCNT_dec(myhtml_sv);
774 }
775
776 return result->tree_sv ? SvREFCNT_inc((SV *) result->tree_sv) : &PL_sv_undef;
777 }
778
779 MODULE = HTML5::DOM PACKAGE = HTML5::DOM
780
781 #################################################################
782 # HTML5::DOM (Parser)
783 #################################################################
HTML5::DOM
new(SV *CLASS, HV *options = NULL)
CODE:
    DOM_GC_TRACE("DOM::new");

    // parse and validate constructor options (croaks on invalid values)
    html5_dom_options_t opts = {0};
    html5_dom_parse_options(&opts, NULL, options);
    html5_dom_check_options(cv, &opts);

    html5_dom_parser_t *self = html5_dom_parser_new(&opts);
    self->myhtml = myhtml_create();

    // one thread or less: single (non-threaded) parse mode; otherwise default mode with N threads
    mystatus_t status;
    if (self->opts.threads > 1)
        status = myhtml_init(self->myhtml, MyHTML_OPTIONS_DEFAULT, self->opts.threads, 0);
    else
        status = myhtml_init(self->myhtml, MyHTML_OPTIONS_PARSE_MODE_SINGLE, 1, 0);

    if (status) {
        // release the half-built parser before reporting the failure
        self = html5_dom_parser_free(self);
        sub_croak(cv, "myhtml_init failed: %d (%s)", status, modest_strerror(status));
    }

    RETVAL = self;
OUTPUT:
    RETVAL
812
813 # Init html chunk parser
SV *
parseChunkStart(HTML5::DOM self, HV *options = NULL)
CODE:
    mystatus_t status;

    // per-chunk-session options, defaulting to the parser's own options
    html5_dom_parse_options(&self->chunk_opts, &self->opts, options);
    html5_dom_check_options(cv, &self->chunk_opts);

    // detach any tree left over from a previous chunk session
    if (self->tree) {
        if (self->tree->context) {
            // the tree is wrapped by a Perl object: mark the wrapper (tree->context) as unused.
            // The wrapper is the context, not the myhtml tree itself.
            html5_dom_tree_t *tree_context = (html5_dom_tree_t *) self->tree->context;
            tree_context->used = false;
        } else {
            myhtml_tree_destroy(self->tree);
        }

        self->tree = NULL;
    }

    self->tree = myhtml_tree_create();
    status = myhtml_tree_init(self->tree, self->myhtml);
    if (status) {
        myhtml_tree_destroy(self->tree);
        // do not keep a pointer to the destroyed tree on the parser
        self->tree = NULL;
        sub_croak(cv, "myhtml_tree_init failed: %d (%s)", status, modest_strerror(status));
    }

    self->chunks = 0;
    myhtml_encoding_set(self->tree, self->chunk_opts.encoding == MyENCODING_AUTO ? self->chunk_opts.default_encoding : self->chunk_opts.encoding);

    // return the invocant itself so calls can be chained
    RETVAL = SvREFCNT_inc(ST(0));
OUTPUT:
    RETVAL
846
# Parse html chunk
# Feeds one chunk of html into the current chunked session; a session is
# started implicitly when parseChunkStart was not called before.
# Returns the parser itself (chainable); croaks on myhtml failures.
SV *
parseChunk(HTML5::DOM self, SV *html, HV *options = NULL)
CODE:
	mystatus_t status;

	html = sv_stringify(html);

	if (!self->tree) {
		self->tree = myhtml_tree_create();
		status = myhtml_tree_init(self->tree, self->myhtml);
		if (status) {
			// Don't keep a pointer to the destroyed tree in the parser.
			myhtml_tree_destroy(self->tree);
			self->tree = NULL;
			sub_croak(cv, "myhtml_tree_init failed: %d (%s)", status, modest_strerror(status));
		}
		// Seed the session options from the parser-wide options. Everything
		// below reads chunk_opts only, so the copy must go opts -> chunk_opts
		// (the opposite direction would clobber the parser defaults).
		memcpy(&self->chunk_opts, &self->opts, sizeof(html5_dom_options_t));
		myhtml_encoding_set(self->tree, self->chunk_opts.encoding == MyENCODING_AUTO ? self->chunk_opts.default_encoding : self->chunk_opts.encoding);
		self->chunks = 0;
	}

	STRLEN html_length;
	const char *html_str = SvPV_const(html, html_length);

	// Try detect encoding only in first chunk
	if (!self->chunks) {
		myhtml_encoding_set(self->tree, html5_dom_auto_encoding(&self->chunk_opts, &html_str, &html_length));

		// Auto detect UTF8 flag
		if (self->chunk_opts.utf8 == 2)
			self->chunk_opts.utf8 = SvUTF8(html) ? 1 : 0;

		html5_dom_apply_tree_options(self->tree, &self->chunk_opts);
	}

	++self->chunks;

	status = myhtml_parse_chunk(self->tree, html_str, html_length);
	if (status) {
		// Without a context no Tree object refers to this tree, so it is freed
		// here; the pointer is cleared so later calls can't touch freed memory.
		if (!self->tree->context) {
			myhtml_tree_destroy(self->tree);
			self->tree = NULL;
		}
		sub_croak(cv, "myhtml_parse_chunk failed: %d (%s)", status, modest_strerror(status));
	}

	RETVAL = SvREFCNT_inc(ST(0));
OUTPUT:
	RETVAL
893
# Get current Tree from current chunked parsing session
SV *
parseChunkTree(HTML5::DOM self)
CODE:
	// There is nothing to wrap unless a chunked session has been started.
	if (self->tree == NULL)
		sub_croak(cv, "call parseChunkStart or parseChunk first");

	// The tree stays attached to the parser (used = true) while the session is running.
	RETVAL = create_tree_object(self->tree, SvRV(ST(0)), self, true, self->chunk_opts.utf8);
OUTPUT:
	RETVAL
906
# End of parse chunks (return Tree)
# Finishes the chunked session, detaches the tree from the parser and
# returns it wrapped as a tree object. Croaks when no session is active
# or when myhtml reports a failure.
SV *
parseChunkEnd(HTML5::DOM self)
CODE:
	mystatus_t status;

	if (!self->tree)
		sub_croak(cv, "call parseChunkStart or parseChunk first");

	status = myhtml_parse_chunk_end(self->tree);
	if (status) {
		// Without a context no Tree object refers to this tree, so it is freed
		// here; the pointer is cleared so later calls can't touch freed memory.
		if (!self->tree->context) {
			myhtml_tree_destroy(self->tree);
			self->tree = NULL;
		}
		sub_croak(cv, "myhtml_parse_chunk failed:%d (%s)", status, modest_strerror(status));
	}

	// The bookkeeping struct lives in tree->context (the tree pointer itself is
	// a myhtml_tree_t and must not be cast to html5_dom_tree_t). Clearing "used"
	// hands the tree over to the Tree object created by parseChunkTree.
	if (self->tree->context) {
		html5_dom_tree_t *tree_context = (html5_dom_tree_t *) self->tree->context;
		tree_context->used = false;
	}

	RETVAL = create_tree_object(self->tree, SvRV(ST(0)), self, false, self->chunk_opts.utf8);
	self->tree = NULL;
OUTPUT:
	RETVAL
932
# Parse full html
SV *
parse(HTML5::DOM self, SV *html, HV *options = NULL)
CODE:
	// Per-call options layered over the parser-wide ones.
	html5_dom_options_t opts = {0};
	html5_dom_parse_options(&opts, &self->opts, options);
	html5_dom_check_options(cv, &opts);

	html = sv_stringify(html);

	// Every call parses into a brand new tree.
	myhtml_tree_t *tree = myhtml_tree_create();
	mystatus_t status = myhtml_tree_init(tree, self->myhtml);
	if (status != 0) {
		myhtml_tree_destroy(tree);
		sub_croak(cv, "myhtml_tree_init failed: %d (%s)", status, modest_strerror(status));
	}

	STRLEN html_length;
	const char *html_str = SvPV_const(html, html_length);

	// Buffer pointer and length are passed by address to the encoding helper.
	myencoding_t encoding = html5_dom_auto_encoding(&opts, &html_str, &html_length);

	// utf8 == 2 requests auto detection: follow the UTF8 flag of the input scalar.
	if (opts.utf8 == 2) {
		opts.utf8 = SvUTF8(html) ? 1 : 0;
	}

	html5_dom_apply_tree_options(tree, &opts);

	status = myhtml_parse(tree, encoding, html_str, html_length);
	if (status != 0) {
		myhtml_tree_destroy(tree);
		sub_croak(cv, "myhtml_parse failed: %d (%s)", status, modest_strerror(status));
	}

	RETVAL = create_tree_object(tree, SvRV(ST(0)), self, false, opts.utf8);
OUTPUT:
	RETVAL
972
# Parse full html (in background)
HTML5::DOM::AsyncResult
_parseAsync(HTML5::DOM self, SV *html, HV *options = NULL, int ev_fd = -1)
CODE:
	DOM_GC_TRACE("DOM::AsyncResult::new");
	// The input is stringified first; the rest is delegated to html5_dom_async_parse_init().
	SV *html_string = sv_stringify(html);
	RETVAL = html5_dom_async_parse_init(cv, self, html_string, options, ev_fd);
OUTPUT:
	RETVAL
982
# Destructor of HTML5::DOM: hands the parser object to html5_dom_parser_free()
void
DESTROY(HTML5::DOM self)
CODE:
	DOM_GC_TRACE("DOM::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));
	html5_dom_parser_free(self);
988
989
990
991 #################################################################
992 # HTML5::DOM::AsyncResult
993 #################################################################
994 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::AsyncResult
995
# Wait for parsing done and return HTML5::DOM::Tree
SV *
wait(HTML5::DOM::AsyncResult self)
CODE:
	// Third argument true: ask html5_dom_async_parse_done() to wait for the result.
	SV *result_sv = html5_dom_async_parse_done(cv, self, true);
	RETVAL = result_sv;
OUTPUT:
	RETVAL
1003
# True if parsing done
int
parsed(HTML5::DOM::AsyncResult self)
CODE:
	// Normalise the "done" flag to exactly 0 or 1.
	if (self->done) {
		RETVAL = 1;
	} else {
		RETVAL = 0;
	}
OUTPUT:
	RETVAL
1011
# Return HTML5::DOM::Tree if parsing done
SV *
tree(HTML5::DOM::AsyncResult self)
CODE:
	// Third argument false: query the result without waiting.
	SV *result_sv = html5_dom_async_parse_done(cv, self, false);
	RETVAL = result_sv;
OUTPUT:
	RETVAL
1019
# Destructor of HTML5::DOM::AsyncResult: stops the worker thread (when threads
# are compiled in) and releases everything the result object still holds.
void
DESTROY(HTML5::DOM::AsyncResult self)
CODE:
	DOM_GC_TRACE("DOM::AsyncResult::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));
	// Thread teardown is compiled only when myhtml was built with threads.
	// The directives start at column 0 and are not preceded by a blank line,
	// so that xsubpp passes them through and keeps them inside this XSUB.
#ifndef MyCORE_BUILD_WITHOUT_THREADS
	if (self->thread)
		self->thread = mythread_destroy(self->thread, NULL, NULL, true);
#endif
	// A raw tree that is still stored here is destroyed together with the
	// parser stored next to it.
	if (self->tree) {
		self->tree = myhtml_tree_destroy(self->tree);

		if (self->parser)
			self->parser = html5_dom_parser_free(self->parser);
	}

	// Drop our reference to the tree object, if one is stored.
	if (self->tree_sv)
		SvREFCNT_dec((SV *) self->tree_sv);

	if (self->html)
		safefree(self->html);

	safefree(self);
1041
1042 #################################################################
1043 # HTML5::DOM::Tree
1044 #################################################################
1045 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::Tree
1046
SV *
body(HTML5::DOM::Tree self)
CODE:
	// Wrap whatever myhtml_tree_get_node_body() reports for this tree.
	myhtml_tree_node_t *body_node = myhtml_tree_get_node_body(self->tree);
	RETVAL = node_to_sv(body_node);
OUTPUT:
	RETVAL
1053
SV *
createElement(HTML5::DOM::Tree self, SV *tag, SV *ns_name = NULL)
CODE:
	// Namespace: HTML unless a namespace name was passed in.
	myhtml_namespace_t ns = MyHTML_NAMESPACE_HTML;
	if (ns_name != NULL) {
		STRLEN ns_name_len;
		ns_name = sv_stringify(ns_name);
		const char *ns_name_str = SvPV_const(ns_name, ns_name_len);
		if (!myhtml_namespace_id_by_name(ns_name_str, ns_name_len, &ns))
			sub_croak(cv, "unknown namespace: %s", ns_name_str);
	}

	// Translate the tag name into a tag id.
	STRLEN tag_len;
	tag = sv_stringify(tag);
	const char *tag_str = SvPV_const(tag, tag_len);
	myhtml_tag_id_t tag_id = html5_dom_tag_id_by_name(self->tree, tag_str, tag_len, true);

	// Build the detached node.
	myhtml_tree_node_t *node = myhtml_node_create(self->tree, tag_id, ns);

	// Void elements carry a token flagged as self-closed and done.
	if (myhtml_node_is_void_element(node)) {
		if (node->token == NULL) {
			node->token = myhtml_token_node_create(node->tree->token, self->tree->mcasync_rules_token_id);
			if (node->token == NULL) {
				myhtml_tree_node_delete(node);
				sub_croak(cv, "myhtml_token_node_create failed");
			}
		}
		node->token->type |= MyHTML_TOKEN_TYPE_CLOSE_SELF | MyHTML_TOKEN_TYPE_DONE;
	}

	RETVAL = node_to_sv(node);
OUTPUT:
	RETVAL
1091
SV *
createComment(HTML5::DOM::Tree self, SV *text)
CODE:
	// Stringify the argument and fetch its buffer.
	STRLEN text_len;
	text = sv_stringify(text);
	const char *text_str = SvPV_const(text, text_len);

	// Create a detached comment node and store the text in it.
	myhtml_tree_node_t *comment = myhtml_node_create(self->tree, MyHTML_TAG__COMMENT, MyHTML_NAMESPACE_HTML);
	myhtml_node_text_set(comment, text_str, text_len, MyENCODING_DEFAULT);

	RETVAL = node_to_sv(comment);
OUTPUT:
	RETVAL
1103
SV *
createTextNode(HTML5::DOM::Tree self, SV *text)
CODE:
	// Stringify the argument and fetch its buffer.
	STRLEN text_len;
	text = sv_stringify(text);
	const char *text_str = SvPV_const(text, text_len);

	// Create a detached text node and store the text in it.
	myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG__TEXT, MyHTML_NAMESPACE_HTML);
	myhtml_node_text_set(text_node, text_str, text_len, MyENCODING_DEFAULT);

	RETVAL = node_to_sv(text_node);
OUTPUT:
	RETVAL
1115
# Parse fragment
# Context defaults: tag "div" in the HTML namespace.
SV *
parseFragment(HTML5::DOM::Tree self, SV *text, SV *tag = NULL, SV *ns = NULL, HV *options = NULL)
CODE:
	STRLEN text_len;
	text = sv_stringify(text);
	const char *text_str = SvPV_const(text, text_len);

	mystatus_t status;
	myhtml_tag_id_t tag_id = MyHTML_TAG_DIV;
	myhtml_namespace_t ns_id = MyHTML_NAMESPACE_HTML;

	// Optional context namespace.
	if (ns != NULL) {
		STRLEN ns_len;
		ns = sv_stringify(ns);
		const char *ns_str = SvPV_const(ns, ns_len);

		if (!myhtml_namespace_id_by_name(ns_str, ns_len, &ns_id))
			sub_croak(cv, "unknown namespace: %s", ns_str);
	}

	// Optional context tag.
	if (tag != NULL) {
		STRLEN tag_len;
		tag = sv_stringify(tag);
		const char *tag_str = SvPV_const(tag, tag_len);
		tag_id = html5_dom_tag_id_by_name(self->tree, tag_str, tag_len, true);
	}

	// Options are layered over those of the parser this tree belongs to.
	html5_dom_options_t opts = {0};
	html5_dom_parse_options(&opts, &self->parser->opts, options);
	html5_dom_check_options(cv, &opts);

	myhtml_tree_node_t *fragment = html5_dom_parse_fragment(&opts, self->tree, tag_id, ns_id, text_str, text_len, NULL, &status);
	if (status != 0)
		sub_croak(cv, "myhtml_parse_fragment failed: %d (%s)", status, modest_strerror(status));

	RETVAL = node_to_sv(fragment);
OUTPUT:
	RETVAL
1154
SV *
head(HTML5::DOM::Tree self)
CODE:
	// Wrap whatever myhtml_tree_get_node_head() reports for this tree.
	myhtml_tree_node_t *head_node = myhtml_tree_get_node_head(self->tree);
	RETVAL = node_to_sv(head_node);
OUTPUT:
	RETVAL
1161
SV *
root(HTML5::DOM::Tree self)
CODE:
	// Wrap whatever myhtml_tree_get_node_html() reports for this tree.
	myhtml_tree_node_t *html_node = myhtml_tree_get_node_html(self->tree);
	RETVAL = node_to_sv(html_node);
OUTPUT:
	RETVAL
1168
SV *
document(HTML5::DOM::Tree self)
CODE:
	// Wrap whatever myhtml_tree_get_document() reports for this tree.
	myhtml_tree_node_t *document_node = myhtml_tree_get_document(self->tree);
	RETVAL = node_to_sv(document_node);
OUTPUT:
	RETVAL
1175
SV *
find(HTML5::DOM::Tree self, SV *query, SV *combinator = NULL)
ALIAS:
	at					= 1
	querySelector		= 2
	querySelectorAll	= 3
CODE:
	// The search is scoped to the document node; without one the result is undef.
	// Aliases 1 and 2 (at, querySelector) set the last argument of html5_node_find().
	myhtml_tree_node_t *scope = myhtml_tree_get_document(self->tree);
	if (scope == NULL) {
		RETVAL = &PL_sv_undef;
	} else {
		const bool single = (ix == 1 || ix == 2);
		RETVAL = html5_node_find(cv, self->parser, scope, query, combinator, single);
	}
OUTPUT:
	RETVAL
1191
# Wait for parsing done (when async mode) - removed
# Kept as a no-op that returns the tree itself.
SV *
wait(HTML5::DOM::Tree self)
CODE:
	SV *self_sv = ST(0);
	RETVAL = SvREFCNT_inc(self_sv);
OUTPUT:
	RETVAL
1199
# True if parsing done (when async mode) - removed
# Kept for compatibility: always reports 1.
int
parsed(HTML5::DOM::Tree self)
CODE:
	const int always_parsed = 1;
	RETVAL = always_parsed;
OUTPUT:
	RETVAL
1207
# utf8(flag) - enable or disable utf8 mode
# utf8() - get status of utf8 mode (0 - disabled, 1 - enabled)
# Setter rules: empty string disables; a value starting with a digit is taken
# numerically (non-zero enables); any other non-empty string enables.
# The setter returns the tree itself (chainable).
SV *
utf8(HTML5::DOM::Tree self, SV *value = NULL)
CODE:
	if (!value) {
		RETVAL = newSViv(self->utf8 ? 1 : 0);
	} else {
		value = sv_stringify(value);

		STRLEN enc_length;
		const char *enc_str = SvPV_const(value, enc_length);

		// Exactly one branch assigns the flag; it must not be reset afterwards,
		// otherwise the setter could never enable utf8 mode.
		if (enc_length == 0) {
			self->utf8 = 0;
		} else if (isdigit((unsigned char) enc_str[0])) {
			// <ctype.h> functions need a value representable as unsigned char
			self->utf8 = SvIV(value) != 0;
		} else {
			self->utf8 = 1;
		}

		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
1235
# findTag(val), getElementsByTagName(val) - get nodes by tag name
# findClass(val), getElementsByClassName(val) - get nodes by class name
# findId(val), getElementById(val) - get node by id
# findAttr(key), getElementByAttribute(key) - get nodes by attribute key
# findAttr(key, val, case, cmp), getElementByAttribute(key, val, case, cmp) - get nodes by attribute value
SV *
findTag(HTML5::DOM::Tree self, SV *key, SV *val = NULL, bool icase = false, SV *cmp = NULL)
ALIAS:
	getElementsByTagName	= 1
	findClass				= 2
	getElementsByClassName	= 3
	findId					= 4
	getElementById			= 5
	findAttr				= 6
	getElementByAttribute	= 7
CODE:
	// The alias index tells html5_node_simple_find() which lookup to run; the document node is the scope.
	myhtml_tree_node_t *scope = myhtml_tree_get_document(self->tree);
	RETVAL = html5_node_simple_find(cv, scope, key, val, cmp, icase, ix);
OUTPUT:
	RETVAL
1255
# Get compat mode
SV *
compatMode(HTML5::DOM::Tree self)
CODE:
	// "BackCompat" in quirks mode; "CSS1Compat" in no-quirks ("standards") and
	// limited-quirks ("almost standards") mode. Both names are 10 bytes long.
	const bool quirks = (self->tree->compat_mode == MyHTML_TREE_COMPAT_MODE_QUIRKS);
	if (quirks) {
		RETVAL = newSVpv_utf8_auto(self->tree, "BackCompat", 10);
	} else {
		RETVAL = newSVpv_utf8_auto(self->tree, "CSS1Compat", 10);
	}
OUTPUT:
	RETVAL
1269
# Get current tree encoding name
SV *
encoding(HTML5::DOM::Tree self)
CODE:
	size_t name_len = 0;
	const char *enc_name = myencoding_name_by_id(self->tree->encoding, &name_len);
	// An unknown id (NULL name) is reported as an empty string.
	if (enc_name == NULL)
		enc_name = "";
	RETVAL = newSVpv_utf8_auto(self->tree, enc_name, name_len);
OUTPUT:
	RETVAL
1279
# Get current tree encoding id
SV *
encodingId(HTML5::DOM::Tree self)
CODE:
	// The numeric encoding id stored in the tree, as an integer scalar.
	SV *id_sv = newSViv(self->tree->encoding);
	RETVAL = id_sv;
OUTPUT:
	RETVAL
1287
# Tag id by tag name
SV *
tag2id(HTML5::DOM::Tree self, SV *tag)
CODE:
	STRLEN tag_len;
	tag = sv_stringify(tag);
	const char *tag_str = SvPV_const(tag, tag_len);

	// Last argument is false here (createElement and friends pass true).
	myhtml_tag_id_t tag_id = html5_dom_tag_id_by_name(self->tree, tag_str, tag_len, false);
	RETVAL = newSViv(tag_id);
OUTPUT:
	RETVAL
1298
# Tag name by tag id
SV *
id2tag(HTML5::DOM::Tree self, int tag_id)
CODE:
	// undef for ids without a tag context; a context without a name yields "".
	const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(self->tree->tags, tag_id);
	if (tag_ctx == NULL) {
		RETVAL = &PL_sv_undef;
	} else {
		RETVAL = newSVpv_utf8_auto(self->tree, tag_ctx->name ? tag_ctx->name : "", tag_ctx->name_length);
	}
OUTPUT:
	RETVAL
1309
# Namespace id by namespace name
SV *
namespace2id(HTML5::DOM::Tree self, SV *ns)
CODE:
	STRLEN ns_len;
	ns = sv_stringify(ns);
	const char *ns_str = SvPV_const(ns, ns_len);

	// Unknown names are reported as MyHTML_NAMESPACE_UNDEF.
	myhtml_namespace_t ns_id;
	bool known = myhtml_namespace_id_by_name(ns_str, ns_len, &ns_id);
	if (!known) {
		ns_id = MyHTML_NAMESPACE_UNDEF;
	}

	RETVAL = newSViv(ns_id);
OUTPUT:
	RETVAL
1325
# Namespace name by namespace id
SV *
id2namespace(HTML5::DOM::Tree self, int ns_id)
CODE:
	// undef when no name is known for the id.
	size_t ns_len = 0;
	const char *ns_name = myhtml_namespace_name_by_id(ns_id, &ns_len);
	if (ns_name != NULL) {
		RETVAL = newSVpv_utf8_auto(self->tree, ns_name, ns_len);
	} else {
		RETVAL = &PL_sv_undef;
	}
OUTPUT:
	RETVAL
1335
# Return tree parent parser
SV *
parser(HTML5::DOM::Tree self)
CODE:
	// The conversion to a parser SV is delegated to myhtml_to_sv().
	SV *parser_sv = myhtml_to_sv(self->tree);
	RETVAL = parser_sv;
OUTPUT:
	RETVAL
1343
# Some bad idea to get "uniq id"
# Returns the address of the tree object as an integer.
# This XSUB is registered in package HTML5::DOM::Tree, so the invocant is
# typed as a Tree (it used to be declared as HTML5::DOM::Node).
SV *
hash(HTML5::DOM::Tree self)
CODE:
	RETVAL = newSViv(PTR2IV(self));
OUTPUT:
	RETVAL
1351
# Compare tree reference
bool
isSameTree(HTML5::DOM::Tree self, SV *other_tree)
CODE:
	// Anything that is not derived from HTML5::DOM::Tree never matches.
	RETVAL = false;
	if (sv_derived_from(other_tree, "HTML5::DOM::Tree")) {
		// The referenced scalar stores the tree object pointer as an integer.
		SV *inner = (SV *) SvRV(other_tree);
		html5_dom_tree_t *other = INT2PTR(html5_dom_tree_t *, SvIV(inner));
		RETVAL = (other == self);
	}
OUTPUT:
	RETVAL
1364
# Destructor of HTML5::DOM::Tree. Statement order matters: the context pointer
# is read before the tree may be destroyed and is freed last.
void
DESTROY(HTML5::DOM::Tree self)
CODE:
	DOM_GC_TRACE("DOM::Tree::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));
	// Remember the context pointer now; the tree it is read from may be destroyed below.
	void *context = self->tree->context;
	if (self->used) {
		// Tree is flagged as used: keep it alive and only detach the context from it.
		self->tree->context = NULL;
	} else {
		myhtml_tree_destroy(self->tree);
	}
	// Drop the reference held on the parent SV, then free the context memory.
	SvREFCNT_dec(self->parent);
	safefree(context);
1377
1378
1379 #################################################################
1380 # HTML5::DOM::Node
1381 #################################################################
1382 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::Node
# Tag id
# tagId() - get the node's tag id
# tagId(new_tag_id) - set the tag id (croaks when the id is unknown), return self
SV *
tagId(HTML5::DOM::Node self, SV *new_tag_id = NULL)
CODE:
	if (new_tag_id) {
		// Read the scalar once so that lookup, assignment and message agree.
		const IV requested_id = SvIV(new_tag_id);
		const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(self->tree->tags, requested_id);
		if (tag_ctx) {
			self->tag_id = requested_id;
		} else {
			// IVdf is the printf format matching IV on every platform
			// (IV is not always a long, so "%ld" is not portable).
			sub_croak(cv, "unknown tag id %" IVdf, requested_id);
		}

		RETVAL = SvREFCNT_inc(ST(0));
	} else {
		RETVAL = newSViv(self->tag_id);
	}
OUTPUT:
	RETVAL
1401
# Namespace id
# namespaceId() - get the node's namespace id
# namespaceId(new_ns_id) - set the namespace id (croaks when the id is unknown), return self
SV *
namespaceId(HTML5::DOM::Node self, SV *new_ns_id = NULL)
CODE:
	if (new_ns_id) {
		// Read the scalar once so that validation, assignment and message agree.
		const IV requested_id = SvIV(new_ns_id);
		// An id is accepted when a namespace name exists for it.
		if (!myhtml_namespace_name_by_id(requested_id, NULL)) {
			// IVdf is the printf format matching IV on every platform
			// (IV is not always a long, so "%ld" is not portable).
			sub_croak(cv, "unknown namespace id %" IVdf, requested_id);
		} else {
			myhtml_node_namespace_set(self, requested_id);
		}
		RETVAL = SvREFCNT_inc(ST(0));
	} else {
		RETVAL = newSViv(myhtml_node_namespace(self));
	}
OUTPUT:
	RETVAL
1418
# Tag name
# tag() - get the tag name as stored; nodeName() and tagName() return it upper-cased
# tag(name) - set the tag by name (croaks on an empty name), return self
SV *
tag(HTML5::DOM::Node self, SV *new_tag_name = NULL)
ALIAS:
	nodeName	= 1
	tagName		= 2
CODE:
	myhtml_tree_t *tree = self->tree;

	// Set new tag name
	if (new_tag_name) {
		new_tag_name = sv_stringify(new_tag_name);
		STRLEN new_tag_name_len;
		const char *new_tag_name_str = SvPV_const(new_tag_name, new_tag_name_len);

		if (!new_tag_name_len)
			sub_croak(cv, "empty tag name not allowed.");

		myhtml_tag_id_t tag_id = html5_dom_tag_id_by_name(tree, new_tag_name_str, new_tag_name_len, true);
		self->tag_id = tag_id;

		RETVAL = SvREFCNT_inc(ST(0));
	}
	// Get tag name
	else {
		// undef when the tree has no tag index or the id is unknown
		RETVAL = &PL_sv_undef;

		if (tree && tree->tags) {
			const myhtml_tag_context_t *tag_ctx = myhtml_tag_get_by_id(tree->tags, self->tag_id);
			if (tag_ctx) {
				RETVAL = newSVpv_utf8_auto(tree, tag_ctx->name, tag_ctx->name_length);
				if (ix == 1 || ix == 2) {
					// Upper-case in place, ASCII letters only. Perl's toUPPER is
					// locale independent and safe for bytes >= 0x80, whereas
					// toupper() on a negative plain char is undefined behaviour
					// and could mangle UTF-8 bytes under a single-byte locale.
					STRLEN value_len;
					char *value = SvPV(RETVAL, value_len);
					for (size_t i = 0; i < value_len; ++i)
						value[i] = toUPPER(value[i]);
				}
			}
		}
	}
OUTPUT:
	RETVAL
1461
# Namespace name
# namespace() - get the namespace name ("" when no name is known)
# namespace(name) - set the namespace by name (croaks when unknown), return self
SV *
namespace(HTML5::DOM::Node self, SV *new_ns = NULL)
CODE:
	if (new_ns == NULL) {
		// Getter
		size_t ns_name_len;
		const char *ns_name = myhtml_namespace_name_by_id(myhtml_node_namespace(self), &ns_name_len);
		if (ns_name == NULL)
			ns_name = "";
		RETVAL = newSVpv_utf8_auto(self->tree, ns_name, ns_name_len);
	} else {
		// Setter
		STRLEN new_ns_len;
		new_ns = sv_stringify(new_ns);
		const char *new_ns_str = SvPV_const(new_ns, new_ns_len);

		myhtml_namespace_t ns_id;
		if (!myhtml_namespace_id_by_name(new_ns_str, new_ns_len, &ns_id))
			sub_croak(cv, "unknown namespace: %s", new_ns_str);
		myhtml_node_namespace_set(self, ns_id);

		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
1489
# Return node parent tree
SV *
tree(HTML5::DOM::Node self)
CODE:
	// The conversion to a tree SV is delegated to tree_to_sv().
	SV *tree_sv = tree_to_sv(self->tree);
	RETVAL = tree_sv;
OUTPUT:
	RETVAL
1497
# Non-recursive html serialization (example: <div id="some_id">)
SV *
nodeHtml(HTML5::DOM::Node self)
CODE:
	// Start from an empty string; the callback receives it as its context.
	SV *buffer = newSVpv_utf8_auto(self->tree, "", 0);
	myhtml_serialization_node_callback(self, sv_serialization_callback, buffer);
	RETVAL = buffer;
OUTPUT:
	RETVAL
1506
# Return node type
# Returns 0 when the node can't be classified.
int
nodeType(HTML5::DOM::Node self)
CODE:
	html5_dom_tree_t *context = (html5_dom_tree_t *) self->tree->context;
	int type = 0;

	if (self->tag_id == MyHTML_TAG__UNDEF) {
		// Modest myhtml bug - document node has tag_id == MyHTML_TAG__UNDEF
		if (node_is_document(self))
			type = DOCUMENT_NODE;
	} else if (self->tag_id == MyHTML_TAG__TEXT) {
		type = TEXT_NODE;
	} else if (self->tag_id == MyHTML_TAG__COMMENT) {
		type = COMMENT_NODE;
	} else if (self->tag_id == MyHTML_TAG__DOCTYPE) {
		type = DOCUMENT_TYPE_NODE;
	} else if (context->fragment_tag_id && self->tag_id == context->fragment_tag_id) {
		// The tree context remembers which tag id marks fragments.
		type = DOCUMENT_FRAGMENT_NODE;
	} else {
		type = ELEMENT_NODE;
	}

	RETVAL = type;
OUTPUT:
	RETVAL
1532
# Node::html() - Serialize text/comment node to html
# Node::html(text) - Same as Node::nodeValue(text)
# Element::html(text) - Remove all children nodes and add parsed fragment, return self
# Aliases: innerHTML (ix = 1) and outerHTML (ix = 2).
# The setter croaks for outerHTML on a node without a parent and when the
# fragment parser reports a failure.
SV *
html(HTML5::DOM::Node self, SV *text = NULL)
ALIAS:
	innerHTML	= 1
	outerHTML	= 2
CODE:
	if (text) {
		if (ix == 2 && !myhtml_node_parent(self)) // outerHTML
			sub_croak(cv, "This element has no parent node.");

		text = sv_stringify(text);
		STRLEN text_len;
		const char *text_str = SvPV_const(text, text_len);

		if (node_is_element(self) || node_is_document(self)) { // parse fragment and replace all node childrens with it
			// parse fragment
			mystatus_t status;
			html5_fragment_parts_t parts = {0};
			// outerHTML is parsed in the context of the parent, everything else in the context of the node itself
			myhtml_tree_node_t *context_node = ix == 2 ? myhtml_node_parent(self) : self;
			myhtml_tag_id_t context_tag_id = context_node->tag_id;

			// hack for document node
			if (node_is_document(context_node))
				context_tag_id = MyHTML_TAG_HTML;

			// start from the options of the parser that owns this tree
			html5_dom_tree_t *tree_context = (html5_dom_tree_t *) self->tree->context;
			html5_dom_options_t opts = {0};
			html5_dom_parse_options(&opts, &tree_context->parser->opts, NULL);

			// force set encoding to UTF-8
			opts.encoding = MyENCODING_DEFAULT;
			opts.default_encoding = MyENCODING_DEFAULT;

			myhtml_tree_node_t *fragment = html5_dom_parse_fragment(&opts, self->tree, context_tag_id, myhtml_node_namespace(context_node), text_str, text_len, &parts, &status);
			if (status)
				sub_croak(cv, "myhtml_parse_fragment failed: %d (%s)", status, modest_strerror(status));

			// remove all child nodes
			myhtml_tree_node_t *node = myhtml_node_child(self);
			while (node) {
				// fetch the sibling first: the current node is detached and deleted below
				myhtml_tree_node_t *next = myhtml_node_next(node);
				myhtml_tree_node_remove(node);
				html5_tree_node_delete_recursive(node);
				node = next;
			}

			// cleanup references in tree
			// (the tree's html/head/body pointers are replaced with the parts reported by the fragment parser)
			if (node_is_root(self)) {
				self->tree->node_body = parts.node_body;
				self->tree->node_head = parts.node_head;
			} else if (node_is_document(self)) {
				self->tree->node_html = parts.node_html;
				self->tree->node_body = parts.node_body;
				self->tree->node_head = parts.node_head;
			}

			if (fragment != self->tree->node_html) {
				// add fragment
				node = myhtml_node_child(fragment);
				while (node) {
					// fetch the sibling first: the current node is moved below
					myhtml_tree_node_t *next = myhtml_node_next(node);
					myhtml_tree_node_remove(node);
					if (ix == 2) { // outerHTML
						myhtml_tree_node_insert_before(self, node);
					} else { // innerHTML
						myhtml_tree_node_add_child(self, node);
					}
					node = next;
				}

				// remove self if outerHTML
				if (ix == 2)
					myhtml_tree_node_remove(self);

				// free fragment
				html5_tree_node_delete_recursive(fragment);
			} else {
				// fragment now is html node, why not?
				fragment->tag_id = MyHTML_TAG_HTML;
				myhtml_tree_node_remove(fragment);
				myhtml_tree_node_add_child(self, fragment);
			}
		} else { // same as nodeValue, for user friendly API
			myhtml_node_text_set(self, text_str, text_len, MyENCODING_DEFAULT);
		}
		RETVAL = SvREFCNT_inc(ST(0));
	} else {
		// getter: serialize into a fresh, initially empty scalar
		RETVAL = newSVpv_utf8_auto(self->tree, "", 0);
		if (self->tag_id == MyHTML_TAG__UNDEF || ix == 1 || html5_dom_is_fragment(self)) { // innerHTML
			// only the children are serialized, one subtree after another
			myhtml_tree_node_t *node = myhtml_node_child(self);
			while (node) {
				myhtml_serialization_tree_callback(node, sv_serialization_callback, RETVAL);
				node = myhtml_node_next(node);
			}
		} else { // outerHTML
			myhtml_serialization_tree_callback(self, sv_serialization_callback, RETVAL);
		}
	}
OUTPUT:
	RETVAL
1636
# Node::text() - Serialize tree to text
# Node::text(text) - Set node value, return self
# Element::text(text) - Remove all children nodes and add text node, return self
# Aliases by ix: nodeValue = 1, innerText = 2, textContent = 3, data = 4, outerText = 5.
# For non-element nodes innerText/textContent/outerText are unsupported, for
# elements nodeValue/data are: getters return undef, setters croak.
SV *
text(HTML5::DOM::Node self, SV *text = NULL)
ALIAS:
	nodeValue	= 1
	innerText	= 2
	textContent	= 3
	data		= 4
	outerText	= 5
CODE:
	// method names indexed by ix, used in error messages only
	static const char names[][16] = {
		"text", "nodeValue", "innerText", "textContent", "data", "outerText"
	};

	myhtml_tree_t *tree = self->tree;
	if (!node_is_element(self)) {
		if (ix == 2 || ix == 3 || ix == 5) {
			if (text) {
				sub_croak(cv, "%s unsupported in %s", names[ix], get_node_class(self));
			} else {
				RETVAL = &PL_sv_undef;
			}
		} else if (text) { // set node value
			text = sv_stringify(text);
			STRLEN text_len;
			const char *text_str = SvPV_const(text, text_len);

			myhtml_node_text_set(self, text_str, text_len, MyENCODING_DEFAULT);
			RETVAL = SvREFCNT_inc(ST(0));
		} else { // get node value
			size_t text_len = 0;
			const char *text = myhtml_node_text(self, &text_len);
			RETVAL = newSVpv_utf8_auto(self->tree, text ? text : "", text_len);
		}
	} else {
		if (ix == 1 || ix == 4) {
			if (text) {
				sub_croak(cv, "%s unsupported in %s", names[ix], get_node_class(self));
			} else {
				RETVAL = &PL_sv_undef;
			}
		} else if (text) { // remove all childrens and add text node
			text = sv_stringify(text);
			STRLEN text_len;
			const char *text_str = SvPV_const(text, text_len);

			// remove all children nodes
			myhtml_tree_node_t *node = myhtml_node_child(self);
			while (node) {
				// fetch the sibling first: the current node is detached and deleted below
				myhtml_tree_node_t *next = myhtml_node_next(node);
				myhtml_tree_node_remove(node);
				html5_tree_node_delete_recursive(node);
				node = next;
			}

			// cleanup references in tree
			if (node_is_root(self)) {
				self->tree->node_body = NULL;
				self->tree->node_head = NULL;
			} else if (node_is_document(self)) {
				self->tree->node_html = NULL;
				self->tree->node_body = NULL;
				self->tree->node_head = NULL;
			}

			// innerText, outerText
			// (the text is split at line breaks: each piece becomes a text node, each break a <br>)
			if (ix == 2 || ix == 5) {
				// start of the piece that has not been emitted yet
				size_t last_pos = 0;
				for (size_t i = 0; i < text_len; ++i) {
					bool is_end = (i >= text_len - 1);
					bool is_new_line = (text_str[i] == '\n' || text_str[i] == '\r');
					if (is_end || is_new_line) {
						// the last character belongs to the piece unless it is a line break
						if (is_end && !is_new_line)
							++i;

						// insert new text node
						if (i - last_pos) {
							myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG__TEXT, myhtml_node_namespace(self));
							myhtml_node_text_set(text_node, &text_str[last_pos], i - last_pos, MyENCODING_DEFAULT);
							if (ix == 5) { // outerText
								myhtml_tree_node_insert_before(self, text_node);
							} else { // innerText
								myhtml_tree_node_add_child(self, text_node);
							}
						}

						// insert new br
						if (is_new_line) {
							myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG_BR, myhtml_node_namespace(self));
							if (!text_node->token) {
								text_node->token = myhtml_token_node_create(self->tree->token, self->tree->mcasync_rules_token_id);
								if (!text_node->token) {
									myhtml_tree_node_delete(text_node);
									sub_croak(cv, "myhtml_token_node_create failed");
								}
								// flag the new token as self-closed and done
								text_node->token->type |= MyHTML_TOKEN_TYPE_CLOSE_SELF | MyHTML_TOKEN_TYPE_DONE;
							}

							if (ix == 5) { // outerText
								myhtml_tree_node_insert_before(self, text_node);
							} else { // innerText
								myhtml_tree_node_add_child(self, text_node);
							}
						}

						if (!is_end) {
							// "\r\n" counts as a single line break
							if (text_str[i] == '\r' && text_str[i + 1] == '\n')
								++i;
							last_pos = i + 1;
						}
					}
				}
			}
			// text, textContent
			else {
				myhtml_tree_node_t *text_node = myhtml_node_create(self->tree, MyHTML_TAG__TEXT, myhtml_node_namespace(self));
				myhtml_node_text_set(text_node, text_str, text_len, MyENCODING_DEFAULT);
				myhtml_tree_node_add_child(self, text_node);
			}

			RETVAL = SvREFCNT_inc(ST(0));

			if (ix == 5) {
				// remove self, if outerText
				myhtml_tree_node_remove(self);
			}
		} else { // recursive serialize node to text
			// innerText, outerText
			if (ix == 2 || ix == 5) {
				html5_dom_inner_text_state_t state = {0};
				state.last_br = true;
				state.new_line = true;

				mycore_string_init(self->tree->mchar, self->tree->mchar_node_id, &state.value, 1);

				// every child subtree appends to state.value
				myhtml_tree_node_t *next = myhtml_node_child(self);
				while (next) {
					html5_dom_recursive_node_inner_text(next, &state);
					next = myhtml_node_next(next);
				}
				html5_dom_rtrim_mystring(&state.value, ' ');

				RETVAL = newSVpv_utf8_auto(self->tree, state.value.length ? state.value.data : "", state.value.length);
				mycore_string_destroy(&state.value, 0);
			}
			// text, textContent
			else {
				RETVAL = newSVpv_utf8_auto(self->tree, "", 0);
				html5_dom_recursive_node_text(self, RETVAL);
			}
		}
	}
OUTPUT:
	RETVAL
1793
# Wait for node parsing done (when async mode) - removed
# Kept as a no-op that returns the node itself; "deep" is ignored.
SV *
wait(HTML5::DOM::Node self, bool deep = false)
CODE:
	SV *self_sv = ST(0);
	RETVAL = SvREFCNT_inc(self_sv);
OUTPUT:
	RETVAL
1801
# True if node parsing done (when async mode) - removed
# Kept for compatibility: always reports 1; "deep" is ignored.
int
parsed(HTML5::DOM::Node self, bool deep = false)
CODE:
	const int always_parsed = 1;
	RETVAL = always_parsed;
OUTPUT:
	RETVAL
1809
# Next element
SV *
next(HTML5::DOM::Node self)
ALIAS:
	nextElementSibling	= 1
CODE:
	// Walk forward over the siblings until an element shows up (or the list ends).
	myhtml_tree_node_t *sibling;
	for (sibling = myhtml_node_next(self); sibling != NULL; sibling = myhtml_node_next(sibling)) {
		if (node_is_element(sibling))
			break;
	}
	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1822
# Next node
SV *
nextNode(HTML5::DOM::Node self)
ALIAS:
	nextSibling	= 1
CODE:
	// The immediate next sibling, whatever its node type.
	myhtml_tree_node_t *sibling = myhtml_node_next(self);
	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1832
# Prev element
SV *
prev(HTML5::DOM::Node self)
ALIAS:
	previousElementSibling	= 1
CODE:
	// Walk backwards over the siblings until an element shows up (or the list ends).
	myhtml_tree_node_t *sibling;
	for (sibling = myhtml_node_prev(self); sibling != NULL; sibling = myhtml_node_prev(sibling)) {
		if (node_is_element(sibling))
			break;
	}
	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1845
# Prev node
SV *
prevNode(HTML5::DOM::Node self)
ALIAS:
	previousSibling	= 1
CODE:
	// The immediate previous sibling, whatever its node type.
	myhtml_tree_node_t *sibling = myhtml_node_prev(self);
	RETVAL = node_to_sv(sibling);
OUTPUT:
	RETVAL
1855
# Parent node
# isConnected (ix = 1) returns 1 or 0 depending on whether a parent exists;
# the other names return the parent node itself.
SV *
parent(HTML5::DOM::Node self)
ALIAS:
	isConnected		= 1
	parentNode		= 2
	parentElement	= 3
CODE:
	myhtml_tree_node_t *parent_node = myhtml_node_parent(self);
	if (ix == 1) {
		RETVAL = newSViv(parent_node ? 1 : 0);
	} else {
		RETVAL = node_to_sv(parent_node);
	}
OUTPUT:
	RETVAL
1867
# Owner document
SV *
document(HTML5::DOM::Node self)
ALIAS:
	ownerDocument	= 1
CODE:
	// The document node of the tree this node belongs to.
	myhtml_tree_node_t *document_node = myhtml_tree_get_document(self->tree);
	RETVAL = node_to_sv(document_node);
OUTPUT:
	RETVAL
1877
# Remove node from tree
# remove() detaches the node itself; removeChild(node) detaches the given
# child and croaks unless it is a direct child of this node.
SV *
remove(HTML5::DOM::Node self, HTML5::DOM::Node node = NULL)
ALIAS:
	removeChild	= 1
CODE:
	if (ix != 1) {
		RETVAL = node_to_sv(myhtml_tree_node_remove(self));
	} else {
		if (node == NULL)
			sub_croak(cv, "%s is not of type %s", "node", "HTML5::DOM::Node");
		if (node->parent != self)
			sub_croak(cv, "The node to be removed is not a child of this node.");
		RETVAL = node_to_sv(myhtml_tree_node_remove(node));
	}
OUTPUT:
	RETVAL
1895
# Append child to parent before current node
# before(a) inserts a before this node and returns self.
# insertBefore(a, b) inserts a before b (b must be a child of this node)
# and returns the inserted node.
SV *
before(HTML5::DOM::Node self, HTML5::DOM::Node a, HTML5::DOM::Node b = NULL)
ALIAS:
	insertBefore	= 1
CODE:
	const bool is_insert_before = (ix == 1);

	// The node to insert is always the first argument.
	myhtml_tree_node_t *new_node = a;
	myhtml_tree_node_t *reference_node;

	if (is_insert_before) {
		reference_node = b;

		if (reference_node == NULL)
			sub_croak(cv, "%s is not of type %s", "reference_node", "HTML5::DOM::Node");
		if (reference_node->parent != self)
			sub_croak(cv, "The node before which the new node is to be inserted is not a child of this node.");
	} else {
		reference_node = self;
	}

	if (myhtml_node_parent(reference_node) == NULL)
		sub_croak(cv, "can't insert before detached node");

	// A node from another tree is detached there and cloned into the reference node's tree.
	if (new_node->tree != reference_node->tree) {
		myhtml_tree_node_remove(new_node);
		new_node = html5_dom_recursive_clone_node(reference_node->tree, new_node, NULL);
		if (new_node == NULL)
			sub_croak(cv, "node copying internal error");
	}

	if (!html5_dom_is_fragment(new_node)) {
		myhtml_tree_node_remove(new_node);
		myhtml_tree_node_insert_before(reference_node, new_node);
	} else {
		// For a fragment, its children are moved one by one instead of the fragment itself.
		myhtml_tree_node_t *child = myhtml_node_child(new_node);
		while (child != NULL) {
			// Fetch the sibling first: moving the child rewrites its links.
			myhtml_tree_node_t *following = myhtml_node_next(child);
			myhtml_tree_node_remove(child);
			myhtml_tree_node_insert_before(reference_node, child);
			child = following;
		}
	}

	if (is_insert_before) {
		RETVAL = node_to_sv(new_node);
	} else {
		RETVAL = SvREFCNT_inc(ST(0));
	}
OUTPUT:
	RETVAL
1947
# Insert a node right after a reference node.
#   after(new_node)                   - reference is self; returns self
#   insertAfter(new_node, reference)  - reference must be a child of self;
#                                       returns node_to_sv(new_node)
# The reference node must have a parent. A new_node owned by another tree is
# detached there and replaced by a copy made in the reference tree with
# html5_dom_recursive_clone_node(). For a fragment the children are moved in
# place of the fragment itself; they are taken last to first so that repeated
# insertion right after the reference keeps their original order.
SV *
after(HTML5::DOM::Node self, HTML5::DOM::Node a, HTML5::DOM::Node b = NULL)
ALIAS:
    insertAfter = 1
CODE:
    myhtml_tree_node_t *reference_node, *new_node;

    if (ix == 1) {
        new_node = a;
        reference_node = b;

        if (!reference_node)
            sub_croak(cv, "%s is not of type %s", "reference_node", "HTML5::DOM::Node");
        if (reference_node->parent != self)
            sub_croak(cv, "The node after which the new node is to be inserted is not a child of this node.");
    } else {
        new_node = a;
        reference_node = self;
    }

    // message previously said "before" (copied from the before() XSUB)
    if (!myhtml_node_parent(reference_node))
        sub_croak(cv, "can't insert after detached node");

    if (reference_node->tree != new_node->tree) {
        // foreign node: detach it and continue with a copy owned by the reference tree
        myhtml_tree_node_remove(new_node);
        new_node = html5_dom_recursive_clone_node(reference_node->tree, new_node, NULL);
        if (!new_node)
            sub_croak(cv, "node copying internal error");
    }

    if (html5_dom_is_fragment(new_node)) {
        myhtml_tree_node_t *fragment_child = myhtml_node_last_child(new_node);
        while (fragment_child) {
            // the predecessor has to be read before the node is unlinked
            myhtml_tree_node_t *next = myhtml_node_prev(fragment_child);
            myhtml_tree_node_remove(fragment_child);
            myhtml_tree_node_insert_after(reference_node, fragment_child);
            fragment_child = next;
        }
    } else {
        myhtml_tree_node_remove(new_node);
        myhtml_tree_node_insert_after(reference_node, new_node);
    }

    if (ix == 1) {
        RETVAL = node_to_sv(new_node);
    } else {
        RETVAL = SvREFCNT_inc(ST(0));
    }
OUTPUT:
    RETVAL
1999
# Add a node as the last child of self.
#   append(child)       - returns self
#   appendChild(child)  - returns node_to_sv(child)
# Croaks when node_is_element(self) is false. A child owned by another tree
# is detached there and replaced by a copy made in self's tree with
# html5_dom_recursive_clone_node(). For a fragment the children are moved
# (first to last) in place of the fragment itself.
SV *
append(HTML5::DOM::Node self, HTML5::DOM::Node child)
ALIAS:
    appendChild = 1
CODE:
    if (!node_is_element(self)) {
        sub_croak(cv, "can't append children to non-element node");
    }

    if (child->tree != self->tree) {
        // foreign node: detach it and continue with a copy owned by self's tree
        myhtml_tree_node_remove(child);
        child = html5_dom_recursive_clone_node(self->tree, child, NULL);
        if (!child) {
            sub_croak(cv, "node copying internal error");
        }
    }

    if (!html5_dom_is_fragment(child)) {
        myhtml_tree_node_remove(child);
        myhtml_tree_node_add_child(self, child);
    } else {
        myhtml_tree_node_t *current = myhtml_node_child(child);
        while (current) {
            // the successor has to be read before the node is unlinked
            myhtml_tree_node_t *following = myhtml_node_next(current);
            myhtml_tree_node_remove(current);
            myhtml_tree_node_add_child(self, current);
            current = following;
        }
    }

    RETVAL = (ix == 1) ? node_to_sv(child) : SvREFCNT_inc(ST(0));
OUTPUT:
    RETVAL
2036
# Add a node as the first child of self.
#   prepend(child)       - returns self
#   prependChild(child)  - returns node_to_sv(child)
# Croaks when node_is_element(self) is false. A child owned by another tree
# is detached there and replaced by a copy made in self's tree with
# html5_dom_recursive_clone_node(). For a fragment the children are moved
# (first to last) in front of the child that was first when the call started.
SV *
prepend(HTML5::DOM::Node self, HTML5::DOM::Node child)
ALIAS:
    prependChild = 1
CODE:
    if (!node_is_element(self))
        sub_croak(cv, "can't prepend children to non-element node");

    if (self->tree != child->tree) {
        // foreign node: detach it and continue with a copy owned by self's tree
        myhtml_tree_node_remove(child);
        child = html5_dom_recursive_clone_node(self->tree, child, NULL);
        if (!child)
            sub_croak(cv, "node copying internal error");
    }

    if (html5_dom_is_fragment(child)) {
        myhtml_tree_node_t *first_node = myhtml_node_child(self);
        myhtml_tree_node_t *fragment_child = myhtml_node_child(child);
        while (fragment_child) {
            // the successor has to be read before the node is unlinked
            myhtml_tree_node_t *next = myhtml_node_next(fragment_child);
            myhtml_tree_node_remove(fragment_child);
            if (first_node) {
                myhtml_tree_node_insert_before(first_node, fragment_child);
            } else {
                myhtml_tree_node_add_child(self, fragment_child);
            }
            fragment_child = next;
        }
    } else {
        myhtml_tree_node_remove(child);

        // Look up the first child only after child has been detached.
        // Capturing it earlier made "prepend the current first child" insert
        // the node before itself, which corrupts the sibling links.
        myhtml_tree_node_t *first_node = myhtml_node_child(self);
        if (first_node) {
            myhtml_tree_node_insert_before(first_node, child);
        } else {
            myhtml_tree_node_add_child(self, child);
        }
    }

    if (ix == 1) {
        RETVAL = node_to_sv(child);
    } else {
        RETVAL = SvREFCNT_inc(ST(0));
    }
OUTPUT:
    RETVAL
2082
# Put a new node in the place of an old one.
#   replace(new_node)                 - old node is self; returns self
#   replaceChild(new_node, old_node)  - old node must be a child of self;
#                                       returns node_to_sv(old_node)
# The new node (or, for a fragment, each of its children in order) is inserted
# before the old node, then the old node is detached. A new node owned by
# another tree is detached there and replaced by a copy made in the old node's
# tree with html5_dom_recursive_clone_node().
SV *
replace(HTML5::DOM::Node self, HTML5::DOM::Node a, HTML5::DOM::Node b = NULL)
ALIAS:
    replaceChild = 1
CODE:
    const int is_replace_child = (ix == 1);
    myhtml_tree_node_t *new_node = a;
    myhtml_tree_node_t *old_node = is_replace_child ? b : self;

    if (is_replace_child) {
        if (!old_node) {
            sub_croak(cv, "%s is not of type %s", "old_node", "HTML5::DOM::Node");
        }
        if (old_node->parent != self) {
            sub_croak(cv, "The node to be replaced is not a child of this node.");
        }
    }

    if (new_node->tree != old_node->tree) {
        // foreign node: detach it and continue with a copy owned by the old node's tree
        myhtml_tree_node_remove(new_node);
        new_node = html5_dom_recursive_clone_node(old_node->tree, new_node, NULL);
        if (!new_node) {
            sub_croak(cv, "node copying internal error");
        }
    }

    if (!html5_dom_is_fragment(new_node)) {
        myhtml_tree_node_remove(new_node);
        myhtml_tree_node_insert_before(old_node, new_node);
    } else {
        myhtml_tree_node_t *current = myhtml_node_child(new_node);
        while (current) {
            // the successor has to be read before the node is unlinked
            myhtml_tree_node_t *following = myhtml_node_next(current);
            myhtml_tree_node_remove(current);
            myhtml_tree_node_insert_before(old_node, current);
            current = following;
        }
    }

    myhtml_tree_node_remove(old_node);

    if (is_replace_child) {
        RETVAL = node_to_sv(old_node);
    } else {
        RETVAL = SvREFCNT_inc(ST(0));
    }
OUTPUT:
    RETVAL
2129
# Copy a node.
# Aliases: clone, cloneNode.
#   deep      - true: html5_dom_recursive_clone_node(); false (default):
#               html5_dom_copy_foreign_node()
#   new_tree  - optional target tree; without it the copy is made in self's tree
# The copy is converted with node_to_sv().
SV *
clone(HTML5::DOM::Node self, bool deep = false, HTML5::DOM::Tree new_tree = NULL)
ALIAS:
    cloneNode = 1
CODE:
    myhtml_tree_t *target_tree = self->tree;
    if (new_tree)
        target_tree = new_tree->tree;

    RETVAL = deep
        ? node_to_sv(html5_dom_recursive_clone_node(target_tree, self, NULL))
        : node_to_sv(html5_dom_copy_foreign_node(target_tree, self));
OUTPUT:
    RETVAL
2144
# Reports the result of myhtml_node_is_void_element() for this node.
bool
void(HTML5::DOM::Node self)
CODE:
    bool is_void = myhtml_node_is_void_element(self);
    RETVAL = is_void;
OUTPUT:
    RETVAL
2152
# Reports the result of myhtml_node_is_close_self() for this node.
bool
selfClosed(HTML5::DOM::Node self)
CODE:
    bool is_self_closed = myhtml_node_is_close_self(self);
    RETVAL = is_self_closed;
OUTPUT:
    RETVAL
2160
# Token offsets of the node, returned as a hash reference with the keys
# raw_begin, raw_length, element_begin and element_length.
# Values are copied from self->token; every value is 0 when the node has no
# token.
SV *
position(HTML5::DOM::Node self)
CODE:
    HV *info = newHV();
    const int has_token = (self->token != NULL);
    SV *field;

    field = newSViv(has_token ? self->token->raw_begin : 0);
    hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "raw_begin", 9)), field, 0);

    field = newSViv(has_token ? self->token->raw_length : 0);
    hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "raw_length", 10)), field, 0);

    field = newSViv(has_token ? self->token->element_begin : 0);
    hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "element_begin", 13)), field, 0);

    field = newSViv(has_token ? self->token->element_length : 0);
    hv_store_ent(info, sv_2mortal(newSVpv_utf8_auto(self->tree, "element_length", 14)), field, 0);

    RETVAL = newRV_noinc((SV *) info);
OUTPUT:
    RETVAL
2173
# Identifier for the node: the address of the underlying node structure,
# converted to an integer with PTR2IV(). Two wrappers of the same node give
# the same number.
SV *
hash(HTML5::DOM::Node self)
CODE:
    IV address = PTR2IV(self);
    RETVAL = newSViv(address);
OUTPUT:
    RETVAL
2181
# Identity comparison of two node wrappers.
# Returns true only when other_node is a reference to an object derived from
# HTML5::DOM::Node whose stored pointer equals self. Anything else gives false.
bool
isSameNode(HTML5::DOM::Node self, SV *other_node)
CODE:
    RETVAL = false;
    // sv_derived_from() also accepts a plain class-name string, so the SvROK()
    // test is required before SvRV() may be applied to the argument.
    if (SvROK(other_node) && sv_derived_from(other_node, "HTML5::DOM::Node")) {
        myhtml_tree_node_t *node = INT2PTR(myhtml_tree_node_t *, SvIV((SV*)SvRV(other_node)));
        if (node == self)
            RETVAL = true;
    }
OUTPUT:
    RETVAL
2194
# Destructor of a node wrapper.
# Does nothing when the node carries no data pointer. Otherwise the data
# pointer is cleared, a parentless node that is not the tree document is
# deleted recursively (after clearing the first matching tree shortcut
# pointer), and the reference held on the tree context SV is dropped.
void
DESTROY(HTML5::DOM::Node self)
CODE:
    SV *wrapper = (SV *) myhtml_node_get_data(self);

    DOM_GC_TRACE("DOM::Node::DESTROY (refcnt=%d)", wrapper ? SvREFCNT(wrapper) : -666);

    if (wrapper) {
        myhtml_tree_t *owner = self->tree;
        // read before the node can be deleted below
        html5_dom_tree_t *tree_ctx = (html5_dom_tree_t *) owner->context;

        myhtml_node_set_data(self, NULL);

        // detached node, can be deleted
        const int is_detached = !myhtml_node_parent(self) && self != myhtml_tree_get_document(owner);
        if (is_detached) {
            if (owner->node_html == self) {
                owner->node_html = NULL;
            } else if (owner->node_body == self) {
                owner->node_body = NULL;
            } else if (owner->node_head == self) {
                owner->node_head = NULL;
            } else if (owner->node_form == self) {
                owner->node_form = NULL;
            } else if (owner->fragment == self) {
                owner->fragment = NULL;
            } else if (owner->document == self) {
                owner->document = NULL;
            }
            DOM_GC_TRACE("=> DOM::Node::FREE");
            html5_tree_node_delete_recursive(self);
        }

        SvREFCNT_dec(tree_ctx->sv);
    }
2225
2226 #################################################################
2227 # HTML5::DOM::Element (extends Node)
2228 #################################################################
2229 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::Element
# CSS query below this element, delegated to html5_node_find() with the parser
# taken from the tree context.
# Aliases: find, at, querySelector, querySelectorAll.
# The last argument of html5_node_find() is true for at and querySelector
# (ix 1 and 2) and false for find and querySelectorAll.
SV *
find(HTML5::DOM::Element self, SV *query, SV *combinator = NULL)
ALIAS:
    at = 1
    querySelector = 2
    querySelectorAll = 3
CODE:
    html5_dom_tree_t *ctx = (html5_dom_tree_t *) self->tree->context;
    const int single_flag = (ix == 1 || ix == 2);
    RETVAL = html5_node_find(cv, ctx->parser, self, query, combinator, single_flag);
OUTPUT:
    RETVAL
2242
# Simple lookups, all delegated to html5_node_simple_find(); the alias index
# is passed through as the last argument.
#   findTag(val),   getElementsByTagName(val)    - nodes by tag name
#   findClass(val), getElementsByClassName(val)  - nodes by class name
#   findId(val),    getElementById(val)          - node by id
#   findAttr(key),  getElementByAttribute(key)   - nodes by attribute key
#   findAttr(key, val, case, cmp),
#   getElementByAttribute(key, val, case, cmp)   - nodes by attribute value
SV *
findTag(HTML5::DOM::Element self, SV *key, SV *val = NULL, bool icase = false, SV *cmp = NULL)
ALIAS:
    getElementsByTagName = 1
    findClass = 2
    getElementsByClassName = 3
    findId = 4
    getElementById = 5
    findAttr = 6
    getElementByAttribute = 7
CODE:
    SV *found = html5_node_simple_find(cv, self, key, val, cmp, icase, ix);
    RETVAL = found;
OUTPUT:
    RETVAL
2262
# First child that is an element.
# Aliases: first, firstElementChild.
# Starts at myhtml_node_child(self) and steps forward until node_is_element()
# accepts a node or the children run out; the stopping point (possibly NULL)
# is converted with node_to_sv().
SV *
first(HTML5::DOM::Element self)
ALIAS:
    firstElementChild = 1
CODE:
    myhtml_tree_node_t *candidate;
    for (candidate = myhtml_node_child(self); candidate; candidate = myhtml_node_next(candidate)) {
        if (node_is_element(candidate))
            break;
    }
    RETVAL = node_to_sv(candidate);
OUTPUT:
    RETVAL
2275
# First child node of any type.
# Aliases: firstNode, firstChild.
# The result of myhtml_node_child(self) is converted with node_to_sv().
SV *
firstNode(HTML5::DOM::Element self)
ALIAS:
    firstChild = 1
CODE:
    myhtml_tree_node_t *head_child = myhtml_node_child(self);
    RETVAL = node_to_sv(head_child);
OUTPUT:
    RETVAL
2285
# Last child that is an element.
# Aliases: last, lastElementChild.
# Starts at myhtml_node_last_child(self) and steps backwards until
# node_is_element() accepts a node or the children run out; the stopping point
# (possibly NULL) is converted with node_to_sv().
SV *
last(HTML5::DOM::Element self)
ALIAS:
    lastElementChild = 1
CODE:
    myhtml_tree_node_t *candidate;
    for (candidate = myhtml_node_last_child(self); candidate; candidate = myhtml_node_prev(candidate)) {
        if (node_is_element(candidate))
            break;
    }
    RETVAL = node_to_sv(candidate);
OUTPUT:
    RETVAL
2298
# Last child node of any type.
# Aliases: lastNode, lastChild.
# The result of myhtml_node_last_child(self) is converted with node_to_sv().
SV *
lastNode(HTML5::DOM::Element self)
ALIAS:
    lastChild = 1
CODE:
    myhtml_tree_node_t *tail_child = myhtml_node_last_child(self);
    RETVAL = node_to_sv(tail_child);
OUTPUT:
    RETVAL
2308
# All attributes of the element as a reference to an array of hashes.
# Each hash has the keys "name", "value" and "namespace"; a part for which the
# library returns NULL is stored as an empty string. Attributes appear in the
# order given by myhtml_node_attribute_first() / myhtml_attribute_next().
SV *
attrArray(HTML5::DOM::Element self)
CODE:
    AV *list = newAV();
    myhtml_tree_attr_t *attr;

    for (attr = myhtml_node_attribute_first(self); attr; attr = myhtml_attribute_next(attr)) {
        size_t name_len = 0;
        size_t value_len = 0;
        size_t ns_len = 0;

        const char *name_str = myhtml_attribute_key(attr, &name_len);
        const char *value_str = myhtml_attribute_value(attr, &value_len);
        const char *ns_str = myhtml_namespace_name_by_id(myhtml_attribute_namespace(attr), &ns_len);

        if (!name_str)
            name_str = "";
        if (!value_str)
            value_str = "";
        if (!ns_str)
            ns_str = "";

        HV *item = newHV();
        hv_store_ent(item, sv_2mortal(newSVpv_utf8_auto(self->tree, "name", 4)), newSVpv_utf8_auto(self->tree, name_str, name_len), 0);
        hv_store_ent(item, sv_2mortal(newSVpv_utf8_auto(self->tree, "value", 5)), newSVpv_utf8_auto(self->tree, value_str, value_len), 0);
        hv_store_ent(item, sv_2mortal(newSVpv_utf8_auto(self->tree, "namespace", 9)), newSVpv_utf8_auto(self->tree, ns_str, ns_len), 0);

        av_push(list, newRV_noinc((SV *) item));
    }

    RETVAL = newRV_noinc((SV *) list);
OUTPUT:
    RETVAL
2340
# Attribute accessor / mutator.
#   attr()                    - all attributes as a hash reference
#   attr("key")               - value of attribute "key"; the undef SV when the
#                               attribute is missing or the key is empty
#   attr("key", "value")      - set attribute "key" (returns self); a value SV
#                               of type SVt_NULL removes the attribute instead
#   attr({"key" => "value"})  - bulk set / remove with the same rule (returns self)
#   setAttribute(key, value)  - like attr(key, value); both arguments required
#   getAttribute(key)         - like attr(key); key required and always treated
#                               as a string, a passed value is not used
SV *
attr(HTML5::DOM::Element self, SV *key = NULL, SV *value = NULL)
ALIAS:
    setAttribute = 1
    getAttribute = 2
CODE:
    RETVAL = &PL_sv_undef;

    if (ix == 1) { // setAttribute
        if (!key)
            sub_croak(cv, "attribute key required for setAttribute");

        if (!value)
            sub_croak(cv, "attribute value required for setAttribute");
    } else if (ix == 2) { // getAttribute
        if (!key)
            sub_croak(cv, "attribute key required for getAttribute");

        // stringified here so that a hash reference never reaches the bulk-set branch
        key = sv_stringify(key);
        value = NULL;
    }

    if (key && value) { // Set value by key or delete by key
        key = sv_stringify(key);
        value = sv_stringify(value);

        STRLEN key_len = 0;
        const char *key_str = SvPV_const(key, key_len);

        if (key_len) {
            // if value is undef - only remove attribute
            // NOTE(review): SvTYPE() == SVt_NULL only matches a never-assigned
            // scalar; confirm whether SvOK() was intended.
            if (SvTYPE(value) != SVt_NULL) {
                STRLEN val_len = 0;
                const char *val_str = SvPV_const(value, val_len);
                html5_dom_replace_attr_value(self, key_str, key_len, val_str, val_len, MyENCODING_DEFAULT);
            } else {
                myhtml_attribute_remove_by_key(self, key_str, key_len);
            }
        }

        // return self
        RETVAL = SvREFCNT_inc(ST(0));
    } else if (key && !value) {
        // Bulk attr set
        if (SvROK(key) && SvTYPE(SvRV(key)) == SVt_PVHV) {
            HE *entry;
            HV *hash = (HV *) SvRV(key);

            // Reset the hash iterator first: without it the walk would resume
            // wherever an earlier, unfinished iteration of this hash stopped
            // and entries could be skipped.
            hv_iterinit(hash);

            while ((entry = hv_iternext(hash)) != NULL) {
                SV *entry_value = hv_iterval(hash, entry);
                I32 entry_key_len = 0;
                const char *entry_key = hv_iterkey(entry, &entry_key_len);

                // a non-positive length (empty key, or a negative marker) is skipped
                if (entry_value && entry_key_len > 0) {
                    entry_value = sv_stringify(entry_value);

                    // if value is undef - only remove attribute
                    if (SvTYPE(entry_value) != SVt_NULL) {
                        STRLEN val_len = 0;
                        const char *val_str = SvPV_const(entry_value, val_len);
                        html5_dom_replace_attr_value(self, entry_key, entry_key_len, val_str, val_len, MyENCODING_DEFAULT);
                    } else {
                        myhtml_attribute_remove_by_key(self, entry_key, entry_key_len);
                    }
                }
            }

            RETVAL = SvREFCNT_inc(ST(0));
        }
        // Get attribute by key
        else {
            key = sv_stringify(key);

            STRLEN key_len = 0;
            const char *key_str = SvPV_const(key, key_len);

            if (key_len) {
                myhtml_tree_attr_t *attr = myhtml_attribute_by_key(self, key_str, key_len);
                if (attr) {
                    size_t attr_val_len = 0;
                    const char *attr_val = myhtml_attribute_value(attr, &attr_val_len);
                    RETVAL = newSVpv_utf8_auto(self->tree, attr_val ? attr_val : "", attr_val_len);
                }
            }
        }
    } else { // Return all attributes in hash
        HV *hash = newHV();

        myhtml_tree_attr_t *attr = myhtml_node_attribute_first(self);
        while (attr) {
            size_t attr_key_len = 0;
            const char *attr_key = myhtml_attribute_key(attr, &attr_key_len);

            size_t attr_val_len = 0;
            const char *attr_val = myhtml_attribute_value(attr, &attr_val_len);

            hv_store_ent(hash, sv_2mortal(newSVpv_utf8_auto(self->tree, attr_key ? attr_key : "", attr_key_len)), newSVpv_utf8_auto(self->tree, attr_val ? attr_val : "", attr_val_len), 0);

            attr = myhtml_attribute_next(attr);
        }

        RETVAL = newRV_noinc((SV *) hash);
    }
OUTPUT:
    RETVAL
2449
# Remove an attribute by key and return self.
# Aliases: removeAttr, removeAttribute.
# Croaks when called without a key. An empty key leaves the element untouched.
SV *
removeAttr(HTML5::DOM::Element self, SV *key = NULL)
ALIAS:
    removeAttribute = 1
CODE:
    // key defaults to NULL when the caller passes no argument; SvPV_const()
    // below must never see a NULL SV.
    // NOTE(review): sv_stringify() is defined outside this section, its NULL
    // handling was not verified.
    if (!key)
        sub_croak(cv, "attribute key required");

    key = sv_stringify(key);

    STRLEN key_len = 0;
    const char *key_str = SvPV_const(key, key_len);

    if (key_len)
        myhtml_attribute_remove_by_key(self, key_str, key_len);

    RETVAL = SvREFCNT_inc(ST(0));
OUTPUT:
    RETVAL
2467
# Child elements of this element.
# Builds an array of node_to_sv() wrappers for every child accepted by
# node_is_element() and returns a reference to it blessed into
# HTML5::DOM::Collection.
SV *
children(HTML5::DOM::Element self)
CODE:
    AV *items = newAV();
    myhtml_tree_node_t *cursor;

    for (cursor = myhtml_node_child(self); cursor; cursor = myhtml_node_next(cursor)) {
        if (!node_is_element(cursor))
            continue;
        av_push(items, node_to_sv(cursor));
    }

    SV *collection_ref = newRV_noinc((SV *) items);
    RETVAL = sv_bless(collection_ref, gv_stashpv("HTML5::DOM::Collection", 0));
OUTPUT:
    RETVAL
2484
# Child nodes of this element, of any type.
# Aliases: childrenNode, childNodes.
# Builds an array of node_to_sv() wrappers for every child and returns a
# reference to it blessed into HTML5::DOM::Collection.
SV *
childrenNode(HTML5::DOM::Element self)
ALIAS:
    childNodes = 1
CODE:
    AV *items = newAV();
    myhtml_tree_node_t *cursor;

    for (cursor = myhtml_node_child(self); cursor; cursor = myhtml_node_next(cursor)) {
        av_push(items, node_to_sv(cursor));
    }

    SV *collection_ref = newRV_noinc((SV *) items);
    RETVAL = sv_bless(collection_ref, gv_stashpv("HTML5::DOM::Collection", 0));
OUTPUT:
    RETVAL
2502
# Name of the default display type reported by html5_dom_get_ua_display_prop()
# for this element, e.g. "block" or "inline". A value that has no entry in the
# table below yields the undef SV.
SV *
getDefaultBoxType(HTML5::DOM::Element self)
CODE:
    static const struct {
        int         id;
        const char *label;
    } box_types[] = {
        { TAG_UA_STYLE_NONE,                "none" },
        { TAG_UA_STYLE_INLINE,              "inline" },
        { TAG_UA_STYLE_BLOCK,               "block" },
        { TAG_UA_STYLE_INLINE_BLOCK,        "inline-block" },
        { TAG_UA_STYLE_LIST_ITEM,           "list-item" },
        { TAG_UA_STYLE_TABLE,               "table" },
        { TAG_UA_STYLE_TABLE_CAPTION,       "table-caption" },
        { TAG_UA_STYLE_TABLE_CELL,          "table-cell" },
        { TAG_UA_STYLE_TABLE_COLUMN,        "table-column" },
        { TAG_UA_STYLE_TABLE_COLUMN_GROUP,  "table-column-group" },
        { TAG_UA_STYLE_TABLE_HEADER_GROUP,  "table-header-group" },
        { TAG_UA_STYLE_TABLE_FOOTER_GROUP,  "table-footer-group" },
        { TAG_UA_STYLE_TABLE_ROW,           "table-row" },
        { TAG_UA_STYLE_TABLE_ROW_GROUP,     "table-row-group" },
        { TAG_UA_STYLE_RUBY,                "ruby" },
        { TAG_UA_STYLE_RUBY_BASE,           "ruby-base" },
        { TAG_UA_STYLE_RUBY_TEXT,           "ruby-text" },
        { TAG_UA_STYLE_RUBY_TEXT_CONTAINER, "ruby-text-container" },
    };

    const int display = (int) html5_dom_get_ua_display_prop(self);
    const char *label = NULL;
    size_t i;

    for (i = 0; i < sizeof(box_types) / sizeof(box_types[0]); ++i) {
        if (box_types[i].id == display) {
            label = box_types[i].label;
            break;
        }
    }

    if (label) {
        RETVAL = newSVpv_utf8_auto(self->tree, label, strlen(label));
    } else {
        RETVAL = &PL_sv_undef;
    }
OUTPUT:
    RETVAL
2568
2569 #################################################################
2570 # HTML5::DOM::DocType (extends Node)
2571 #################################################################
2572 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::DocType
# DOCTYPE accessor / mutator.
#   name() / publicId() / systemId()     - return the requested part ("" when
#                                          it is missing)
#   name(v) / publicId(v) / systemId(v)  - rebuild the token attributes with
#                                          the given part replaced; returns self
SV *name(HTML5::DOM::DocType self, SV *value = NULL)
ALIAS:
    publicId = 1
    systemId = 2
CODE:
    // As read by this code, the token attributes form the chain
    // [root name] -> [PUBLIC|SYSTEM keyword] -> [public id] -> [system id]
    myhtml_tree_attr_t *name_attr = self->token ? self->token->attr_first : NULL;
    myhtml_tree_attr_t *keyword_attr = name_attr ? name_attr->next : NULL;
    myhtml_tree_attr_t *public_attr = keyword_attr ? keyword_attr->next : NULL;
    myhtml_tree_attr_t *system_attr = public_attr ? public_attr->next : NULL;

    // with a SYSTEM keyword the attribute after it is the system id
    if (keyword_attr && keyword_attr->value.length == 6 && mycore_strcasecmp(keyword_attr->value.data, "SYSTEM") == 0) {
        system_attr = public_attr;
        public_attr = NULL;
    }

    // current parts; "" with length 0 when a part is absent
    const char *cur_name = (name_attr && name_attr->key.length) ? name_attr->key.data : "";
    size_t cur_name_len = name_attr ? name_attr->key.length : 0;

    const char *cur_public = (public_attr && public_attr->value.length) ? public_attr->value.data : "";
    size_t cur_public_len = public_attr ? public_attr->value.length : 0;

    const char *cur_system = (system_attr && system_attr->value.length) ? system_attr->value.data : "";
    size_t cur_system_len = system_attr ? system_attr->value.length : 0;

    if (value) {
        value = sv_stringify(value);

        // boundaries of the old chain, captured before replacements are appended
        myhtml_tree_attr_t *old_first = self->token ? self->token->attr_first : NULL;
        myhtml_tree_attr_t *old_last = self->token ? self->token->attr_last : NULL;

        STRLEN val_len = 0;
        const char *val_str = SvPV_const(value, val_len);

        // root element name
        if (ix == 0) {
            myhtml_attribute_add(self, val_str, val_len, "", 0, MyENCODING_DEFAULT);
        } else {
            myhtml_attribute_add(self, cur_name, cur_name_len, "", 0, MyENCODING_DEFAULT);
        }

        // PUBLIC wins over SYSTEM when both would apply
        const int want_public = (ix == 1 && val_len) || (public_attr && public_attr->value.length);
        const int want_system = (ix == 2 && val_len) || (system_attr && system_attr->value.length);

        if (want_public || want_system) {
            // SYSTEM or PUBLIC
            myhtml_attribute_add(self, "", 0, want_public ? "PUBLIC" : "SYSTEM", 6, MyENCODING_DEFAULT);

            if (want_public) {
                // publicId
                if (ix == 1) {
                    myhtml_attribute_add(self, "", 0, val_str, val_len, MyENCODING_DEFAULT);
                } else {
                    myhtml_attribute_add(self, "", 0, cur_public, cur_public_len, MyENCODING_DEFAULT);
                }
            }

            // systemId
            if (ix == 2) {
                myhtml_attribute_add(self, "", 0, val_str, val_len, MyENCODING_DEFAULT);
            } else {
                myhtml_attribute_add(self, "", 0, cur_system, cur_system_len, MyENCODING_DEFAULT);
            }
        }

        // remove old
        if (old_last) {
            myhtml_tree_attr_t *cursor = old_first;
            while (cursor) {
                myhtml_tree_attr_t *following = cursor->next;
                const int reached_last = (cursor == old_last);

                myhtml_attribute_delete(self->tree, self, cursor);

                if (reached_last)
                    break;

                cursor = following;
            }
        }

        RETVAL = SvREFCNT_inc(ST(0));
    } else if (ix == 0) { /* name */
        RETVAL = newSVpv_utf8_auto(self->tree, cur_name, cur_name_len);
    } else if (ix == 1) { /* publicId */
        RETVAL = newSVpv_utf8_auto(self->tree, cur_public, cur_public_len);
    } else if (ix == 2) { /* systemId */
        RETVAL = newSVpv_utf8_auto(self->tree, cur_system, cur_system_len);
    } else {
        RETVAL = &PL_sv_undef;
    }
OUTPUT:
    RETVAL
2669
2670 #################################################################
2671 # HTML5::DOM::CSS (Parser)
2672 #################################################################
2673 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::CSS
# Constructor of the CSS parser object.
# Creates and initialises a mycss instance and a mycss entry; croaks with the
# status code and modest_strerror() text when either init call fails (after
# destroying what was created). Options are parsed with
# html5_dom_parse_options(); the encoding field is set to MyENCODING_UTF_8.
HTML5::DOM::CSS
new(SV *CLASS, HV *options = NULL)
CODE:
    DOM_GC_TRACE("DOM::CSS::new");

    mycss_t *css_engine = mycss_create();
    mystatus_t rc = mycss_init(css_engine);
    if (rc != 0) {
        mycss_destroy(css_engine, 1);
        sub_croak(cv, "mycss_init failed: %d (%s)", rc, modest_strerror(rc));
    }

    mycss_entry_t *css_entry = mycss_entry_create();
    rc = mycss_entry_init(css_engine, css_entry);
    if (rc != 0) {
        mycss_destroy(css_engine, 1);
        mycss_entry_destroy(css_entry, 1);
        sub_croak(cv, "mycss_entry_init failed: %d (%s)", rc, modest_strerror(rc));
    }

    html5_css_parser_t *parser = (html5_css_parser_t *) safemalloc(sizeof(html5_css_parser_t));
    parser->encoding = MyENCODING_UTF_8;
    parser->mycss = css_engine;
    parser->entry = css_entry;

    html5_dom_parse_options(&parser->opts, NULL, options);

    RETVAL = parser;
OUTPUT:
    RETVAL
2705
# Parse a CSS selector string into an HTML5::DOM::CSS::Selector object.
# Per-call options are merged over the parser options with
# html5_dom_parse_options(). The selector keeps a counted reference to the
# parser object. Its utf8 flag follows opts.utf8; the value 2 means "copy the
# UTF-8 flag of the query string".
SV *
parseSelector(HTML5::DOM::CSS self, SV *query, HV *options = NULL)
CODE:
    html5_dom_options_t opts;
    html5_dom_parse_options(&opts, &self->opts, options);

    query = sv_stringify(query);

    STRLEN query_len;
    const char *query_str = SvPV_const(query, query_len);

    mystatus_t status;
    mycss_selectors_list_t *parsed = mycss_selectors_parse(mycss_entry_selectors(self->entry), MyENCODING_UTF_8, query_str, query_len, &status);

    DOM_GC_TRACE("DOM::CSS::Selector::NEW");
    html5_css_selector_t *selector = (html5_css_selector_t *) safemalloc(sizeof(html5_css_selector_t));
    selector->parser = self;
    selector->list = parsed;
    selector->parent = SvRV(ST(0));
    selector->utf8 = (opts.utf8 == 2) ? (SvUTF8(query) ? 1 : 0) : (opts.utf8 != 0);

    // the selector holds its parser object until its own DESTROY
    SvREFCNT_inc(selector->parent);

    RETVAL = pack_pointer("HTML5::DOM::CSS::Selector", selector);
OUTPUT:
    RETVAL
2738
# Destructor of the CSS parser object: destroys the mycss entry, then the
# mycss instance, then frees the wrapper structure.
void
DESTROY(HTML5::DOM::CSS self)
CODE:
    DOM_GC_TRACE("DOM::CSS::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));

    mycss_entry_t *css_entry = self->entry;
    mycss_t *css_engine = self->mycss;

    mycss_entry_destroy(css_entry, 1);
    mycss_destroy(css_engine, 1);
    safefree(self);
2746
2747
2748 #################################################################
2749 # HTML5::DOM::CSS::Selector
2750 #################################################################
2751 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::CSS::Selector
2752
# Serialize the selector list to text.
# Starts from an empty string SV and, when a list is present, lets
# mycss_selectors_serialization_list() feed sv_serialization_callback with
# that SV as its context.
SV *
text(HTML5::DOM::CSS::Selector self)
CODE:
    mycss_selectors_list_t *list = self->list;

    RETVAL = newSVpv_utf8_auto_css(self, "", 0);
    if (list) {
        mycss_selectors_serialization_list(mycss_entry_selectors(self->parser->entry), list, sv_serialization_callback, RETVAL);
    }
OUTPUT:
    RETVAL
2762
# Validity of the selector: false when there is no parsed list or when the
# list carries the MyCSS_SELECTORS_FLAGS_SELECTOR_BAD flag, true otherwise.
bool
valid(HTML5::DOM::CSS::Selector self)
CODE:
    if (self->list) {
        RETVAL = !(self->list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD);
    } else {
        RETVAL = 0;
    }
OUTPUT:
    RETVAL
2770
# Selector AST as an array reference.
# The array is filled by html5_dom_css_serialize_selector() and stays empty
# when there is no parsed list.
SV *
ast(HTML5::DOM::CSS::Selector self)
CODE:
    AV *nodes = newAV();
    mycss_selectors_list_t *list = self->list;

    if (list) {
        html5_dom_css_serialize_selector(self, list, nodes);
    }

    RETVAL = newRV_noinc((SV *) nodes);
OUTPUT:
    RETVAL
2781
# Number of entries in the selector list (entries_list_length); 0 when there
# is no parsed list.
int
length(HTML5::DOM::CSS::Selector self)
CODE:
    if (self->list) {
        RETVAL = self->list->entries_list_length;
    } else {
        RETVAL = 0;
    }
OUTPUT:
    RETVAL
2789
# Selector entry at the given index as an HTML5::DOM::CSS::Selector::Entry.
# Returns the undef SV when there is no parsed list or the index is outside
# 0 .. entries_list_length - 1. The entry object keeps a counted reference to
# this selector object.
SV *
entry(HTML5::DOM::CSS::Selector self, int index)
CODE:
    RETVAL = &PL_sv_undef;

    if (self->list && index >= 0 && index < self->list->entries_list_length) {
        DOM_GC_TRACE("DOM::CSS::Selector::Entry::NEW");
        html5_css_selector_entry_t *wrapped = (html5_css_selector_entry_t *) safemalloc(sizeof(html5_css_selector_entry_t));
        wrapped->selector = self;
        wrapped->list = &self->list->entries_list[index];
        wrapped->parent = SvRV(ST(0));

        // the entry holds its selector object until its own DESTROY
        SvREFCNT_inc(wrapped->parent);

        RETVAL = pack_pointer("HTML5::DOM::CSS::Selector::Entry", wrapped);
    }
OUTPUT:
    RETVAL
2807
# utf8(flag) - enable or disable utf8 mode, returns self
# utf8()     - get status of utf8 mode (0 - disabled, 1 - enabled)
# The flag is read as a string: an empty string disables the mode, a string
# starting with a digit is taken by its integer value (non-zero enables), any
# other non-empty string enables the mode.
SV *
utf8(HTML5::DOM::CSS::Selector self, SV *value = NULL)
CODE:
    if (!value) {
        RETVAL = newSViv(self->utf8 ? 1 : 0);
    } else {
        value = sv_stringify(value);

        STRLEN enc_length;
        const char *enc_str = SvPV_const(value, enc_length);

        // The reset to 0 used to run after the flag had been computed, which
        // made it impossible to ever enable the mode through this setter.
        self->utf8 = 0;

        if (enc_length > 0) {
            // cast: isdigit() is only defined for unsigned char values and EOF
            if (isdigit((unsigned char) enc_str[0])) {
                self->utf8 = SvIV(value) != 0;
            } else {
                self->utf8 = 1;
            }
        }

        RETVAL = SvREFCNT_inc(ST(0));
    }
OUTPUT:
    RETVAL
2835
# Destructor of a selector object: destroys the parsed list when one exists,
# drops the reference held on the parent parser object and frees the wrapper
# structure.
void
DESTROY(HTML5::DOM::CSS::Selector self)
CODE:
    DOM_GC_TRACE("DOM::CSS::Selector::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));

    mycss_selectors_list_t *list = self->list;
    if (list) {
        mycss_selectors_list_destroy(mycss_entry_selectors(self->parser->entry), list, true);
    }

    SvREFCNT_dec(self->parent);
    safefree(self);
2844
2845
2846 #################################################################
2847 # HTML5::DOM::CSS::Selector::Entry
2848 #################################################################
2849 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::CSS::Selector::Entry
2850
# Serialize this selector entry to text.
# Starts from an empty string SV and lets
# mycss_selectors_serialization_chain() feed sv_serialization_callback with
# that SV as its context.
SV *
text(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    html5_css_selector_t *owner = self->selector;

    RETVAL = newSVpv_utf8_auto_css(owner, "", 0);
    mycss_selectors_serialization_chain(mycss_entry_selectors(owner->parser->entry), self->list->entry, sv_serialization_callback, RETVAL);
OUTPUT:
    RETVAL
2859
# AST of this selector entry as an array reference, filled by
# html5_dom_css_serialize_entry().
SV *
ast(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    html5_css_selector_t *owner = self->selector;
    AV *nodes = newAV();

    html5_dom_css_serialize_entry(owner, owner->list, self->list->entry, nodes);

    RETVAL = newRV_noinc((SV *) nodes);
OUTPUT:
    RETVAL
2869
# Key of the first chain item whose type is
# MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT; the undef SV when the chain has no
# such item.
SV *
pseudoElement(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    mycss_selectors_entry_t *item;

    RETVAL = &PL_sv_undef;
    for (item = self->list->entry; item; item = item->next) {
        if (item->type != MyCSS_SELECTORS_TYPE_PSEUDO_ELEMENT)
            continue;

        RETVAL = newSVpv_utf8_auto_css(self->selector, item->key->length ? item->key->data : "", item->key->length);
        break;
    }
OUTPUT:
    RETVAL
2885
# Validity of the owning selector: true unless its list carries the
# MyCSS_SELECTORS_FLAGS_SELECTOR_BAD flag.
bool
valid(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    const int is_bad = (self->selector->list->flags & MyCSS_SELECTORS_FLAGS_SELECTOR_BAD) != 0;
    RETVAL = !is_bad;
OUTPUT:
    RETVAL
2893
# Specificity of the entry as a hash reference with the keys a, b and c,
# copied from self->list->specificity.
SV *
specificity(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    html5_css_selector_t *owner = self->selector;
    HV *weights = newHV();
    SV *field;

    field = newSViv(self->list->specificity.a);
    hv_store_ent(weights, sv_2mortal(newSVpv_utf8_auto_css(owner, "a", 1)), field, 0);

    field = newSViv(self->list->specificity.b);
    hv_store_ent(weights, sv_2mortal(newSVpv_utf8_auto_css(owner, "b", 1)), field, 0);

    field = newSViv(self->list->specificity.c);
    hv_store_ent(weights, sv_2mortal(newSVpv_utf8_auto_css(owner, "c", 1)), field, 0);

    RETVAL = newRV_noinc((SV *) weights);
OUTPUT:
    RETVAL
2905
# Specificity of the entry as an array reference [a, b, c], copied from
# self->list->specificity.
SV *
specificityArray(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    AV *weights = newAV();
    SV *field;

    field = newSViv(self->list->specificity.a);
    av_push(weights, field);

    field = newSViv(self->list->specificity.b);
    av_push(weights, field);

    field = newSViv(self->list->specificity.c);
    av_push(weights, field);

    RETVAL = newRV_noinc((SV *) weights);
OUTPUT:
    RETVAL
2917
# Destructor of a selector entry object: drops the reference held on the
# parent selector object and frees the wrapper structure.
void
DESTROY(HTML5::DOM::CSS::Selector::Entry self)
CODE:
    DOM_GC_TRACE("DOM::CSS::Selector::Entry::DESTROY (refs=%d)", SvREFCNT(SvRV(ST(0))));

    SV *owner_sv = self->parent;
    SvREFCNT_dec(owner_sv);
    safefree(self);
2924
2925 #################################################################
2926 # HTML5::DOM::Encoding
2927 #################################################################
2928 MODULE = HTML5::DOM PACKAGE = HTML5::DOM::Encoding
2929
# Encoding name for a numeric encoding id (undef when the lookup returns NULL)
SV *
id2name(int id)
CODE:
	size_t name_len = 0;
	const char *found = myencoding_name_by_id(id, &name_len);
	if (found)
		RETVAL = newSVpv(found, name_len);
	else
		RETVAL = &PL_sv_undef;
OUTPUT:
	RETVAL
2938
# Numeric encoding id for an encoding name (undef when no encoding was determined)
SV *
name2id(SV *text)
CODE:
	STRLEN name_len;
	const char *name;
	myencoding_t found = MyENCODING_NOT_DETERMINED;

	text = sv_stringify(text);
	name = SvPV_const(text, name_len);

	/* The lookup's return value is not consulted; `found` keeps its initial value unless the lookup stores a result. */
	myencoding_by_name(name, name_len, &found);

	if (found == MyENCODING_NOT_DETERMINED)
		RETVAL = &PL_sv_undef;
	else
		RETVAL = newSViv(found);
OUTPUT:
	RETVAL
2952
# Detect the encoding of `text` and return its numeric id
# (MyENCODING_NOT_DETERMINED when nothing was detected).
# A positive max_len limits how many leading bytes are examined.
# The alias used to call this XSUB selects the detection routine.
int
detect(SV *text, long max_len = 0)
ALIAS:
	detectByPrescanStream	= 1
	detectCyrillic			= 2
	detectUkrainian			= 21
	detectRussian			= 22
	detectUnicode			= 3
	detectBom				= 4
	detectByCharset			= 5
CODE:
	text = sv_stringify(text);

	STRLEN text_len;
	const char *text_str = SvPV_const(text, text_len);

	/* max_len is known to be positive here, so the cast to STRLEN is safe. */
	if (max_len > 0 && (STRLEN) max_len < text_len)
		text_len = (STRLEN) max_len;

	/* Start from "not determined" so that every path, including an
	   unexpected alias index, returns a defined value. */
	myencoding_t encoding = MyENCODING_NOT_DETERMINED;
	bool found = false;

	switch (ix) {
		case 0:
			found = myencoding_detect(text_str, text_len, &encoding);
			break;
		case 1:
			/* The prescan routine returns the encoding directly. */
			encoding = myencoding_prescan_stream_to_determine_encoding(text_str, text_len);
			found = true;
			break;
		case 2:
		case 21:
		case 22:
			/* All three Cyrillic aliases share the same detector. */
			found = myencoding_detect_russian(text_str, text_len, &encoding);
			break;
		case 3:
			found = myencoding_detect_unicode(text_str, text_len, &encoding);
			break;
		case 4:
			found = myencoding_detect_bom(text_str, text_len, &encoding);
			break;
		case 5:
			found = myencoding_extracting_character_encoding_from_charset(text_str, text_len, &encoding);
			break;
		default:
			break;
	}

	/* Discard anything a failed detector may have left in `encoding`. */
	if (!found)
		encoding = MyENCODING_NOT_DETERMINED;

	RETVAL = encoding;
OUTPUT:
	RETVAL
3005
# Detect a byte-order mark at the start of `text`.
# Returns a two-element list: the numeric encoding id
# (MyENCODING_NOT_DETERMINED when detection reports failure) and the text
# as left by myencoding_detect_and_cut_bom().
# A positive max_len limits how many leading bytes are considered.
void
detectBomAndCut(SV *text, long max_len = 0)
CODE:
	text = sv_stringify(text);

	STRLEN text_len;
	const char *text_str = SvPV_const(text, text_len);

	/* max_len is known to be positive here, so the cast to STRLEN is safe. */
	if (max_len > 0 && (STRLEN) max_len < text_len)
		text_len = (STRLEN) max_len;

	myencoding_t encoding = MyENCODING_NOT_DETERMINED;

	if (!myencoding_detect_and_cut_bom(text_str, text_len, &encoding, &text_str, &text_len))
		encoding = MyENCODING_NOT_DETERMINED;

	/* Two values are returned, but the caller may have passed a single
	   argument: make sure the stack has room for both. */
	EXTEND(SP, 2);

	SV *encoding_sv = sv_2mortal(newSViv(encoding));
	/* newSVpvn honours an explicit zero length; newSVpv would fall back to
	   strlen() and could return bytes beyond the requested range. */
	SV *text_sv = sv_2mortal(newSVpvn(text_str, text_len));

	/* Carry the input's UTF-8 flag over to the returned string (not to the
	   integer encoding id). */
	if (SvUTF8(text))
		SvUTF8_on(text_sv);

	ST(0) = encoding_sv;
	ST(1) = text_sv;

	XSRETURN(2);
3032
# Run automatic encoding detection on `text`.
# Returns a two-element list: the numeric encoding id produced by
# html5_dom_auto_encoding() and the text as left by that call.
# A positive max_len limits how many leading bytes are considered.
# The optional `options` hash is parsed first; encoding, default_encoding and
# encoding_prescan_limit are then overridden for this call.
void
detectAuto(SV *text, long max_len = 0, HV *options = NULL)
CODE:
	text = sv_stringify(text);

	STRLEN text_len;
	const char *text_str = SvPV_const(text, text_len);

	/* max_len is known to be positive here, so the cast to STRLEN is safe. */
	if (max_len > 0 && (STRLEN) max_len < text_len)
		text_len = (STRLEN) max_len;

	html5_dom_options_t opts = {0};
	html5_dom_parse_options(&opts, NULL, options);

	/* Force auto-detection over the whole (possibly truncated) input. */
	opts.encoding = MyENCODING_AUTO;
	opts.default_encoding = MyENCODING_NOT_DETERMINED;
	opts.encoding_prescan_limit = text_len;

	myencoding_t encoding = html5_dom_auto_encoding(&opts, &text_str, &text_len);

	/* Two values are returned, but the caller may have passed a single
	   argument: make sure the stack has room for both. */
	EXTEND(SP, 2);

	SV *encoding_sv = sv_2mortal(newSViv(encoding));
	/* newSVpvn honours an explicit zero length; newSVpv would fall back to
	   strlen() and could return bytes beyond the requested range. */
	SV *text_sv = sv_2mortal(newSVpvn(text_str, text_len));

	/* Carry the input's UTF-8 flag over to the returned string (not to the
	   integer encoding id). */
	if (SvUTF8(text))
		SvUTF8_on(text_sv);

	ST(0) = encoding_sv;
	ST(1) = text_sv;

	XSRETURN(2);
3063