• Home
  • History
  • Annotate
Name Date Size #Lines LOC

..03-May-2022-

examples/H06-Jul-2021-375283

gen/H06-Jul-2021-294292

lib/HTML5/H06-Jul-2021-4,3463,071

port/openbsd/H06-Jul-2021-11991

scripts/H03-May-2022-139110

t/H06-Jul-2021-1,8661,593

third_party/modest/H06-Jul-2021-153,356119,013

.editorconfigH A D26-Jun-202168 75

.travis.ymlH A D29-Jun-2021560 2825

CHANGESH A D06-Jul-20212.5 KiB4039

DOM.xsH A D29-Jun-202187.8 KiB3,0632,556

LICENSEH A D29-Jun-20211.1 KiB2217

MANIFESTH A D06-Jul-202124.8 KiB560559

MANIFEST.SKIPH A D06-Jul-2021322 2219

META.jsonH A D06-Jul-20211.2 KiB5352

META.ymlH A D06-Jul-2021733 2726

Makefile.PLH A D06-Jul-20211.8 KiB7864

README.podH A D29-Jun-202188.6 KiB3,4892,353

leaks.plH A D29-Jun-2021689 4034

modest_config.hH A D29-Jun-2021159 86

modest_modest.cH A D29-Jun-20211.3 KiB2623

modest_mycore.cH A D29-Jun-2021856 1815

modest_mycss.cH A D29-Jun-20213 KiB5451

modest_myencoding.cH A D29-Jun-2021286 105

modest_myfont.cH A D29-Jun-2021755 1815

modest_myhtml.cH A D29-Jun-20211 KiB2219

modest_myport.cH A D29-Jun-2021872 2318

modest_myurl.cH A D29-Jun-2021561 1411

ppport.hH A D29-Jun-2021195.4 KiB7,9603,604

typemapH A D29-Jun-2021462 1916

utils.cH A D29-Jun-202118.1 KiB650486

utils.hH A D29-Jun-20216.2 KiB200156

README.pod

1=encoding utf8
2
3=head1 NAME
4
5L<HTML5::DOM|https://metacpan.org/pod/HTML5::DOM> - Super fast html5 DOM library with css selectors (based on Modest/MyHTML)
6
7=for html <a href="https://travis-ci.org/Azq2/perl-html5-dom"><img src="https://travis-ci.org/Azq2/perl-html5-dom.svg?branch=master"></a>
8<a href="https://metacpan.org/pod/HTML5::DOM"><img src="https://img.shields.io/cpan/v/HTML5-DOM.svg"></a>
9
10=head1 SYNOPSIS
11
12 use warnings;
13 use strict;
14 use HTML5::DOM;
15
16 # create parser object
17 my $parser = HTML5::DOM->new;
18
19 # parse some html
20 my $tree = $parser->parse('
21  <label>Some list of OS:</label>
22  <ul class="list" data-what="os" title="OS list">
23     <li>UNIX</li>
24     <li>Linux</li>
25     <!-- comment -->
26     <li>OSX</li>
27     <li>Windows</li>
28     <li>FreeBSD</li>
29  </ul>
30 ');
31
32 # find one element by CSS selector
33 my $ul = $tree->at('ul.list');
34
35 # prints tag
36 print $ul->tag."\n"; # ul
37
38 # check if <ul> has class list
39 print "<ul> has class .list\n" if ($ul->classList->has('list'));
40
41 # add some class
42 $ul->classList->add('os-list');
43
44 # prints <ul> classes
45 print $ul->className."\n"; # list os-list
46
47 # prints <ul> attribute title
48 print $ul->attr("title")."\n"; # OS list
49
50 # changing <ul> attribute title
51 $ul->attr("title", "OS names list");
52
53 # find all os names
54 $ul->find('li')->each(sub {
55  my ($node, $index) = @_;
56  print "OS #$index: ".$node->text."\n";
57 });
58
59 # we can use precompiled selectors
60 my $css_parser = HTML5::DOM::CSS->new;
61 my $selector = $css_parser->parseSelector('li');
62
63 # remove OSX from OS
64 $ul->find($selector)->[2]->remove();
65
66 # serialize tree
67 print $tree->html."\n";
68
69 # TODO: more examples in SYNOPSIS
70 # But you can explore API documentation.
71 # This library has a simple API, which is intuitively familiar to anyone who has used the DOM.
72
73=head1 DESCRIPTION
74
75L<HTML5::DOM|https://metacpan.org/pod/HTML5::DOM> is a fast HTML5 parser and DOM manipulation library with CSS4 selectors, fully conformant with the HTML5 specification.
76
77It is based on L<https://github.com/lexborisov/Modest> as the selector engine and L<https://github.com/lexborisov/myhtml> as the HTML5 parser.
78
79=head3 Key features
80
81=over
82
83=item *
84
85Really fast HTML parsing.
86
87=item *
88
89Supports parsing by chunks.
90
91=item *
92
93Fully conformant with the HTML5 specification.
94
95=item *
96
97Fast CSS4 selectors.
98
99=item *
100
101Any manipulations using DOM-like API.
102
103=item *
104
105Auto-detect input encoding.
106
107=item *
108
109Full integration with Perl and its memory management. You don't need to care about "free" or "destroy".
110
111=item *
112
113Supports async parsing, with optional event-loop integration.
114
115=item *
116
117Supports Perl utf8-enabled strings (See L<"WORK WITH UTF8"> for details.)
118
119=back
120
121=head1 HTML5::DOM
122
123HTML5 parser object.
124
125=head2 new
126
127 use warnings;
128 use strict;
129 use HTML5::DOM;
130
131 my $parser;
132
133 # with default options
134 $parser = HTML5::DOM->new;
135
136 # or override some options, if you need
137 $parser = HTML5::DOM->new({
138    threads                 => 0,
139    ignore_whitespace       => 0,
140    ignore_doctype          => 0,
141    scripts                 => 0,
142    encoding                => "auto",
143    default_encoding        => "UTF-8",
144    encoding_use_meta       => 1,
145    encoding_use_bom        => 1,
146    encoding_prescan_limit  => 1024
147 });
148
149Creates new parser object with options. See L<"PARSER OPTIONS"> for details.
150
151=head3 parse
152
153 use warnings;
154 use strict;
155 use HTML5::DOM;
156
157 my $parser = HTML5::DOM->new;
158
159 my $html = '<div>Hello world!</div>';
160
161 my $tree;
162
163 # parsing with options defined in HTML5::DOM->new
164 $tree = $parser->parse($html);
165
166 # parsing with custom options (extends options defined in HTML5::DOM->new)
167 $tree = $parser->parse($html, {
168     scripts     => 0,
169 });
170
171Parse html string and return L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object.
172
173=head3 parseChunkStart
174
175 use warnings;
176 use strict;
177 use HTML5::DOM;
178
179 my $parser = HTML5::DOM->new;
180
181 # start chunked parsing with options defined in HTML5::DOM->new
182 # calling parseChunkStart without options is useless,
183 # because the first call of parseChunk automatically calls parseChunkStart.
184 $parser->parseChunkStart();
185
186 # start chunked parsing with custom options (extends options defined in HTML5::DOM->new)
187 $parser->parseChunkStart({
188    scripts     => 0,
189 });
190
191Init chunked parsing. See L<"PARSER OPTIONS"> for details.
192
193=head3 parseChunk
194
195 use warnings;
196 use strict;
197 use HTML5::DOM;
198
199 my $parser = HTML5::DOM->new;
200
201 $parser->parseChunkStart()->parseChunk('<')->parseChunk('di')->parseChunk('v>');
202
203Parse chunk of html stream.
204
205=head3 parseChunkTree
206
207 use warnings;
208 use strict;
209 use HTML5::DOM;
210
211 my $parser = HTML5::DOM->new;
212
213 # start some chunked parsing
214 $parser->parseChunk('<')->parseChunk('di')->parseChunk('v>');
215
216 # get current tree
217 my $tree = $parser->parseChunkTree;
218
219 print $tree->html."\n"; # <html><head></head><body><div></div></body></html>
220
221 # more parse html
222 $parser->parseChunk('<div class="red">red div?</div>');
223
224 print $tree->html."\n"; # <html><head></head><body><div><div class="red">red div?</div></div></body></html>
225
226 # end parsing
227 $parser->parseChunkEnd();
228
229 print $tree->html."\n"; # <html><head></head><body><div><div class="red">red div?</div></div></body></html>
230
231Return current L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object (live result of all calls parseChunk).
232
233=head3 parseChunkEnd
234
235 use warnings;
236 use strict;
237 use HTML5::DOM;
238
239 my $parser = HTML5::DOM->new;
240
241 # start some chunked parsing
242 $parser->parseChunk('<')->parseChunk('di')->parseChunk('v>');
243
244 # end parsing and get tree
245 my $tree = $parser->parseChunkEnd();
246
247 print $tree->html; # <html><head></head><body><div></div></body></html>
248
249Completes chunked parsing and return L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object.
250
251=head3 parseAsync
252
253Parses html in a background thread. Can be used in different ways:
254
2551. Manually wait for parsing completion when you need it.
256
257 use warnings;
258 use strict;
259 use HTML5::DOM;
260
261 my $parser = HTML5::DOM->new;
262
263 my $html = '<div>Hello world!</div>';
264
265 my $async;
266
267 # start async parsing
268 $async = $parser->parseAsync($html);
269
270 # or with options
271 $async = $parser->parseAsync($html, { scripts => 0 });
272
273 # ...do some work...
274
275 # wait for parsing done
276 my $tree = $async->wait;
277
278 # work with tree
279 print $tree->html;
280
281C<$async-E<gt>wait> returns L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object.
282
2832. Non-blocking check for parsing completion.
284
285 use warnings;
286 use strict;
287 use HTML5::DOM;
288
289 my $parser = HTML5::DOM->new;
290
291 my $html = '<div>Hello world!</div>';
292
293 my $tree;
294 my $async;
295
296 # start async parsing
297 $async = $parser->parseAsync($html);
298
299 # or with options
300 $async = $parser->parseAsync($html, { scripts => 0 });
301
302 while (!$async->parsed) {
303     # do some work
304 }
305 $tree = $async->tree; # HTML5::DOM::Tree
306 # work with $tree
307 print $tree->root->at('div')->text."\n"; # Hello world!
308
309 # or another way
310
311 # start async parsing
312 $async = $parser->parseAsync($html);
313
314 # or with options
315 $async = $parser->parseAsync($html, { scripts => 0 });
316
317 while (!($tree = $async->tree)) {
318     # do some work
319 }
320 # work with $tree
321 print $tree->root->at('div')->text."\n"; # Hello world!
322
323C<$async-E<gt>parsed> returns C<1> if parsing done. Else returns C<0>.
324
325C<$async-E<gt>tree> returns L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object if parsing done. Else returns C<undef>.
326
3273. Integration with L<EV|https://metacpan.org/pod/EV>
328
329Required packages (only if you want to use an event loop):
330
331=over
332
333=item *
334
335L<EV|https://metacpan.org/pod/EV>
336
337=item *
338
339L<AnyEvent::Util|https://metacpan.org/pod/AnyEvent::Util>
340
341=back
342
343 use warnings;
344 use strict;
345 use EV;
346 use HTML5::DOM;
347
348 my $parser = HTML5::DOM->new;
349 my $html = '<div>Hello world!</div>';
350
351 my $custom_options = { scripts => 0 };
352
353 $parser->parseAsync($html, $custom_options, sub {
354     my $tree = shift;
355     # work with $tree
356     print $tree->root->at('div')->text."\n"; # Hello world!
357 });
358
359 # do some work
360
361 EV::loop;
362
363Function returns L<HTML5::DOM::AsyncResult|/"HTML5::DOM::AsyncResult"> object.
364
365C<$tree> in callback is a L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object.
366
3674. Integration with custom event-loop (example with AnyEvent loop)
368
369 use warnings;
370 use strict;
371 use AnyEvent;
372 use AnyEvent::Util;
373 use HTML5::DOM;
374
375 my $parser = HTML5::DOM->new;
376 my $html = '<div>Hello world!</div>';
377
378 my $custom_options = { scripts => 0 };
379
380 # create pipe
381 my ($r, $w) = AnyEvent::Util::portable_pipe();
382 AnyEvent::fh_unblock $r;
383
384 # fd for parseAsync communications
385 my $write_fd = fileno($w);
386
387 # after parsing complete module writes to $write_fd:
388 # value "1" - if success
389 # value "0" - if error
390 my $async = $parser->parseAsync($html, $custom_options, $write_fd);
391
392 # watch for value
393 my $async_watcher;
394 $async_watcher = AE::io $r, 0, sub {
395     <$r>; # read "1" or "0"
396     $async_watcher = undef; # destroy watcher
397
398     # work with $tree
399     my $tree = $async->wait;
400     print $tree->root->at('div')->text."\n"; # Hello world!
401 };
402
403 # ...do some work...
404
405 AE::cv->recv;
406
407C<$tree> in callback is a L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object.
408
409
410=head1 HTML5::DOM::Tree
411
412DOM tree object.
413
414=head3 createElement
415
416 # create new node with tag "div"
417 my $node = $tree->createElement("div");
418
419 # create new node with tag "g" with namespace "svg"
420 my $node = $tree->createElement("g", "svg");
421
422Create new L<HTML5::DOM::Element|/"HTML5::DOM::Element"> with specified tag and namespace.
423
424=head3 createComment
425
426 # create new comment
427 my $node = $tree->createComment(" ololo ");
428
429 print $node->html; # <!-- ololo -->
430
431Create new L<HTML5::DOM::Comment|/"HTML5::DOM::Comment"> with specified value.
432
433=head3 createTextNode
434
435 # create new text node
436 my $node = $tree->createTextNode("psh psh ololo i am driver of ufo >>>");
437
438 print $node->html; # psh psh ololo i am driver of ufo &gt;&gt;&gt;
439
440Create new L<HTML5::DOM::Text|/"HTML5::DOM::Text"> with specified value.
441
442=head3 parseFragment
443
444 my $fragment = $tree->parseFragment($html);
445 my $fragment = $tree->parseFragment($html, $context);
446 my $fragment = $tree->parseFragment($html, $context, $context_ns);
447 my $fragment = $tree->parseFragment($html, $context, $context_ns, $options);
448
449Parse fragment html and create new L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment">.
450For more details about fragments: L<https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments>
451
452=over
453
454=item *
455
456C<$html> - html fragment string
457
458=item *
459
460C<$context> - context tag name, default C<div>
461
462=item *
463
464C<$context_ns> - context tag namespace, default C<html>
465
466=item *
467
468C<$options> - parser options
469
470See L<"PARSER OPTIONS"> for details.
471
472=back
473
474 # simple create new fragment
475 my $node = $tree->parseFragment("some <b>bold</b> and <i>italic</i> text");
476
477 # create new fragment node with custom context tag/namespace and options
478 my $node = $tree->parseFragment("some <b>bold</b> and <i>italic</i> text", "div", "html", {
479    # some options override
480    encoding => "windows-1251"
481 });
482
483 print $node->html; # some <b>bold</b> and <i>italic</i> text
484
485=head3 document
486
487 my $node = $tree->document;
488
489Return L<HTML5::DOM::Document|/"HTML5::DOM::Document"> node of current tree.
490
491=head3 root
492
493 my $node = $tree->root;
494
495Return root node of current tree. (always <html>)
496
497=head3 head
498
499 my $node = $tree->head;
500
501Return <head> node of current tree.
502
503=head3 body
504
505 my $node = $tree->body;
506
507Return <body> node of current tree.
508
509=head3 at
510
511=head3 querySelector
512
513 my $node = $tree->at($selector);
514 my $node = $tree->querySelector($selector); # alias
515
516Find one element node in tree using L<CSS Selectors Level 4|https://www.w3.org/TR/selectors-4/>
517
518Return node, or C<undef> if not found.
519
520=over
521
522=item *
523
524C<$selector> - selector query as plain text or precompiled as L<HTML5::DOM::CSS::Selector|/HTML5::DOM::CSS::Selector> or
525L<HTML5::DOM::CSS::Selector::Entry|/HTML5::DOM::CSS::Selector::Entry>.
526
527=back
528
529 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue">blue</div>');
530 my $node = $tree->at('body > div.red');
531 print $node->html; # <div class="red">red</div>
532
533=head3 find
534
535=head3 querySelectorAll
536
537 my $collection = $tree->find($selector);
538 my $collection = $tree->querySelectorAll($selector); # alias
539
540Find all element nodes in tree using L<CSS Selectors Level 4|https://www.w3.org/TR/selectors-4/>
541
542Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
543
544=over
545
546=item *
547
548C<$selector> - selector query as plain text or precompiled as L<HTML5::DOM::CSS::Selector|/HTML5::DOM::CSS::Selector> or
549L<HTML5::DOM::CSS::Selector::Entry|/HTML5::DOM::CSS::Selector::Entry>.
550
551=back
552
553 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue">blue</div>');
554 my $collection = $tree->find('body > div.red, body > div.blue');
555 print $collection->[0]->html; # <div class="red">red</div>
556 print $collection->[1]->html; # <div class="blue">blue</div>
557
558=head3 findId
559
560=head3 getElementById
561
562 my $node = $tree->findId($id);
563 my $node = $tree->getElementById($id); # alias
564
565Find element node with specified id.
566
567Return L<HTML5::DOM::Node|/"HTML5::DOM::Node"> or C<undef>.
568
569 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue" id="test">blue</div>');
570 my $node = $tree->findId('test');
571 print $node->html; # <div class="blue" id="test">blue</div>
572
573=head3 findTag
574
575=head3 getElementsByTagName
576
577 my $collection = $tree->findTag($tag);
578 my $collection = $tree->getElementsByTagName($tag); # alias
579
580Find all element nodes in tree with specified tag name.
581
582Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
583
584 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue">blue</div>');
585 my $collection = $tree->findTag('div');
586 print $collection->[0]->html; # <div class="red">red</div>
587 print $collection->[1]->html; # <div class="blue">blue</div>
588
589=head3 findClass
590
591=head3 getElementsByClassName
592
593 my $collection = $tree->findClass($class);
594 my $collection = $tree->getElementsByClassName($class); # alias
595
596Find all element nodes in tree with specified class name.
597This is a faster equivalent of the [class~="value"] selector.
598
599Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
600
601 my $tree = HTML5::DOM->new
602    ->parse('<div class="red color">red</div><div class="blue color">blue</div>');
603 my $collection = $tree->findClass('color');
604 print $collection->[0]->html; # <div class="red color">red</div>
605 print $collection->[1]->html; # <div class="blue color">blue</div>
606
607=head3 findAttr
608
609=head3 getElementByAttribute
610
611 # Find all elements with attribute
612 my $collection = $tree->findAttr($attribute);
613 my $collection = $tree->getElementByAttribute($attribute); # alias
614
615 # Find all elements with attribute and matching value
616 my $collection = $tree->findAttr($attribute, $value, $case = 0, $cmp = '=');
617 my $collection = $tree->getElementByAttribute($attribute, $value, $case = 0, $cmp = '='); # alias
618
619Find all element nodes in tree with specified attribute and optional matching value.
620
621Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
622
623 my $tree = HTML5::DOM->new
624    ->parse('<div class="red color">red</div><div class="blue color">blue</div>');
625 my $collection = $tree->findAttr('class', 'CoLoR', 1, '~');
626 print $collection->[0]->html; # <div class="red color">red</div>
627 print $collection->[1]->html; # <div class="blue color">blue</div>
628
629CSS selector analogs:
630
631 # [$attribute=$value]
632 my $collection = $tree->findAttr($attribute, $value, 0, '=');
633
634 # [$attribute=$value i]
635 my $collection = $tree->findAttr($attribute, $value, 1, '=');
636
637 # [$attribute~=$value]
638 my $collection = $tree->findAttr($attribute, $value, 0, '~');
639
640 # [$attribute|=$value]
641 my $collection = $tree->findAttr($attribute, $value, 0, '|');
642
643 # [$attribute*=$value]
644 my $collection = $tree->findAttr($attribute, $value, 0, '*');
645
646 # [$attribute^=$value]
647 my $collection = $tree->findAttr($attribute, $value, 0, '^');
648
649 # [$attribute$=$value]
650 my $collection = $tree->findAttr($attribute, $value, 0, '$');
651
652=head3 encoding
653
654=head3 encodingId
655
656 print "encoding: ".$tree->encoding."\n"; # UTF-8
657 print "encodingId: ".$tree->encodingId."\n"; # 0
658
659Return current tree encoding. See L<"ENCODINGS"> for details.
660
661=head3 tag2id
662
663 print "tag id: ".HTML5::DOM->TAG_A."\n"; # tag id: 4
664 print "tag id: ".$tree->tag2id("a")."\n"; # tag id: 4
665
666Convert tag name to id. Return 0 (HTML5::DOM->TAG__UNDEF) if the tag does not exist in the tree.
667See L<"TAGS"> for tag constants list.
668
669=head3 id2tag
670
671 print "tag name: ".$tree->id2tag(4)."\n"; # tag name: a
672 print "tag name: ".$tree->id2tag(HTML5::DOM->TAG_A)."\n"; # tag name: a
673
674Convert tag id to name. Return C<undef> if the tag id does not exist in the tree.
675See L<"TAGS"> for tag constants list.
676
677=head3 namespace2id
678
679 print "ns id: ".HTML5::DOM->NS_HTML."\n"; # ns id: 1
680 print "ns id: ".$tree->namespace2id("html")."\n"; # ns id: 1
681
682Convert namespace name to id. Return 0 (HTML5::DOM->NS_UNDEF) if the namespace does not exist in the tree.
683See L<"NAMESPACES"> for namespace constants list.
684
685=head3 id2namespace
686
687 print "ns name: ".$tree->id2namespace(1)."\n"; # ns name: html
688 print "ns name: ".$tree->id2namespace(HTML5::DOM->NS_HTML)."\n"; # ns name: html
689
690Convert namespace id to name. Return C<undef> if the namespace id does not exist.
691See L<"NAMESPACES"> for namespace constants list.
692
693=head3 parser
694
695 my $parser = $tree->parser;
696
697Return parent L<HTML5::DOM|/"HTML5::DOM">.
698
699=head3 utf8
700
701As getter - returns C<1> if all methods return strings with the utf8 flag.
702
703Example with utf8:
704
705 use warnings;
706 use strict;
707 use HTML5::DOM;
708 use utf8;
709
710 my $tree = HTML5::DOM->new->parse("<b>тест</b>");
711 my $is_utf8_enabled = $tree->utf8;
712 print "is_utf8_enabled=".($is_utf8_enabled ? "true" : "false")."\n"; # true
713
714Or example with bytes:
715
716 use warnings;
717 use strict;
718 use HTML5::DOM;
719
720 my $tree = HTML5::DOM->new->parse("<b>тест</b>");
721 my $is_utf8_enabled = $tree->utf8;
722 print "is_utf8_enabled=".($is_utf8_enabled ? "true" : "false")."\n"; # false
723
724As setter - enable or disable utf8 flag on all returned strings.
725
726 use warnings;
727 use strict;
728 use HTML5::DOM;
729 use utf8;
730
731 my $tree = HTML5::DOM->new->parse("<b>тест</b>");
732
733 print "is_utf8_enabled=".($tree->utf8 ? "true" : "false")."\n"; # true
734 print length($tree->at('b')->text)." chars\n"; # 4 chars
735
736 $tree->utf8(0);
737
738 print "is_utf8_enabled=".($tree->utf8 ? "true" : "false")."\n"; # false
739 print length($tree->at('b')->text)." bytes\n"; # 8 bytes
740
741
742=head1 HTML5::DOM::Node
743
744DOM node object.
745
746=head3 tag
747
748=head3 nodeName
749
750 my $tag_name = $node->tag;
751 my $tag_name = $node->nodeName; # uppercase
752 my $tag_name = $node->tagName;  # uppercase
753
754Return node tag name (eg. div or span)
755
756 $node->tag($tag);
757 $node->nodeName($tag); # alias
758 $node->tagName($tag);  # alias
759
760Set new node tag name. Allow only for L<HTML5::DOM::Element|/"HTML5::DOM::Element"> nodes.
761
762 print $node->html; # <div></div>
763 $node->tag('span');
764 print $node->html; # <span></span>
765 print $node->tag; # span
766 print $node->tagName; # SPAN
767
768=head3 tagId
769
770 my $tag_id = $node->tagId;
771
772Return node tag id. See L<"TAGS"> for tag constants list.
773
774 $node->tagId($tag_id);
775
776Set new node tag id. Allow only for L<HTML5::DOM::Element|/"HTML5::DOM::Element"> nodes.
777
778 print $node->html; # <div></div>
779 $node->tagId(HTML5::DOM->TAG_SPAN);
780 print $node->html; # <span></span>
781 print $node->tagId; # 117
782
783=head3 namespace
784
785 my $tag_ns = $node->namespace;
786
787Return node namespace (eg. html or svg)
788
789 $node->namespace($namespace);
790
791Set new node namespace name. Allow only for L<HTML5::DOM::Element|/"HTML5::DOM::Element"> nodes.
792
793 print $node->namespace; # html
794 $node->namespace('svg');
795 print $node->namespace; # svg
796
797=head3 namespaceId
798
799 my $tag_ns_id = $node->namespaceId;
800
801Return node namespace id. See L<"NAMESPACES"> for namespace constants list.
802
803 $node->namespaceId($tag_ns_id);
804
805Set new node namespace by id. Allow only for L<HTML5::DOM::Element|/"HTML5::DOM::Element"> nodes.
806
807 print $node->namespace; # html
808 $node->namespaceId(HTML5::DOM->NS_SVG);
809 print $node->namespaceId; # 3
810 print $node->namespace; # svg
811
812=head3 tree
813
814 my $tree = $node->tree;
815
816Return parent L<HTML5::DOM::Tree|/"HTML5::DOM::Tree">.
817
818=head3 nodeType
819
820 my $type = $node->nodeType;
821
822Return node type. All types:
823
824 HTML5::DOM->ELEMENT_NODE                   => 1,
825 HTML5::DOM->ATTRIBUTE_NODE                 => 2,   # not supported
826 HTML5::DOM->TEXT_NODE                      => 3,
827 HTML5::DOM->CDATA_SECTION_NODE             => 4,   # not supported
828 HTML5::DOM->ENTITY_REFERENCE_NODE          => 5,   # not supported
829 HTML5::DOM->ENTITY_NODE                    => 6,   # not supported
830 HTML5::DOM->PROCESSING_INSTRUCTION_NODE    => 7,   # not supported
831 HTML5::DOM->COMMENT_NODE                   => 8,
832 HTML5::DOM->DOCUMENT_NODE                  => 9,
833 HTML5::DOM->DOCUMENT_TYPE_NODE             => 10,
834 HTML5::DOM->DOCUMENT_FRAGMENT_NODE         => 11,
835 HTML5::DOM->NOTATION_NODE                  => 12   # not supported
836
837Compatible with: L<https://developer.mozilla.org/ru/docs/Web/API/Node/nodeType>
838
839=head3 next
840
841=head3 nextElementSibling
842
843 my $node2 = $node->next;
844 my $node2 = $node->nextElementSibling; # alias
845
846Return next sibling element node
847
848 my $tree = HTML5::DOM->new->parse('
849    <ul>
850        <li>Linux</li>
851        <!-- comment -->
852        <li>OSX</li>
853        <li>Windows</li>
854    </ul>
855 ');
856 my $li = $tree->at('ul li');
857 print $li->text;               # Linux
858 print $li->next->text;         # OSX
859 print $li->next->next->text;   # Windows
860
861=head3 prev
862
863=head3 previousElementSibling
864
865 my $node2 = $node->prev;
866 my $node2 = $node->previousElementSibling; # alias
867
868Return previous sibling element node
869
870 my $tree = HTML5::DOM->new->parse('
871    <ul>
872        <li>Linux</li>
873        <!-- comment -->
874        <li>OSX</li>
875        <li class="win">Windows</li>
876    </ul>
877 ');
878 my $li = $tree->at('ul li.win');
879 print $li->text;               # Windows
880 print $li->prev->text;         # OSX
881 print $li->prev->prev->text;   # Linux
882
883=head3 nextNode
884
885=head3 nextSibling
886
887 my $node2 = $node->nextNode;
888 my $node2 = $node->nextSibling; # alias
889
890Return next sibling node
891
892 my $tree = HTML5::DOM->new->parse('
893    <ul>
894        <li>Linux</li>
895        <!-- comment -->
896        <li>OSX</li>
897        <li>Windows</li>
898    </ul>
899 ');
900 my $li = $tree->at('ul li');
901 print $li->text;                       # Linux
902 print $li->nextNode->text;             # <!-- comment -->
903 print $li->nextNode->nextNode->text;   # OSX
904
905=head3 prevNode
906
907=head3 previousSibling
908
909 my $node2 = $node->prevNode;
910 my $node2 = $node->previousSibling; # alias
911
912Return previous sibling node
913
914 my $tree = HTML5::DOM->new->parse('
915    <ul>
916        <li>Linux</li>
917        <!-- comment -->
918        <li>OSX</li>
919        <li class="win">Windows</li>
920    </ul>
921 ');
922 my $li = $tree->at('ul li.win');
923 print $li->text;                       # Windows
924 print $li->prevNode->text;             # OSX
925 print $li->prevNode->prevNode->text;   # <!-- comment -->
926
927=head3 first
928
929=head3 firstElementChild
930
931 my $node2 = $node->first;
932 my $node2 = $node->firstElementChild; # alias
933
934Return first child element
935
936 my $tree = HTML5::DOM->new->parse('
937    <ul>
938        <!-- comment -->
939        <li>Linux</li>
940        <li>OSX</li>
941        <li class="win">Windows</li>
942    </ul>
943 ');
944 my $ul = $tree->at('ul');
945 print $ul->first->text; # Linux
946
947=head3 last
948
949=head3 lastElementChild
950
951 my $node2 = $node->last;
952 my $node2 = $node->lastElementChild; # alias
953
954Return last child element
955
956 my $tree = HTML5::DOM->new->parse('
957    <ul>
958        <li>Linux</li>
959        <li>OSX</li>
960        <li class="win">Windows</li>
961        <!-- comment -->
962    </ul>
963 ');
964 my $ul = $tree->at('ul');
965 print $ul->last->text; # Windows
966
967=head3 firstNode
968
969=head3 firstChild
970
971 my $node2 = $node->firstNode;
972 my $node2 = $node->firstChild; # alias
973
974Return first child node
975
976 my $tree = HTML5::DOM->new->parse('
977    <ul>
978        <!-- comment -->
979        <li>Linux</li>
980        <li>OSX</li>
981        <li class="win">Windows</li>
982    </ul>
983 ');
984 my $ul = $tree->at('ul');
985 print $ul->firstNode->html; # <!-- comment -->
986
987=head3 lastNode
988
989=head3 lastChild
990
991 my $node2 = $node->lastNode;
992 my $node2 = $node->lastChild; # alias
993
994Return last child node
995
996 my $tree = HTML5::DOM->new->parse('
997    <ul>
998        <li>Linux</li>
999        <li>OSX</li>
1000        <li class="win">Windows</li>
1001        <!-- comment -->
1002    </ul>
1003 ');
1004 my $ul = $tree->at('ul');
1005 print $ul->lastNode->html; # <!-- comment -->
1006
1007=head3 html
1008
1009Universal html serialization and fragment parsing accessor, for single human-friendly api.
1010
1011 my $html = $node->html();
1012 my $node = $node->html($new_html);
1013
1014=over
1015
1016=item *
1017
1018As getter this is similar to L<outerHTML|/outerHTML>
1019
1020=item *
1021
1022As setter this is similar to L<innerHTML|/innerHTML>
1023
1024=item *
1025
1026As setter for non-element nodes this is similar to L<nodeValue|/nodeValue>
1027
1028=back
1029
1030 my $tree = HTML5::DOM->new->parse('<div id="test">some   text <b>bold</b></div>');
1031
1032 # get html content for element
1033 my $node = $tree->at('#test');
1034 print $node->html;                     # <div id="test">some   text <b>bold</b></div>
1035 $node->html('<b>new</b>');
1036 print $node->html;                     # <div id="test"><b>new</b></div>
1037
1038 my $comment = $tree->createComment(" comment text ");
1039 print $comment->html;                  # <!-- comment text -->
1040 $comment->html(' new comment text ');
1041 print $comment->html;                  # <!-- new comment text -->
1042
1043 my $text_node = $tree->createTextNode("plain text >");
1044 print $text_node->html;                # plain text &gt;
1045 $text_node->html('new>plain>text');
1046 print $text_node->html;                # new&gt;plain&gt;text
1047
1048=head3 innerHTML
1049
1050=head3 outerHTML
1051
1052=over
1053
1054=item *
1055
1056HTML serialization of the node and its descendants.
1057
1058 my $html = $node->html;
1059 my $html = $node->outerHTML;
1060
1061Example:
1062
1063 my $tree = HTML5::DOM->new->parse('<div id="test">some <b>bold</b> test</div>');
1064 print $tree->getElementById('test')->outerHTML;   # <div id="test">some <b>bold</b> test</div>
1065 print $tree->createComment(' test ')->outerHTML;  # <!-- test -->
1066 print $tree->createTextNode('test')->outerHTML;   # test
1067
1068=item *
1069
1070HTML serialization of the node's descendants.
1071
1072 # serialize descendants, without node
1073 my $html = $node->innerHTML;
1074
1075Example:
1076
1077 my $tree = HTML5::DOM->new->parse('<div id="test">some <b>bold</b> test</div>');
1078 print $tree->getElementById('test')->innerHTML;   # some <b>bold</b> test
1079 print $tree->createComment(' test ')->innerHTML;  # <!-- test -->
1080 print $tree->createTextNode('test')->innerHTML;   # test
1081
1082=item *
1083
1084Removes all of the element's descendants and replaces them with nodes constructed by parsing the HTML given in the string B<$new_html>.
1085
1086 # parse fragment and replace child nodes with it
1087 my $html = $node->html($new_html);
1088 my $html = $node->innerHTML($new_html);
1089
1090Example:
1091
1092 my $tree = HTML5::DOM->new->parse('<div id="test">some <b>bold</b> test</div>');
1093 print $tree->at('#test')->innerHTML('<i>italic</i>');
1094 print $tree->body->innerHTML;  # <div id="test"><i>italic</i></div>
1095
1096=item *
1097
1098HTML serialization of entire document
1099
1100  my $html = $tree->document->html;
1101  my $html = $tree->document->outerHTML;
1102
1103Example:
1104
1105  my $tree = HTML5::DOM->new->parse('<!DOCTYPE html><div id="test">some <b>bold</b> test</div>');
1106  print $tree->document->outerHTML;   # <!DOCTYPE html><html><head></head><body><div id="test">some <b>bold</b> test</div></body></html>
1107
1108=item *
1109
1110Replaces the element and all of its descendants with a new DOM tree constructed by parsing the specified B<$new_html>.
1111
1112 # parse fragment and replace the node in its parent's children with it
1113 my $html = $node->outerHTML($new_html);
1114
1115Example:
1116
1117 my $tree = HTML5::DOM->new->parse('<div id="test">some <b>bold</b> test</div>');
1118 print $tree->at('#test')->outerHTML('<i>italic</i>');
1119 print $tree->body->innerHTML;  # <i>italic</i>
1120
1121=back
1122
1123See, for more info:
1124
1125L<https://developer.mozilla.org/en-US/docs/Web/API/Element/innerHTML>
1126
1127L<https://developer.mozilla.org/en-US/docs/Web/API/Element/outerHTML>
1128
1129=head3 text
1130
1131Universal text accessor, for a single human-friendly API.
1132
1133 my $text = $node->text();
1134 my $node = $node->text($new_text);
1135
1136=over
1137
1138=item *
1139
1140For L<HTML5::DOM::Text|/"HTML5::DOM::Text"> is similar to L<nodeValue|/nodeValue> (as setter/getter)
1141
1142=item *
1143
1144For L<HTML5::DOM::Comment|/"HTML5::DOM::Comment"> is similar to L<nodeValue|/nodeValue> (as setter/getter)
1145
1146=item *
1147
1148For L<HTML5::DOM::DocType|/"HTML5::DOM::DocType"> is similar to L<nodeValue|/nodeValue> (as setter/getter)
1149
1150=item *
1151
1152For L<HTML5::DOM::Element|/"HTML5::DOM::Element"> is similar to L<textContent|/textContent> (as setter/getter)
1153
1154=back
1155
1156 my $tree = HTML5::DOM->new->parse('<div id="test">some   text <b>bold</b></div>');
1157
1158 # get text content for element
1159 my $node = $tree->at('#test');
1160 print $node->text;                     # some   text bold
1161 $node->text('<new node content>');
1162 print $node->html;                     # <div id="test">&lt;new node content&gt;</div>
1163
1164 my $comment = $tree->createComment("comment text");
1165 print $comment->text;                  # comment text
1166 $comment->text(' new comment text ');
1167 print $comment->html;                  # <!-- new comment text -->
1168
1169 my $text_node = $tree->createTextNode("plain text");
1170 print $text_node->text;                # plain text
1171 $text_node->text('new>plain>text');
1172 print $text_node->html;                # new&gt;plain&gt;text
1173
1174=head3 innerText
1175
1176=head3 outerText
1177
1178=head3 textContent
1179
1180=over
1181
1182=item *
1183
1184Represents the "rendered" text content of a node and its descendants.
1185Using default CSS "display" property for tags based on Firefox user-agent style.
1186
1187Only works for elements, for other nodes return C<undef>.
1188
1189 my $text = $node->innerText;
1190 my $text = $node->outerText; # alias
1191
1192Example:
1193
1194 my $tree = HTML5::DOM->new->parse('
1195    <div id="test">
1196        some
1197        <b>      bold     </b>
1198        test
1199        <script>alert()</script>
1200    </div>
1201 ');
1202 print $tree->body->innerText; # some bold test
1203
1204See, for more info: L<https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute>
1205
1206=item *
1207
1208Removes all of its children and replaces them with text nodes and <br> elements built from the given value.
1209Only works for elements, for other nodes throws exception.
1210
1211=over
1212
1213=item *
1214
1215All new line chars (\r\n, \r, \n) are replaced with <br />
1216
1217=item *
1218
1219All other text content is replaced with text nodes
1220
1221=back
1222
1223 my $node = $node->innerText($text);
1224
1225Example:
1226
1227 my $tree = HTML5::DOM->new->parse('<div id="test">some text <b>bold</b></div>');
1228 $tree->at('#test')->innerText("some\nnew\ntext >");
1229 print $tree->at('#test')->html;    # <div id="test">some<br />new<br />text &gt;</div>
1230
1231See, for more info: L<https://html.spec.whatwg.org/multipage/dom.html#the-innertext-idl-attribute>
1232
1233=item *
1234
1235Removes the current node and replaces it with the given text.
1236Only works for elements, for other nodes throws exception.
1237
1238=over
1239
1240=item *
1241
1242All new line chars (\r\n, \r, \n) are replaced with <br />
1243
1244=item *
1245
1246All other text content is replaced with text nodes
1247
1248=item *
1249
1250Similar to innerText($text), but removes current node
1251
1252=back
1253
1254 my $node = $node->outerText($text);
1255
1256Example:
1257
1258 my $tree = HTML5::DOM->new->parse('<div id="test">some text <b>bold</b></div>');
1259 $tree->at('#test')->outerText("some\nnew\ntext >");
1260 print $tree->body->html;   # <body>some<br />new<br />text &gt;</body>
1261
1262See, for more info: L<https://developer.mozilla.org/en-US/docs/Web/API/HTMLElement/outerText>
1263
1264=item *
1265
1266Represents the text content of a node and its descendants.
1267
1268Only works for elements, for other nodes return C<undef>.
1269
1270 my $text = $node->text;
1271 my $text = $node->textContent; # alias
1272
1273Example:
1274
1275 my $tree = HTML5::DOM->new->parse('<b>    test      </b><script>alert()</script>');
1276 print $tree->body->text; #     test      alert()
1277
1278See, for more info: L<https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent>
1279
1280=item *
1281
1282Removes all of its children and replaces them with a single text node with the given value.
1283
1284 my $node = $node->text($new_text);
1285 my $node = $node->textContent($new_text);
1286
1287Example:
1288
1289 my $tree = HTML5::DOM->new->parse('<div id="test">some <b>bold</b> test</div>');
1290 print $tree->at('#test')->text('<bla bla bla>');
1291 print $tree->at('#test')->html;  # <div id="test">&lt;bla bla bla&gt;</div>
1292
1293See, for more info: L<https://developer.mozilla.org/en-US/docs/Web/API/Node/textContent>
1294
1295=back
1296
1297=head3 nodeHtml
1298
1299 my $html = $node->nodeHtml();
1300
1301Serialize to html, without descendants and closing tag.
1302
1303 my $tree = HTML5::DOM->new->parse('<div id="test">some <b>bold</b> test</div>');
1304 print $tree->at('#test')->nodeHtml(); # <div id="test">
1305
1306=head3 nodeValue
1307
1308=head3 data
1309
1310 my $value = $node->nodeValue();
1311 my $value = $node->data(); # alias
1312
1313 my $node = $node->nodeValue($new_value);
1314 my $node = $node->data($new_value); # alias
1315
1316Get or set value of node. Only works for non-element nodes, such as L<HTML5::DOM::Text|/"HTML5::DOM::Text">, L<HTML5::DOM::DocType|/"HTML5::DOM::DocType">,
1317L<HTML5::DOM::Comment|/"HTML5::DOM::Comment">. Returns C<undef> for other nodes.
1318
1319 my $tree = HTML5::DOM->new->parse('');
1320 my $comment = $tree->createComment("comment text");
1321 print $comment->nodeValue;                 # comment text
1322 $comment->nodeValue(' new comment text ');
1323 print $comment->html;                      # <!-- new comment text -->
1324
1325=head3 isConnected
1326
1327 my $flag = $node->isConnected;
1328
1329Return true, if node has parent.
1330
1331 my $tree = HTML5::DOM->new->parse('
1332    <div id="test"></div>
1333 ');
1334 print $tree->at('#test')->isConnected;             # 1
1335 print $tree->createElement("div")->isConnected;    # 0
1336
1337=head3 parent
1338
1339=head3 parentElement
1340
1341 my $node = $node->parent;
1342 my $node = $node->parentElement; # alias
1343
1344Return parent node. Return C<undef>, if node detached.
1345
1346 my $tree = HTML5::DOM->new->parse('
1347    <div id="test"></div>
1348 ');
1349 print $tree->at('#test')->parent->tag; # body
1350
1351=head3 document
1352
1353=head3 ownerDocument
1354
1355 my $doc = $node->document;
1356 my $doc = $node->ownerDocument; # alias
1357
1358Return parent L<HTML5::DOM::Document|/"HTML5::DOM::Document">.
1359
1360 my $tree = HTML5::DOM->new->parse('
1361    <div id="test"></div>
1362 ');
1363 print ref($tree->at('#test')->document);   # HTML5::DOM::Document
1364
1365=head3 append
1366
1367=head3 appendChild
1368
1369 my $node = $node->append($child);
1370 my $child = $node->appendChild($child); # alias
1371
1372Append node to child nodes.
1373
1374B<append> - returned value is the self node, for chain calls
1375
1376B<appendChild> - returned value is the appended child except when the given child is a L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment">,
1377in which case the empty L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment"> is returned.
1378
1379 my $tree = HTML5::DOM->new->parse('
1380    <div>some <b>bold</b> text</div>
1381 ');
1382 $tree->at('div')
1383    ->append($tree->createElement('br'))
1384    ->append($tree->createElement('br'));
1385 print $tree->at('div')->html; # <div>some <b>bold</b> text<br /><br /></div>
1386
1387=head3 prepend
1388
1389=head3 prependChild
1390
1391 my $node = $node->prepend($child);
1392 my $child = $node->prependChild($child); # alias
1393
1394Prepend node to child nodes.
1395
1396B<prepend> - returned value is the self node, for chain calls
1397
1398B<prependChild> - returned value is the prepended child except when the given child is a L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment">,
1399in which case the empty L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment"> is returned.
1400
1401 my $tree = HTML5::DOM->new->parse('
1402    <div>some <b>bold</b> text</div>
1403 ');
1404 $tree->at('div')
1405    ->prepend($tree->createElement('br'))
1406    ->prepend($tree->createElement('br'));
1407 print $tree->at('div')->html; # <div><br /><br />some <b>bold</b> text</div>
1408
1409=head3 replace
1410
1411=head3 replaceChild
1412
1413 my $old_node = $old_node->replace($new_node);
1414 my $old_node = $old_node->parent->replaceChild($new_node, $old_node); # alias
1415
1416Replace node in parent child nodes.
1417
1418 my $tree = HTML5::DOM->new->parse('
1419    <div>some <b>bold</b> text</div>
1420 ');
1421 my $old = $tree->at('b')->replace($tree->createElement('br'));
1422 print $old->html;              # <b>bold</b>
1423 print $tree->at('div')->html;  # <div>some <br /> text</div>
1424
1425=head3 before
1426
1427=head3 insertBefore
1428
1429 my $node = $node->before($new_node);
1430 my $new_node = $node->parent->insertBefore($new_node, $node); # alias
1431
1432Insert new node before current node.
1433
1434B<before> - returned value is the self node, for chain calls
1435
1436B<insertBefore> - returned value is the added child except when the given child is a L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment">,
1437in which case the empty L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment"> is returned.
1438
1439 my $tree = HTML5::DOM->new->parse('
1440    <div>some <b>bold</b> text</div>
1441 ');
1442 $tree->at('b')->before($tree->createElement('br'));
1443 print $tree->at('div')->html; # <div>some <br /><b>bold</b> text</div>
1444
1445=head3 after
1446
1447=head3 insertAfter
1448
1449 my $node = $node->after($new_node);
1450 my $new_node = $node->parent->insertAfter($new_node, $node); # alias
1451
1452Insert new node after current node.
1453
1454B<after> - returned value is the self node, for chain calls
1455
1456B<insertAfter> - returned value is the added child except when the given child is a L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment">,
1457in which case the empty L<HTML5::DOM::Fragment|/"HTML5::DOM::Fragment"> is returned.
1458
1459 my $tree = HTML5::DOM->new->parse('
1460    <div>some <b>bold</b> text</div>
1461 ');
1462 $tree->at('b')->after($tree->createElement('br'));
1463 print $tree->at('div')->html; # <div>some <b>bold</b><br /> text</div>
1464
1465=head3 remove
1466
1467=head3 removeChild
1468
1469 my $node = $node->remove;
1470 my $node = $node->parent->removeChild($node); # alias
1471
1472Remove node from parent. Return removed node.
1473
1474 my $tree = HTML5::DOM->new->parse('
1475    <div>some <b>bold</b> text</div>
1476 ');
1477 print $tree->at('b')->remove->html;    # <b>bold</b>
1478 print $tree->at('div')->html;          # <div>some  text</div>
1479
1480=head3 clone
1481
1482=head3 cloneNode
1483
1484 # clone node to current tree
1485 my $node = $node->clone($deep = 0);
1486 my $node = $node->cloneNode($deep = 0); # alias
1487
1488 # clone node to foreign tree
1489 my $node = $node->clone($deep, $new_tree);
1490 my $node = $node->cloneNode($deep, $new_tree); # alias
1491
1492Clone node.
1493
1494B<deep> = 0 - only specified node, without children.
1495
1496B<deep> = 1 - deep copy with all child nodes.
1497
1498B<new_tree> - destination tree (if need copy to foreign tree)
1499
1500 my $tree = HTML5::DOM->new->parse('
1501    <div>some <b>bold</b> text</div>
1502 ');
1503 print $tree->at('b')->clone(0)->html; # <b></b>
1504 print $tree->at('b')->clone(1)->html; # <b>bold</b>
1505
1506=head3 void
1507
1508 my $flag = $node->void;
1509
1510Return true if node is void. For more details: L<http://w3c.github.io/html-reference/syntax.html#void-elements>
1511
1512 print $tree->createElement('br')->void; # 1
1513
1514=head3 selfClosed
1515
1516 my $flag = $node->selfClosed;
1517
1518Return true if node self closed.
1519
1520 print $tree->createElement('br')->selfClosed; # 1
1521
1522=head3 position
1523
1524 my $position = $node->position;
1525
1526Return offsets in input buffer.
1527
1528 print Dumper($node->position);
1529 # $VAR1 = {'raw_length' => 3, 'raw_begin' => 144, 'element_begin' => 143, 'element_length' => 5}
1530
1531=head3 isSameNode
1532
1533 my $flag = $node->isSameNode($other_node);
1534
1535Tests whether two nodes are the same, that is if they reference the same object.
1536
1537 my $tree = HTML5::DOM->new->parse('
1538    <ul>
1539        <li>test</li>
1540        <li>not test</li>
1541        <li>test</li>
1542    </ul>
1543 ');
1544 my $li = $tree->find('li');
1545 print $li->[0]->isSameNode($li->[0]); # 1
1546 print $li->[0]->isSameNode($li->[1]); # 0
1547 print $li->[0]->isSameNode($li->[2]); # 0
1548
1549
1550=head1 HTML5::DOM::Element
1551
1552DOM node object for elements. Inherit all methods from L<HTML5::DOM::Node|/HTML5::DOM::Node>.
1553
1554=head3 children
1555
1556 my $collection = $node->children;
1557
1558Returns all child elements of current node in L<HTML5::DOM::Collection|/HTML5::DOM::Collection>.
1559
1560 my $tree = HTML5::DOM->new->parse('
1561    <ul>
1562        <li>Perl</li>
1563        <!-- comment -->
1564        <li>PHP</li>
1565        <li>C++</li>
1566    </ul>
1567 ');
1568 my $collection = $tree->at('ul')->children;
1569 print $collection->[0]->html; # <li>Perl</li>
1570 print $collection->[1]->html; # <li>PHP</li>
1571 print $collection->[2]->html; # <li>C++</li>
1572
1573=head3 childrenNode
1574
1575=head3 childNodes
1576
1577 my $collection = $node->childrenNode;
1578 my $collection = $node->childNodes; # alias
1579
1580Returns all child nodes of current node in L<HTML5::DOM::Collection|/HTML5::DOM::Collection>.
1581
1582 my $tree = HTML5::DOM->new->parse('
1583    <ul>
1584        <li>Perl</li>
1585        <!-- comment -->
1586        <li>PHP</li>
1587        <li>C++</li>
1588    </ul>
1589 ');
1590 my $collection = $tree->at('ul')->childrenNode;
1591 print $collection->[0]->html; # <li>Perl</li>
1592 print $collection->[1]->html; # <!-- comment -->
1593 print $collection->[2]->html; # <li>PHP</li>
1594 print $collection->[3]->html; # <li>C++</li>
1595
1596=head3 attr
1597
1598=head3 removeAttr
1599
1600Universal attributes accessor, for single human-friendly api.
1601
1602 # attribute get
1603 my $value = $node->attr($key);
1604
1605 # attribute set
1606 my $node = $node->attr($key, $value);
1607 my $node = $node->attr($key => $value);
1608
1609 # attribute remove
1610 my $node = $node->attr($key, undef);
1611 my $node = $node->attr($key => undef);
1612 my $node = $node->removeAttr($key);
1613
1614 # bulk attributes set
1615 my $node = $node->attr({$key => $value, $key2 => $value2});
1616
1617 # bulk attributes remove
1618 my $node = $node->attr({$key => undef, $key2 => undef});
1619
1620 # bulk get all attributes in hash
1621 my $hash = $node->attr;
1622
1623Example:
1624
1625 my $tree = HTML5::DOM->new->parse('
1626    <div id="test" data-test="test value" data-href="#"></div>
1627 ');
1628 my $div = $tree->at('#test');
1629 $div->attr("data-new", "test");
1630 print $div->attr("data-test");     # test value
1631 print $div->{"data-test"};         # test value
1632 print $div->attr->{"data-test"};   # test value
1633
1634 # {id => "test", "data-test" => "test value", "data-href" => "#", "data-new" => "test"}
1635 print Dumper($div->attr);
1636
1637 $div->removeAttr("data-test");
1638
1639 # {id => "test", "data-href" => "#", "data-new" => "test"}
1640 print Dumper($div->attr);
1641
1642=head3 attrArray
1643
1644 my $arr = $node->attrArray;
1645
1646Get all attributes in array (in tree order).
1647
1648 my $tree = HTML5::DOM->new->parse('
1649    <div id="test" data-test="test value" data-href="#"></div>
1650 ');
1651 my $div = $tree->at('#test');
1652
1653 # [{key => 'id', value => 'test'}, {key => 'data-test', value => 'test value'}, {key => 'data-href', value => '#'}]
1654 print Dumper($div->attrArray);
1655
1656=head3 getAttribute
1657
1658 my $value = $node->getAttribute($key);
1659 my $value = $node->attr($key); # alias
1660
1661Get attribute value by key.
1662
1663=head3 setAttribute
1664
1665 my $node = $node->setAttribute($key, $value);
1666 my $node = $node->attr($key, $value); # alias
1667
1668Set new value or create new attribute.
1669
1670=head3 removeAttribute
1671
1672 my $node = $node->removeAttribute($key);
1673 my $node = $node->removeAttr($key); # alias
1674
1675Remove attribute.
1676
1677=head3 className
1678
1679 my $classes = $node->className;
1680 # alias for
1681 my $classes = $node->attr("class");
1682
1683=head3 classList
1684
1685 my $class_list = $node->classList;
1686
1687 # has class
1688 my $flag = $class_list->has($class_name);
1689 my $flag = $class_list->contains($class_name);
1690
1691 # add class
1692 my $class_list = $class_list->add($class_name);
1693 my $class_list = $class_list->add($class_name, $class_name1, $class_name2, ...);
1694
1695 # remove class
1696 my $class_list = $class_list->remove($class_name);
1697 my $class_list = $class_list->remove($class_name, $class_name1, $class_name2, ...);
1698
1699 # toggle class
1700 my $state = $class_list->toggle($class_name);
1701 my $state = $class_list->toggle($class_name, $force_state);
1702
1703Manipulations with classes. Returns L<HTML5::DOM::TokenList|/HTML5::DOM::TokenList>.
1704
1705Similar to L<https://developer.mozilla.org/en-US/docs/Web/API/Element/classList>
1706
1707 my $tree = HTML5::DOM->new->parse('<div class="red">red</div>');
1708 my $node = $tree->body->at('.red');
1709 print $node->classList->has('red');            # 1
1710 print $node->classList->has('blue');           # 0
1711 $node->classList->add('blue', 'red', 'yellow', 'orange');
1712 print $node->className;                        # red blue yellow orange
1713 $node->classList->remove('blue', 'orange');
1714 print $node->className;                        # red yellow
1715 print $node->classList->toggle('blue');        # 1
1716 print $node->className;                        # red yellow blue
1717 print $node->classList->toggle('blue');        # 0
1718 print $node->className;                        # red yellow
1719
1720=head3 at
1721
1722=head3 querySelector
1723
1724 my $node = $node->at($selector);
1725 my $node = $node->at($selector, $combinator);
1726 my $node = $node->querySelector($selector); # alias
1727 my $node = $node->querySelector($selector, $combinator); # alias
1728
1729Find one element node in current node descendants using L<CSS Selectors Level 4|https://www.w3.org/TR/selectors-4/>
1730
1731Return node, or C<undef> if not found.
1732
1733=over
1734
1735=item *
1736
1737C<$selector> - selector query as plain text or precompiled as L<HTML5::DOM::CSS::Selector|/HTML5::DOM::CSS::Selector> or
1738L<HTML5::DOM::CSS::Selector::Entry|/HTML5::DOM::CSS::Selector::Entry>.
1739
1740=item *
1741
1742C<$combinator> - custom selector combinator, applies to current node
1743
1744=over
1745
1746=item *
1747
1748C<E<gt>E<gt>> - descendant selector (default)
1749
1750=item *
1751
1752C<E<gt>> - child selector
1753
1754=item *
1755
1756C<+> - adjacent sibling selector
1757
1758=item *
1759
1760C<~> - general sibling selector
1761
1762=item *
1763
1764C<||> - column combinator
1765
1766=back
1767
1768=back
1769
1770 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue">blue</div>');
1771 my $node = $tree->body->at('body > div.red');
1772 print $node->html; # <div class="red">red</div>
1773
1774=head3 find
1775
1776=head3 querySelectorAll
1777
1778 my $collection = $node->find($selector);
1779 my $collection = $node->find($selector, $combinator);
1780 my $collection = $node->querySelectorAll($selector); # alias
1781 my $collection = $node->querySelectorAll($selector, $combinator); # alias
1782
1783Find all element nodes in current node descendants using L<CSS Selectors Level 4|https://www.w3.org/TR/selectors-4/>
1784
1785Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
1786
1787=over
1788
1789=item *
1790
1791C<$selector> - selector query as plain text or precompiled as L<HTML5::DOM::CSS::Selector|/HTML5::DOM::CSS::Selector> or
1792L<HTML5::DOM::CSS::Selector::Entry|/HTML5::DOM::CSS::Selector::Entry>.
1793
1794=item *
1795
1796C<$combinator> - custom selector combinator, applies to current node
1797
1798=over
1799
1800=item *
1801
1802C<E<gt>E<gt>> - descendant selector (default)
1803
1804=item *
1805
1806C<E<gt>> - child selector
1807
1808=item *
1809
1810C<+> - adjacent sibling selector
1811
1812=item *
1813
1814C<~> - general sibling selector
1815
1816=item *
1817
1818C<||> - column combinator
1819
1820=back
1821
1822=back
1823
1824 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue">blue</div>');
1825 my $collection = $tree->body->find('body > div.red, body > div.blue');
1826 print $collection->[0]->html; # <div class="red">red</div>
1827 print $collection->[1]->html; # <div class="blue">blue</div>
1828
1829=head3 findId
1830
1831=head3 getElementById
1832
1833 my $node = $node->findId($id);
1834 my $node = $node->getElementById($id); # alias
1835
1836Find element node with specified id in current node descendants.
1837
1838Return L<HTML5::DOM::Node|/"HTML5::DOM::Node"> or C<undef>.
1839
1840 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue" id="test">blue</div>');
1841 my $node = $tree->body->findId('test');
1842 print $node->html; # <div class="blue" id="test">blue</div>
1843
1844=head3 findTag
1845
1846=head3 getElementsByTagName
1847
1848 my $collection = $node->findTag($tag);
1849 my $collection = $node->getElementsByTagName($tag); # alias
1850
1851Find all element nodes in current node descendants with specified tag name.
1852
1853Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
1854
1855 my $tree = HTML5::DOM->new->parse('<div class="red">red</div><div class="blue">blue</div>');
1856 my $collection = $tree->body->findTag('div');
1857 print $collection->[0]->html; # <div class="red">red</div>
1858 print $collection->[1]->html; # <div class="blue">blue</div>
1859
1860=head3 findClass
1861
1862=head3 getElementsByClassName
1863
1864 my $collection = $node->findClass($class);
1865 my $collection = $node->getElementsByClassName($class); # alias
1866
1867Find all element nodes in current node descendants with specified class name.
1868This is a faster equivalent of the [class~="value"] selector.
1869
1870Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
1871
1872 my $tree = HTML5::DOM->new
1873    ->parse('<div class="red color">red</div><div class="blue color">blue</div>');
1874 my $collection = $tree->body->findClass('color');
1875 print $collection->[0]->html; # <div class="red color">red</div>
1876 print $collection->[1]->html; # <div class="blue color">blue</div>
1877
1878=head3 findAttr
1879
1880=head3 getElementByAttribute
1881
1882 # Find all elements with attribute
1883 my $collection = $node->findAttr($attribute);
1884 my $collection = $node->getElementByAttribute($attribute); # alias
1885
1886 # Find all elements with attribute and matching value
1887 my $collection = $node->findAttr($attribute, $value, $case = 0, $cmp = '=');
1888 my $collection = $node->getElementByAttribute($attribute, $value, $case = 0, $cmp = '='); # alias
1889
1890Find all element nodes in tree with specified attribute and optional matching value.
1891
1892Return L<HTML5::DOM::Collection|/"HTML5::DOM::Collection">.
1893
1894 my $tree = HTML5::DOM->new
1895    ->parse('<div class="red color">red</div><div class="blue color">blue</div>');
1896 my $collection = $tree->body->findAttr('class', 'CoLoR', 1, '~');
1897 print $collection->[0]->html; # <div class="red color">red</div>
1898 print $collection->[1]->html; # <div class="blue color">blue</div>
1899
1900CSS selector analogs:
1901
1902 # [$attribute=$value]
1903 my $collection = $node->findAttr($attribute, $value, 0, '=');
1904
1905 # [$attribute=$value i]
1906 my $collection = $node->findAttr($attribute, $value, 1, '=');
1907
1908 # [$attribute~=$value]
1909 my $collection = $node->findAttr($attribute, $value, 0, '~');
1910
1911 # [$attribute|=$value]
1912 my $collection = $node->findAttr($attribute, $value, 0, '|');
1913
1914 # [$attribute*=$value]
1915 my $collection = $node->findAttr($attribute, $value, 0, '*');
1916
1917 # [$attribute^=$value]
1918 my $collection = $node->findAttr($attribute, $value, 0, '^');
1919
1920 # [$attribute$=$value]
1921 my $collection = $node->findAttr($attribute, $value, 0, '$');
1922
1923=head3 getDefaultBoxType
1924
1925 my $display = $node->getDefaultBoxType;
1926
1927Get default CSS "display" property for tag (useful for functions like a L<innerText|/innerText>).
1928
1929 my $tree = HTML5::DOM->new
1930    ->parse('<div class="red color">red</div><script>alert()</script><b>bbb</b>');
1931 print $tree->at('div')->getDefaultBoxType();       # block
1932 print $tree->at('script')->getDefaultBoxType();    # none
1933 print $tree->at('b')->getDefaultBoxType();         # inline
1934
1935=head1 HTML5::DOM::Document
1936
1937DOM node object for document. Inherit all methods from L<HTML5::DOM::Element|/HTML5::DOM::Element>.
1938
1939
1940=head1 HTML5::DOM::Fragment
1941
1942DOM node object for fragments. Inherit all methods from L<HTML5::DOM::Element|/HTML5::DOM::Element>.
1943
1944
1945=head1 HTML5::DOM::Text
1946
1947DOM node object for text. Inherit all methods from L<HTML5::DOM::Node|/HTML5::DOM::Node>.
1948
1949
1950=head1 HTML5::DOM::Comment
1951
1952DOM node object for comments. Inherit all methods from L<HTML5::DOM::Node|/HTML5::DOM::Node>.
1953
1954
1955=head1 HTML5::DOM::DocType
1956
1957DOM node object for document type. Inherit all methods from L<HTML5::DOM::Node|/HTML5::DOM::Node>.
1958
1959=head3 name
1960
1961 my $name = $node->name;
1962 my $node = $node->name($new_name);
1963
1964Return or change root element name from doctype.
1965
1966 my $tree = HTML5::DOM->new->parse('
1967	<!DOCTYPE svg>
1968 ');
1969
1970 # get
1971 print $tree->document->firstChild->name; # svg
1972
1973 # set
1974 $tree->document->firstChild->name('html');
1975 print $tree->document->firstChild->html; # <!DOCTYPE html>
1976
1977=head3 publicId
1978
1979 my $public_id = $node->publicId;
1980 my $node = $node->publicId($new_public_id);
1981
1982Return or change public id from doctype.
1983
1984 my $tree = HTML5::DOM->new->parse('
1985	<!DOCTYPE svg:svg PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
1986 ');
1987
1988 # get
1989 print $tree->document->firstChild->publicId; # -//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN
1990
1991 # set
1992 print $tree->document->firstChild->publicId('-//W3C//DTD SVG 1.1//EN');
1993 print $tree->document->firstChild->html; # <!DOCTYPE svg:svg PUBLIC "-//W3C//DTD SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
1994
1995=head3 systemId
1996
1997 my $system_id = $node->systemId;
1998 my $node = $node->systemId($new_system_id);
1999
2000Return or change system id from doctype.
2001
2002 my $tree = HTML5::DOM->new->parse('
2003	<!DOCTYPE svg:svg PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd">
2004 ');
2005
2006 # get
2007 print $tree->document->firstChild->systemId; # http://www.w3.org/2002/04/xhtml-math-svg/xhtml-math-svg.dtd
2008
2009 # set
2010 print $tree->document->firstChild->systemId('http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd');
2011 print $tree->document->firstChild->html; # <!DOCTYPE svg:svg PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0 plus SVG 1.1//EN" "http://www.w3.org/Graphics/SVG/1.1/DTD/svg11.dtd">
2012
2013
2014
2015=head1 HTML5::DOM::Collection
2016
2017Collection of L<HTML5::DOM::Node|/HTML5::DOM::Node> objects.
2018
2019=head3 new
2020
2021 my $collection = HTML5::DOM::Collection->new($nodes);
2022
2023Creates new collection from C<$nodes> (reference to array with L<HTML5::DOM::Node|/HTML5::DOM::Node>).
2024
2025=head3 each
2026
2027 $collection->each(sub {...});
2028 $collection->each(sub {...}, @additional_args);
2029
2030Iterates over all nodes in collection. Returns self.
2031
2032Example:
2033
2034 $collection->each(sub {
2035    my ($node, $index) = @_;
2036    print "FOUND: node[$index] is a '$node'\n";
2037 });
2038
2039 # Also can bypass additional arguments
2040 $collection->each(sub {
2041    my ($node, $index, $title) = @_;
2042    print $title."node[$index] is a '$node'\n";
2043 }, "FOUND: ");
2044
2045=head3 map
2046
2047 my $new_collection = $collection->map(sub {
2048    my ($node, $index) = @_;
2049    return "FOUND: ".$node->tag." => $index";
2050 });
2051
2052 # Also can bypass additional arguments
2053 my $new_collection = $collection->map(sub {
2054    my ($node, $index, $title) = @_;
2055    return $title.$node->tag." => $index";
2056 }, "FOUND: ");
2057
2058Apply callback for each node in collection. Returns new array from results.
2059
2060 my $new_collection = $collection->map($method, @args);
2061
2062Call method for each node in collection. Returns new L<HTML5::DOM::Collection|/HTML5::DOM::Collection> from results.
2063
2064Example:
2065
2066 # set text 'test!' for all nodes
2067 $collection->map('text', 'test!');
2068
2069 # get all tag names as array
2070 my $new_collection = $collection->map('tag');
2071
2072 # remove all nodes in collection
2073 $collection->map('remove');
2074
2075=head3 add
2076
2077 my $collection = $collection->add($node);
2078
2079Add new item to collection.
2080
2081=head3 length
2082
2083 my $length = $collection->length;
2084
2085Items count in collection.
2086
2087 my $tree = HTML5::DOM->new->parse('
2088    <ul>
2089        <li>Linux</li>
2090        <!-- comment -->
2091        <li>OSX</li>
2092        <li>Windows</li>
2093    </ul>
2094 ');
2095 my $collection = $tree->find('ul li');
2096 print $collection->length; # 3
2097
2098=head3 grep
2099
2100 my $new_collection = $collection->grep(qr/regexp/);
2101
2102Evaluates regexp for html code of each element in collection and creates new collection with all matched elements.
2103
2104 my $new_collection = $collection->grep(sub {...});
2105 my $new_collection = $collection->grep(sub {...}, @args);
2106
2107Evaluates callback for each element in collection and creates new collection with all elements for which callback returned true.
2108
2109Example for regexp:
2110
2111 my $tree = HTML5::DOM->new->parse('
2112    <ul>
2113        <li>Linux</li>
2114        <!-- comment -->
2115        <li>OSX (not supported)</li>
2116        <li>Windows (not supported)</li>
2117    </ul>
2118 ');
2119 my $collection = $tree->find('ul li')->grep(qr/not supported/);
2120 print $collection->length; # 2
2121
2122Example for callback:
2123
2124 my $tree = HTML5::DOM->new->parse('
2125    <ul>
2126        <li>Linux</li>
2127        <!-- comment -->
2128        <li>OSX (not supported)</li>
2129        <li>Windows (not supported)</li>
2130    </ul>
2131 ');
2132 my $collection = $tree->find('ul li')->grep(sub { $_->html =~ /not supported/ });
2133 print $collection->length; # 2
2134
2135=head3 first
2136
2137 my $node = $collection->first;
2138
2139Get first item in collection.
2140
2141 my $node = $collection->first(qr/regexp/);
2142
2143Get first element in collection which html code matches regexp.
2144
2145 my $node = $collection->first(sub {...});
2146 my $node = $collection->first(sub {...}, @args);
2147
2148Get first element in collection for which callback returned true.
2149
2150Example for regexp:
2151
2152 my $tree = HTML5::DOM->new->parse('
2153    <ul>
2154        <li>Linux</li>
2155        <!-- comment -->
2156        <li>OSX (not supported)</li>
2157        <li>Windows (not supported)</li>
2158    </ul>
2159 ');
2160 my $collection = $tree->find('ul li');
2161 print $collection->first->html; # <li>Linux</li>
2162 print $collection->first(qr/not supported/)->html; # <li>OSX (not supported)</li>
2163
2164Example for callback:
2165
2166 my $tree = HTML5::DOM->new->parse('
2167    <ul>
2168        <li>Linux</li>
2169        <!-- comment -->
2170        <li>OSX (not supported)</li>
2171        <li>Windows (not supported)</li>
2172    </ul>
2173 ');
2174 my $collection = $tree->find('ul li');
2175 print $collection->first->html; # <li>Linux</li>
2176 print $collection->first(sub { $_->html =~ /not supported/ })->html; # <li>OSX (not supported)</li>
2177
2178=head3 last
2179
2180 my $node = $collection->last;
2181
2182Get last item in collection.
2183
2184 my $tree = HTML5::DOM->new->parse('
2185    <ul>
2186        <li>Linux</li>
2187        <!-- comment -->
2188        <li>OSX</li>
2189        <li>Windows</li>
2190    </ul>
2191 ');
2192 my $collection = $tree->find('ul li');
2193 print $collection->last->html; # <li>Windows</li>
2194
2195=head3 item
2196
2197 my $node = $collection->item($index);
2198 my $node = $collection->[$index];
2199
2200Get item by C<$index> in collection.
2201
2202 my $tree = HTML5::DOM->new->parse('
2203    <ul>
2204        <li>Linux</li>
2205        <!-- comment -->
2206        <li>OSX</li>
2207        <li>Windows</li>
2208    </ul>
2209 ');
2210 my $collection = $tree->find('ul li');
2211 print $collection->item(1)->html;      # <li>OSX</li>
2212 print $collection->[1]->html;          # <li>OSX</li>
2213
2214=head3 reverse
2215
2216 my $reversed_collection = $collection->reverse;
2217
2218Returns copy of collection in reverse order.
2219
2220 my $tree = HTML5::DOM->new->parse('
2221    <ul>
2222        <li>Linux</li>
2223        <!-- comment -->
2224        <li>OSX</li>
2225        <li>Windows</li>
2226    </ul>
2227 ');
2228 my $collection = $tree->find('ul li');
2229 print join(', ', @{$collection->map('text')});            # Linux, OSX, Windows
2230 print join(', ', @{$collection->reverse()->map('text')}); # Windows, OSX, Linux
2231
2232=head3 shuffle
2233
2234 my $shuffled_collection = $collection->shuffle;
2235
2236Returns copy of collection in random order.
2237
2238 my $tree = HTML5::DOM->new->parse('
2239    <ul>
2240        <li>Linux</li>
2241        <!-- comment -->
2242        <li>OSX</li>
2243        <li>Windows</li>
2244    </ul>
2245 ');
2246 my $collection = $tree->find('ul li');
2247 print join(', ', @{$collection->shuffle()->map('text')}); # Windows, Linux, OSX
2248 print join(', ', @{$collection->shuffle()->map('text')}); # Windows, OSX, Linux
2249 print join(', ', @{$collection->shuffle()->map('text')}); # OSX, Windows, Linux
2250
2251=head3 head
2252
2253 my $new_collection = $collection->head($length);
2254
2255Returns copy of collection with only first C<$length> items.
2256
2257 my $tree = HTML5::DOM->new->parse('
2258    <ul>
2259        <li>Linux</li>
2260        <!-- comment -->
2261        <li>OSX</li>
2262        <li>Windows</li>
2263    </ul>
2264 ');
2265 my $collection = $tree->find('ul li');
2266 print join(', ', @{$collection->head(2)->map('text')}); # Linux, OSX
2267
2268=head3 tail
2269
2270 my $new_collection = $collection->tail($length);
2271
2272Returns copy of collection with only last C<$length> items.
2273
2274 my $tree = HTML5::DOM->new->parse('
2275    <ul>
2276        <li>Linux</li>
2277        <!-- comment -->
2278        <li>OSX</li>
2279        <li>Windows</li>
2280    </ul>
2281 ');
2282 my $collection = $tree->find('ul li');
2283 print join(', ', @{$collection->tail(2)->map('text')}); # OSX, Windows
2284
2285=head3 slice
2286
2287 my $new_collection = $collection->slice($offset);
2288
2289Returns new collection with sequence by specified C<$offset>.
2290
2291If C<$offset> is positive, the sequence will start at that C<$offset> in the C<$collection>.
2292If C<$offset> is negative, the sequence will start that far from the end of the C<$collection>.
2293
2294 my $new_collection = $collection->slice($offset, $length);
2295
2296Returns new collection with sequence by specified C<$offset> and C<$length>.
2297
2298If C<$offset> is positive, the sequence will start at that C<$offset> in the C<$collection>.
2299
2300If C<$offset> is negative, the sequence will start that far from the end of the C<$collection>.
2301
2302
2303If C<$length> is positive, then the sequence will have up to that many elements in it.
2304
2305If the C<$collection> is shorter than the C<$length>, then only the available C<$collection> elements will be present.
2306
2307If C<$length> is negative then the sequence will stop that many elements from the end of the C<$collection>.
2308
2309 my $tree = HTML5::DOM->new->parse('
2310    <ul>
2311        <li>Linux</li>
2312        <!-- comment -->
2313        <li>NetBSD</li>
2314        <li>OSX</li>
2315        <li>Windows</li>
2316    </ul>
2317 ');
2318 my $collection = $tree->find('ul li');
2319 print join(', ', @{$collection->slice(1)->map('text')});      # NetBSD, OSX, Windows
2320 print join(', ', @{$collection->slice(1, 2)->map('text')});   # NetBSD, OSX
2321 print join(', ', @{$collection->slice(-2)->map('text')});     # OSX, Windows
2322 print join(', ', @{$collection->slice(-2, 1)->map('text')});  # OSX
2323 print join(', ', @{$collection->slice(-3, -1)->map('text')}); # NetBSD, OSX
2324
2325=head3 uniq
2326
2327 my $new_collection = $collection->uniq();
2328
2329Returns copy of collection with only unique nodes.
2330
2331 my $new_collection = $collection->uniq(sub {...});
2332
2333Returns copy of collection with only unique nodes, where the unique identifier of each node is the value returned by callback.
2334
2335Example:
2336
2337 my $tree = HTML5::DOM->new->parse('
2338    <ul>
2339        <li data-kernel="linux">Ubuntu</li>
2340        <li data-kernel="linux">Arch Linux</li>
2341        <!-- comment -->
2342        <li data-kernel="darwin">OSX</li>
2343        <li data-kernel="nt">Windows</li>
2344    </ul>
2345 ');
2346 my $collection = $tree->find('ul li');
2347 print join(', ', @{$collection->uniq->map('text')});                                   # Ubuntu, Arch Linux, OSX, Windows
2348 print join(', ', @{$collection->uniq(sub { $_->attr("data-kernel") })->map('text')});  # Ubuntu, OSX, Windows
2349
2350=head3 array
2351
2352 my $node = $collection->array();
2353
2354Get collection items as array.
2355
2356=head3 html
2357
2358 my $html = $collection->html;
2359
2360Concat L<outerHTML|/outerHTML> from all items.
2361
2362=head3 text
2363
2364 my $text = $collection->text;
2365
2366Concat L<textContent|/textContent> from all items.
2367
2368
2369=head1 HTML5::DOM::TokenList
2370
2371Similar to L<https://developer.mozilla.org/en-US/docs/Web/API/DOMTokenList>
2372
2373=head3 has
2374
2375=head3 contains
2376
2377 my $flag = $tokens->has($token);
2378 my $flag = $tokens->contains($token); # alias
2379
2380Check if token is contained in current tokens list.
2381
2382=head3 add
2383
2384 my $tokens = $tokens->add($token);
2385 my $tokens = $tokens->add($token, $token2, ...);
2386
2387Add new token (or tokens) to current tokens list. Returns self.
2388
2389=head3 remove
2390
2391 my $tokens = $tokens->remove($token);
2392 my $tokens = $tokens->remove($token, $token2, ...);
2393
2394Remove one or more tokens from current tokens list. Returns self.
2395
2396=head3 toggle
2397
2398 my $state = $tokens->toggle($token);
2399 my $state = $tokens->toggle($token, $force_state);
2400
2401=over
2402
2403=item *
2404
2405C<$token> - specified token name
2406
2407=item *
2408
2409C<$force_state> - optional force state.
2410
2411If 1 - similar to L<add|/add>
2412
2413If 0 - similar to L<remove|/remove>
2414
2415=back
2416
2417Toggle specified token in current tokens list.
2418
2419=over
2420
2421=item *
2422
2423If token exists - remove it
2424
2425=item *
2426
2427If token not exists - add it
2428
2429=back
2430
2431=head3 length
2432
2433 my $length = $tokens->length;
2434
2435Returns tokens count in current list.
2436
2437=head3 item
2438
2439 my $token = $tokens->item($index);
2440 my $token = $tokens->[$index];
2441
2442Return token by index.
2443
2444=head3 each
2445
2446 my $token = $tokens->each(sub {
2447    my ($token, $index) = @_;
2448    print "tokens[$index] is a '$token'\n";
2449 });
2450
2451Iterate over all tokens in list.
2452
2453
2454=head1 HTML5::DOM::AsyncResult
2455
2456Get result and check status from async parsing.
2457
2458=head3 parsed
2459
2460Non-blocking check status.
2461
2462 use warnings;
2463 use strict;
2464 use HTML5::DOM;
2465
2466 my $parser = HTML5::DOM->new;
2467 my $async = $parser->parseAsync('<div>Hello world!</div>' x 1000);
2468
2469 my $is_parsed;
2470 while (!($is_parsed = $async->parsed)) {
2471     print "is_parsed=$is_parsed\n";
2472 }
2473
2474Returns 1 if async parsing done. Otherwise returns 0.
2475
2476=head3 tree
2477
2478Non-blocking get result.
2479
2480 use warnings;
2481 use strict;
2482 use HTML5::DOM;
2483
2484 my $parser = HTML5::DOM->new;
2485 my $async = $parser->parseAsync('<div>Hello world!</div>' x 1000);
2486
2487 my $tree;
2488 while (!($tree = $async->tree)) {
2489     print "is_parsed=".($tree ? 1 : 0)."\n";
2490 }
2491
2492 print $tree->at('div')->text."\n"; # Hello world!
2493
2494Returns L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object if async parsing done. Otherwise returns C<undef>.
2495
2496=head3 wait
2497
2498 use warnings;
2499 use strict;
2500 use HTML5::DOM;
2501
2502 my $parser = HTML5::DOM->new;
2503 my $async = $parser->parseAsync('<div>Hello world!</div>' x 1000);
2504
2505 my $tree = $async->wait;
2506
2507 print $tree->at('div')->text."\n"; # Hello world!
2508
2509Blocking waits for parsing done and returns L<HTML5::DOM::Tree|/"HTML5::DOM::Tree"> object.
2510
2511
2512=head1 HTML5::DOM::CSS
2513
2514CSS Parser object
2515
2516=head3 new
2517
2518 # with default options
2519 my $css = HTML5::DOM::CSS->new;
2520
2521 # or override some options, if you need
2522 my $css = HTML5::DOM::CSS->new({
2523     utf8 => 0
2524 });
2525
2526Create new css parser object with options. See L<"CSS PARSER OPTIONS"> for details.
2527
2528=head3 parseSelector
2529
2530 my $selector = HTML5::DOM::CSS->parseSelector($selector_text);
2531
2532Parse C<$selector_text> and return L<HTML5::DOM::CSS::Selector|/HTML5::DOM::CSS::Selector>.
2533
2534 my $css = HTML5::DOM::CSS->new;
2535 my $selector = $css->parseSelector('body div.red, body span.blue');
2536
2537 # with custom options (extends options defined in HTML5::DOM::CSS->new)
2538 my $selector = $css->parseSelector('body div.red, body span.blue', { utf8 => 0 });
2539
2540=head1 HTML5::DOM::CSS::Selector
2541
2542CSS Selector object (precompiled selector)
2543
2544=head3 new
2545
2546 my $selector = HTML5::DOM::CSS::Selector->new($selector_text);
2547
2548Parse C<$selector_text> and create new css selector object.
2549If you need to parse many selectors, a more efficient way is to use
2550a single instance of the parser L<HTML5::DOM::CSS|/HTML5::DOM::CSS> and its
2551L<parseSelector|/parseSelector> method.
2552
2553=head3 text
2554
2555 my $selector_text = $selector->text;
2556
2557Serialize selector to text.
2558
2559 my $css = HTML5::DOM::CSS->new;
2560 my $selector = $css->parseSelector('body div.red, body span.blue');
2561 print $selector->text."\n"; # body div.red, body span.blue
2562
2563=head3 ast
2564
2565 my $ast = $selector->ast;
2566
2567Serialize selector to very simple AST format.
2568
2569 my $css = HTML5::DOM::CSS->new;
2570 my $selector = $css->parseSelector('div > .red');
2571 print Dumper($selector->ast);
2572
2573 # $VAR1 = [[
2574 #     {
2575 #         'value' => 'div',
2576 #         'type' => 'tag'
2577 #     },
2578 #     {
2579 #         'type'  => 'combinator',
2580 #         'value' => 'child'
2581 #     },
2582 #     {
2583 #         'type' => 'class',
2584 #         'value' => 'red'
2585 #     }
2586 # ]];
2587
2588=head3 length
2589
2590 my $length = $selector->length;
2591
2592Get selector entries count (selectors separated by "," combinator)
2593
2594 my $css = HTML5::DOM::CSS->new;
2595 my $selector = $css->parseSelector('body div.red, body span.blue');
2596 print $selector->length."\n"; # 2
2597
2598=head3 entry
2599
2600 my $entry = $selector->entry($index);
2601
2602Get selector entry by C<$index> and return L<HTML5::DOM::CSS::Selector::Entry|/HTML5::DOM::CSS::Selector::Entry>.
2603
2604 my $css = HTML5::DOM::CSS->new;
2605 my $selector = $css->parseSelector('body div.red, body span.blue');
2606 print $selector->entry(0)->text."\n"; # body div.red
2607 print $selector->entry(1)->text."\n"; # body span.blue
2608
2609=head3 utf8
2610
2611As getter - get C<1> if current selector object returns all strings with utf8 flag.
2612
2613Example with utf8:
2614
2615 use warnings;
2616 use strict;
2617 use HTML5::DOM;
2618 use utf8;
2619
2620 my $selector = HTML5::DOM::CSS->new->parseSelector("[name=\"тест\"]");
2621 my $is_utf8_enabled = $selector->utf8;
2622 print "is_utf8_enabled=".($is_utf8_enabled ? "true" : "false")."\n"; # true
2623
2624Or example with bytes:
2625
2626 use warnings;
2627 use strict;
2628 use HTML5::DOM;
2629
2630 my $selector = HTML5::DOM::CSS->new->parseSelector("[name=\"тест\"]");
2631 my $is_utf8_enabled = $selector->utf8;
2632 print "is_utf8_enabled=".($is_utf8_enabled ? "true" : "false")."\n"; # false
2633
2634As setter - enable or disable utf8 flag on all returned strings.
2635
2636 use warnings;
2637 use strict;
2638 use HTML5::DOM;
2639 use utf8;
2640
2641 my $selector = HTML5::DOM::CSS->new->parseSelector("[name=\"тест\"]");
2642
2643 print "is_utf8_enabled=".($selector->utf8 ? "true" : "false")."\n"; # true
2644 print length($selector->text)." chars\n"; # 13 chars
2645
2646 $selector->utf8(0);
2647
2648 print "is_utf8_enabled=".($selector->utf8 ? "true" : "false")."\n"; # false
2649 print length($selector->text)." bytes\n"; # 17 bytes
2650
2651=head1 HTML5::DOM::CSS::Selector::Entry
2652
2653CSS selector entry object (precompiled selector)
2654
2655=head3 text
2656
2657 my $selector_text = $entry->text;
2658
2659Serialize entry to text.
2660
2661 my $css = HTML5::DOM::CSS->new;
2662 my $selector = $css->parseSelector('body div.red, body span.blue');
2663 my $entry = $selector->entry(0);
2664 print $entry->text."\n"; # body div.red
2665
2666=head3 pseudoElement
2667
2668 my $pseudo_name = $entry->pseudoElement;
2669
2670Return pseudo-element name for entry.
2671
2672 my $css = HTML5::DOM::CSS->new;
2673 my $selector = $css->parseSelector('div::after');
2674 my $entry = $selector->entry(0);
2675 print $entry->pseudoElement."\n"; # after
2676
2677=head3 ast
2678
2679 my $ast = $entry->ast;
2680
2681Serialize entry to very simple AST format.
2682
2683 my $css = HTML5::DOM::CSS->new;
2684 my $selector = $css->parseSelector('div > .red');
2685 my $entry = $selector->entry(0);
2686 print Dumper($entry->ast);
2687
2688 # $VAR1 = [
2689 #     {
2690 #         'value' => 'div',
2691 #         'type' => 'tag'
2692 #     },
2693 #     {
2694 #         'type'  => 'combinator',
2695 #         'value' => 'child'
2696 #     },
2697 #     {
2698 #         'type' => 'class',
2699 #         'value' => 'red'
2700 #     }
2701 # ];
2702
2703=head3 specificity
2704
2705 my $specificity = $entry->specificity;
2706
2707Get specificity in hash C<{a, b, c}>
2708
2709 my $css = HTML5::DOM::CSS->new;
2710 my $selector = $css->parseSelector('body div.red, body span.blue');
2711 my $entry = $selector->entry(0);
2712 print Dumper($entry->specificity); # {a => 0, b => 1, c => 2}
2713
2714=head3 specificityArray
2715
2716 my $specificity = $entry->specificityArray;
2717
2718Get specificity in array C<[a, b, c]> (ordered by weight)
2719
2720 my $css = HTML5::DOM::CSS->new;
2721 my $selector = $css->parseSelector('body div.red, body span.blue');
2722 my $entry = $selector->entry(0);
2723 print Dumper($entry->specificityArray); # [0, 1, 2]
2724
2725
2726=head1 HTML5::DOM::Encoding
2727
2728Encoding detection.
2729
2730See for available encodings: L</ENCODINGS>
2731
2732=head3 id2name
2733
2734 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2735
2736Get encoding name by id.
2737
2738 print HTML5::DOM::Encoding::id2name(HTML5::DOM::Encoding->UTF_8); # UTF-8
2739
2740=head3 name2id
2741
2742 my $encoding_id = HTML5::DOM::Encoding::name2id($encoding);
2743
2744Get id by name.
2745
2746 print HTML5::DOM::Encoding->UTF_8;             # 0
2747 print HTML5::DOM::Encoding::name2id("UTF-8");  # 0
2748
2749=head3 detectAuto
2750
2751 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectAuto($text, $max_length = 0);
2752
2753Auto detect text encoding using (in this order):
2754
2755=over
2756
2757=item *
2758
2759L<detectByPrescanStream|/detectByPrescanStream>
2760
2761=item *
2762
2763L<detectBomAndCut|/detectBomAndCut>
2764
2765=item *
2766
2767L<detect|/detect>
2768
2769=back
2770
2771Returns array with encoding id and new text without BOM, if success.
2772
2773If fail, then encoding id equal HTML5::DOM::Encoding->NOT_DETERMINED.
2774
2775 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectAuto("ололо");
2776 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2777 print $encoding; # UTF-8
2778
2779=head3 detect
2780
2781 my $encoding_id = HTML5::DOM::Encoding::detect($text, $max_length = 0);
2782
2783Detect text encoding. Single method for both L<detectCyrillic|/detectCyrillic> and L<detectUnicode|/detectUnicode>.
2784
2785Returns encoding id, if success. And returns HTML5::DOM::Encoding->NOT_DETERMINED if fail.
2786
2787 my $encoding_id = HTML5::DOM::Encoding::detect("ололо");
2788 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2789 print $encoding; # UTF-8
2790
2791=head3 detectCyrillic
2792
2793 my $encoding_id = HTML5::DOM::Encoding::detectCyrillic($text, $max_length = 0);
2794
2795Detect cyrillic text encoding (using lowercase B<trigrams>), such as C<windows-1251>, C<koi8-r>, C<iso-8859-5>, C<x-mac-cyrillic>, C<ibm866>.
2796
2797Returns encoding id, if success. And returns HTML5::DOM::Encoding->NOT_DETERMINED if fail.
2798
2799This method also has aliases for compatibility reasons: C<detectUkrainian>, C<detectRussian>
2800
2801=head3 detectUnicode
2802
2803 my $encoding_id = HTML5::DOM::Encoding::detectUnicode($text, $max_length = 0);
2804
2805Detect unicode family text encoding, such as C<UTF-8>, C<UTF-16LE>, C<UTF-16BE>.
2806
2807Returns encoding id, if success. And returns HTML5::DOM::Encoding->NOT_DETERMINED if fail.
2808
2809 # get UTF-16LE data for test
2810 my $str = "ололо";
2811 Encode::from_to($str, "UTF-8", "UTF-16LE");
2812
2813 my $encoding_id = HTML5::DOM::Encoding::detectUnicode($str);
2814 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2815 print $encoding; # UTF-16LE
2816
2817=head3 detectByPrescanStream
2818
2819 my $encoding_id = HTML5::DOM::Encoding::detectByPrescanStream($text, $max_length = 0);
2820
2821Detect encoding by parsing C<E<lt>metaE<gt>> tags in html.
2822
2823Returns encoding id, if success. And returns HTML5::DOM::Encoding->NOT_DETERMINED if fail.
2824
2825See for more info: L<https://html.spec.whatwg.org/multipage/syntax.html#prescan-a-byte-stream-to-determine-its-encoding>
2826
2827 my $encoding_id = HTML5::DOM::Encoding::detectByPrescanStream('
2828    <meta http-equiv="content-type" content="text/html; charset=windows-1251">
2829 ');
2830 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2831 print $encoding; # WINDOWS-1251
2832
2833=head3 detectByCharset
2834
2835 my $encoding_id = HTML5::DOM::Encoding::detectByCharset($text, $max_length = 0);
2836
2837Extracting character encoding from string. Find "charset=" and see encoding. Return found raw data.
2838
2839For example: "text/html; charset=windows-1251". Return HTML5::DOM::Encoding->WINDOWS_1251
2840
2841And returns HTML5::DOM::Encoding->NOT_DETERMINED if fail.
2842
2843See for more info: L<https://html.spec.whatwg.org/multipage/infrastructure.html#algorithm-for-extracting-a-character-encoding-from-a-meta-element>
2844
2845 my $encoding_id = HTML5::DOM::Encoding::detectByCharset(
2846    'text/html; charset=windows-1251'
2847 );
2848 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2849 print $encoding; # WINDOWS-1251
2850
2851=head3 detectBomAndCut
2852
2853 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut($text, $max_length = 0);
2854
2855Returns array with encoding id and new text without BOM.
2856
2857If fail, then encoding id equal HTML5::DOM::Encoding->NOT_DETERMINED.
2858
2859 my ($encoding_id, $new_text) = HTML5::DOM::Encoding::detectBomAndCut("\xEF\xBB\xBFололо");
2860 my $encoding = HTML5::DOM::Encoding::id2name($encoding_id);
2861 print $encoding; # UTF-8
2862 print $new_text; # ололо
2863
2864=head1 NAMESPACES
2865
2866=head3 Supported namespace names
2867
2868 html, mathml, svg, xlink, xml, xmlns
2869
2870=head3 Supported namespace id constants
2871
2872 HTML5::DOM->NS_UNDEF
2873 HTML5::DOM->NS_HTML
2874 HTML5::DOM->NS_MATHML
2875 HTML5::DOM->NS_SVG
2876 HTML5::DOM->NS_XLINK
2877 HTML5::DOM->NS_XML
2878 HTML5::DOM->NS_XMLNS
2879 HTML5::DOM->NS_ANY
2880 HTML5::DOM->NS_LAST_ENTRY
2881
2882=head1 TAGS
2883
2884 HTML5::DOM->TAG__UNDEF
2885 HTML5::DOM->TAG__TEXT
2886 HTML5::DOM->TAG__COMMENT
2887 HTML5::DOM->TAG__DOCTYPE
2888 HTML5::DOM->TAG_A
2889 HTML5::DOM->TAG_ABBR
2890 HTML5::DOM->TAG_ACRONYM
2891 HTML5::DOM->TAG_ADDRESS
2892 HTML5::DOM->TAG_ANNOTATION_XML
2893 HTML5::DOM->TAG_APPLET
2894 HTML5::DOM->TAG_AREA
2895 HTML5::DOM->TAG_ARTICLE
2896 HTML5::DOM->TAG_ASIDE
2897 HTML5::DOM->TAG_AUDIO
2898 HTML5::DOM->TAG_B
2899 HTML5::DOM->TAG_BASE
2900 HTML5::DOM->TAG_BASEFONT
2901 HTML5::DOM->TAG_BDI
2902 HTML5::DOM->TAG_BDO
2903 HTML5::DOM->TAG_BGSOUND
2904 HTML5::DOM->TAG_BIG
2905 HTML5::DOM->TAG_BLINK
2906 HTML5::DOM->TAG_BLOCKQUOTE
2907 HTML5::DOM->TAG_BODY
2908 HTML5::DOM->TAG_BR
2909 HTML5::DOM->TAG_BUTTON
2910 HTML5::DOM->TAG_CANVAS
2911 HTML5::DOM->TAG_CAPTION
2912 HTML5::DOM->TAG_CENTER
2913 HTML5::DOM->TAG_CITE
2914 HTML5::DOM->TAG_CODE
2915 HTML5::DOM->TAG_COL
2916 HTML5::DOM->TAG_COLGROUP
2917 HTML5::DOM->TAG_COMMAND
2918 HTML5::DOM->TAG_COMMENT
2919 HTML5::DOM->TAG_DATALIST
2920 HTML5::DOM->TAG_DD
2921 HTML5::DOM->TAG_DEL
2922 HTML5::DOM->TAG_DETAILS
2923 HTML5::DOM->TAG_DFN
2924 HTML5::DOM->TAG_DIALOG
2925 HTML5::DOM->TAG_DIR
2926 HTML5::DOM->TAG_DIV
2927 HTML5::DOM->TAG_DL
2928 HTML5::DOM->TAG_DT
2929 HTML5::DOM->TAG_EM
2930 HTML5::DOM->TAG_EMBED
2931 HTML5::DOM->TAG_FIELDSET
2932 HTML5::DOM->TAG_FIGCAPTION
2933 HTML5::DOM->TAG_FIGURE
2934 HTML5::DOM->TAG_FONT
2935 HTML5::DOM->TAG_FOOTER
2936 HTML5::DOM->TAG_FORM
2937 HTML5::DOM->TAG_FRAME
2938 HTML5::DOM->TAG_FRAMESET
2939 HTML5::DOM->TAG_H1
2940 HTML5::DOM->TAG_H2
2941 HTML5::DOM->TAG_H3
2942 HTML5::DOM->TAG_H4
2943 HTML5::DOM->TAG_H5
2944 HTML5::DOM->TAG_H6
2945 HTML5::DOM->TAG_HEAD
2946 HTML5::DOM->TAG_HEADER
2947 HTML5::DOM->TAG_HGROUP
2948 HTML5::DOM->TAG_HR
2949 HTML5::DOM->TAG_HTML
2950 HTML5::DOM->TAG_I
2951 HTML5::DOM->TAG_IFRAME
2952 HTML5::DOM->TAG_IMAGE
2953 HTML5::DOM->TAG_IMG
2954 HTML5::DOM->TAG_INPUT
2955 HTML5::DOM->TAG_INS
2956 HTML5::DOM->TAG_ISINDEX
2957 HTML5::DOM->TAG_KBD
2958 HTML5::DOM->TAG_KEYGEN
2959 HTML5::DOM->TAG_LABEL
2960 HTML5::DOM->TAG_LEGEND
2961 HTML5::DOM->TAG_LI
2962 HTML5::DOM->TAG_LINK
2963 HTML5::DOM->TAG_LISTING
2964 HTML5::DOM->TAG_MAIN
2965 HTML5::DOM->TAG_MAP
2966 HTML5::DOM->TAG_MARK
2967 HTML5::DOM->TAG_MARQUEE
2968 HTML5::DOM->TAG_MENU
2969 HTML5::DOM->TAG_MENUITEM
2970 HTML5::DOM->TAG_META
2971 HTML5::DOM->TAG_METER
2972 HTML5::DOM->TAG_MTEXT
2973 HTML5::DOM->TAG_NAV
2974 HTML5::DOM->TAG_NOBR
2975 HTML5::DOM->TAG_NOEMBED
2976 HTML5::DOM->TAG_NOFRAMES
2977 HTML5::DOM->TAG_NOSCRIPT
2978 HTML5::DOM->TAG_OBJECT
2979 HTML5::DOM->TAG_OL
2980 HTML5::DOM->TAG_OPTGROUP
2981 HTML5::DOM->TAG_OPTION
2982 HTML5::DOM->TAG_OUTPUT
2983 HTML5::DOM->TAG_P
2984 HTML5::DOM->TAG_PARAM
2985 HTML5::DOM->TAG_PLAINTEXT
2986 HTML5::DOM->TAG_PRE
2987 HTML5::DOM->TAG_PROGRESS
2988 HTML5::DOM->TAG_Q
2989 HTML5::DOM->TAG_RB
2990 HTML5::DOM->TAG_RP
2991 HTML5::DOM->TAG_RT
2992 HTML5::DOM->TAG_RTC
2993 HTML5::DOM->TAG_RUBY
2994 HTML5::DOM->TAG_S
2995 HTML5::DOM->TAG_SAMP
2996 HTML5::DOM->TAG_SCRIPT
2997 HTML5::DOM->TAG_SECTION
2998 HTML5::DOM->TAG_SELECT
2999 HTML5::DOM->TAG_SMALL
3000 HTML5::DOM->TAG_SOURCE
3001 HTML5::DOM->TAG_SPAN
3002 HTML5::DOM->TAG_STRIKE
3003 HTML5::DOM->TAG_STRONG
3004 HTML5::DOM->TAG_STYLE
3005 HTML5::DOM->TAG_SUB
3006 HTML5::DOM->TAG_SUMMARY
3007 HTML5::DOM->TAG_SUP
3008 HTML5::DOM->TAG_SVG
3009 HTML5::DOM->TAG_TABLE
3010 HTML5::DOM->TAG_TBODY
3011 HTML5::DOM->TAG_TD
3012 HTML5::DOM->TAG_TEMPLATE
3013 HTML5::DOM->TAG_TEXTAREA
3014 HTML5::DOM->TAG_TFOOT
3015 HTML5::DOM->TAG_TH
3016 HTML5::DOM->TAG_THEAD
3017 HTML5::DOM->TAG_TIME
3018 HTML5::DOM->TAG_TITLE
3019 HTML5::DOM->TAG_TR
3020 HTML5::DOM->TAG_TRACK
3021 HTML5::DOM->TAG_TT
3022 HTML5::DOM->TAG_U
3023 HTML5::DOM->TAG_UL
3024 HTML5::DOM->TAG_VAR
3025 HTML5::DOM->TAG_VIDEO
3026 HTML5::DOM->TAG_WBR
3027 HTML5::DOM->TAG_XMP
3028 HTML5::DOM->TAG_ALTGLYPH
3029 HTML5::DOM->TAG_ALTGLYPHDEF
3030 HTML5::DOM->TAG_ALTGLYPHITEM
3031 HTML5::DOM->TAG_ANIMATE
3032 HTML5::DOM->TAG_ANIMATECOLOR
3033 HTML5::DOM->TAG_ANIMATEMOTION
3034 HTML5::DOM->TAG_ANIMATETRANSFORM
3035 HTML5::DOM->TAG_CIRCLE
3036 HTML5::DOM->TAG_CLIPPATH
3037 HTML5::DOM->TAG_COLOR_PROFILE
3038 HTML5::DOM->TAG_CURSOR
3039 HTML5::DOM->TAG_DEFS
3040 HTML5::DOM->TAG_DESC
3041 HTML5::DOM->TAG_ELLIPSE
3042 HTML5::DOM->TAG_FEBLEND
3043 HTML5::DOM->TAG_FECOLORMATRIX
3044 HTML5::DOM->TAG_FECOMPONENTTRANSFER
3045 HTML5::DOM->TAG_FECOMPOSITE
3046 HTML5::DOM->TAG_FECONVOLVEMATRIX
3047 HTML5::DOM->TAG_FEDIFFUSELIGHTING
3048 HTML5::DOM->TAG_FEDISPLACEMENTMAP
3049 HTML5::DOM->TAG_FEDISTANTLIGHT
3050 HTML5::DOM->TAG_FEDROPSHADOW
3051 HTML5::DOM->TAG_FEFLOOD
3052 HTML5::DOM->TAG_FEFUNCA
3053 HTML5::DOM->TAG_FEFUNCB
3054 HTML5::DOM->TAG_FEFUNCG
3055 HTML5::DOM->TAG_FEFUNCR
3056 HTML5::DOM->TAG_FEGAUSSIANBLUR
3057 HTML5::DOM->TAG_FEIMAGE
3058 HTML5::DOM->TAG_FEMERGE
3059 HTML5::DOM->TAG_FEMERGENODE
3060 HTML5::DOM->TAG_FEMORPHOLOGY
3061 HTML5::DOM->TAG_FEOFFSET
3062 HTML5::DOM->TAG_FEPOINTLIGHT
3063 HTML5::DOM->TAG_FESPECULARLIGHTING
3064 HTML5::DOM->TAG_FESPOTLIGHT
3065 HTML5::DOM->TAG_FETILE
3066 HTML5::DOM->TAG_FETURBULENCE
3067 HTML5::DOM->TAG_FILTER
3068 HTML5::DOM->TAG_FONT_FACE
3069 HTML5::DOM->TAG_FONT_FACE_FORMAT
3070 HTML5::DOM->TAG_FONT_FACE_NAME
3071 HTML5::DOM->TAG_FONT_FACE_SRC
3072 HTML5::DOM->TAG_FONT_FACE_URI
3073 HTML5::DOM->TAG_FOREIGNOBJECT
3074 HTML5::DOM->TAG_G
3075 HTML5::DOM->TAG_GLYPH
3076 HTML5::DOM->TAG_GLYPHREF
3077 HTML5::DOM->TAG_HKERN
3078 HTML5::DOM->TAG_LINE
3079 HTML5::DOM->TAG_LINEARGRADIENT
3080 HTML5::DOM->TAG_MARKER
3081 HTML5::DOM->TAG_MASK
3082 HTML5::DOM->TAG_METADATA
3083 HTML5::DOM->TAG_MISSING_GLYPH
3084 HTML5::DOM->TAG_MPATH
3085 HTML5::DOM->TAG_PATH
3086 HTML5::DOM->TAG_PATTERN
3087 HTML5::DOM->TAG_POLYGON
3088 HTML5::DOM->TAG_POLYLINE
3089 HTML5::DOM->TAG_RADIALGRADIENT
3090 HTML5::DOM->TAG_RECT
3091 HTML5::DOM->TAG_SET
3092 HTML5::DOM->TAG_STOP
3093 HTML5::DOM->TAG_SWITCH
3094 HTML5::DOM->TAG_SYMBOL
3095 HTML5::DOM->TAG_TEXT
3096 HTML5::DOM->TAG_TEXTPATH
3097 HTML5::DOM->TAG_TREF
3098 HTML5::DOM->TAG_TSPAN
3099 HTML5::DOM->TAG_USE
3100 HTML5::DOM->TAG_VIEW
3101 HTML5::DOM->TAG_VKERN
3102 HTML5::DOM->TAG_MATH
3103 HTML5::DOM->TAG_MACTION
3104 HTML5::DOM->TAG_MALIGNGROUP
3105 HTML5::DOM->TAG_MALIGNMARK
3106 HTML5::DOM->TAG_MENCLOSE
3107 HTML5::DOM->TAG_MERROR
3108 HTML5::DOM->TAG_MFENCED
3109 HTML5::DOM->TAG_MFRAC
3110 HTML5::DOM->TAG_MGLYPH
3111 HTML5::DOM->TAG_MI
3112 HTML5::DOM->TAG_MLABELEDTR
3113 HTML5::DOM->TAG_MLONGDIV
3114 HTML5::DOM->TAG_MMULTISCRIPTS
3115 HTML5::DOM->TAG_MN
3116 HTML5::DOM->TAG_MO
3117 HTML5::DOM->TAG_MOVER
3118 HTML5::DOM->TAG_MPADDED
3119 HTML5::DOM->TAG_MPHANTOM
3120 HTML5::DOM->TAG_MROOT
3121 HTML5::DOM->TAG_MROW
3122 HTML5::DOM->TAG_MS
3123 HTML5::DOM->TAG_MSCARRIES
3124 HTML5::DOM->TAG_MSCARRY
3125 HTML5::DOM->TAG_MSGROUP
3126 HTML5::DOM->TAG_MSLINE
3127 HTML5::DOM->TAG_MSPACE
3128 HTML5::DOM->TAG_MSQRT
3129 HTML5::DOM->TAG_MSROW
3130 HTML5::DOM->TAG_MSTACK
3131 HTML5::DOM->TAG_MSTYLE
3132 HTML5::DOM->TAG_MSUB
3133 HTML5::DOM->TAG_MSUP
3134 HTML5::DOM->TAG_MSUBSUP
3135 HTML5::DOM->TAG__END_OF_FILE
3136 HTML5::DOM->TAG_LAST_ENTRY
3137
3138=head1 ENCODINGS
3139
3140=head3 Supported encoding names
3141
3142 AUTO, NOT-DETERMINED, X-USER-DEFINED,
3143 BIG5, EUC-JP, EUC-KR, GB18030, GBK, IBM866, MACINTOSH, X-MAC-CYRILLIC, SHIFT_JIS,
3144 ISO-2022-JP, ISO-8859-10, ISO-8859-13, ISO-8859-14, ISO-8859-15, ISO-8859-16, ISO-8859-2,
3145 ISO-8859-3, ISO-8859-4, ISO-8859-5, ISO-8859-6, ISO-8859-7, ISO-8859-8, ISO-8859-8-I,
3146 WINDOWS-1250, WINDOWS-1251, WINDOWS-1252, WINDOWS-1253, WINDOWS-1254,
3147 WINDOWS-1255, WINDOWS-1256, WINDOWS-1257, WINDOWS-1258, WINDOWS-874,
3148 UTF-8, UTF-16BE, UTF-16LE, KOI8-R, KOI8-U
3149
3150=head3 Supported encoding id consts
3151
3152 HTML5::DOM::Encoding->DEFAULT
3153 HTML5::DOM::Encoding->AUTO
3154 HTML5::DOM::Encoding->NOT_DETERMINED
3155 HTML5::DOM::Encoding->UTF_8
3156 HTML5::DOM::Encoding->UTF_16LE
3157 HTML5::DOM::Encoding->UTF_16BE
3158 HTML5::DOM::Encoding->X_USER_DEFINED
3159 HTML5::DOM::Encoding->BIG5
3160 HTML5::DOM::Encoding->EUC_JP
3161 HTML5::DOM::Encoding->EUC_KR
3162 HTML5::DOM::Encoding->GB18030
3163 HTML5::DOM::Encoding->GBK
3164 HTML5::DOM::Encoding->IBM866
3165 HTML5::DOM::Encoding->ISO_2022_JP
3166 HTML5::DOM::Encoding->ISO_8859_10
3167 HTML5::DOM::Encoding->ISO_8859_13
3168 HTML5::DOM::Encoding->ISO_8859_14
3169 HTML5::DOM::Encoding->ISO_8859_15
3170 HTML5::DOM::Encoding->ISO_8859_16
3171 HTML5::DOM::Encoding->ISO_8859_2
3172 HTML5::DOM::Encoding->ISO_8859_3
3173 HTML5::DOM::Encoding->ISO_8859_4
3174 HTML5::DOM::Encoding->ISO_8859_5
3175 HTML5::DOM::Encoding->ISO_8859_6
3176 HTML5::DOM::Encoding->ISO_8859_7
3177 HTML5::DOM::Encoding->ISO_8859_8
3178 HTML5::DOM::Encoding->ISO_8859_8_I
3179 HTML5::DOM::Encoding->KOI8_R
3180 HTML5::DOM::Encoding->KOI8_U
3181 HTML5::DOM::Encoding->MACINTOSH
3182 HTML5::DOM::Encoding->SHIFT_JIS
3183 HTML5::DOM::Encoding->WINDOWS_1250
3184 HTML5::DOM::Encoding->WINDOWS_1251
3185 HTML5::DOM::Encoding->WINDOWS_1252
3186 HTML5::DOM::Encoding->WINDOWS_1253
3187 HTML5::DOM::Encoding->WINDOWS_1254
3188 HTML5::DOM::Encoding->WINDOWS_1255
3189 HTML5::DOM::Encoding->WINDOWS_1256
3190 HTML5::DOM::Encoding->WINDOWS_1257
3191 HTML5::DOM::Encoding->WINDOWS_1258
3192 HTML5::DOM::Encoding->WINDOWS_874
3193 HTML5::DOM::Encoding->X_MAC_CYRILLIC
3194 HTML5::DOM::Encoding->LAST_ENTRY
3195
3196=head1 PARSER OPTIONS
3197
3198Options for:
3199
3200=over
3201
3202=item *
3203
3204L<HTML5::DOM::new|/new>
3205
3206=item *
3207
3208L<HTML5::DOM::parse|/parse>
3209
3210=item *
3211
3212L<HTML5::DOM::parseChunkEnd|/parseChunkEnd>
3213
3214=item *
3215
3216L<HTML5::DOM::Tree::parseFragment|/parseFragment>
3217
3218=back
3219
3220=head4 threads
3221
3222Threads count, if < 2 - parsing in single mode without threads (default 0)
3223
3224This option only affects L<HTML5::DOM::new|/new>.
3225
3226Originally, L<MyHTML|https://github.com/lexborisov/myhtml/blob/master/LICENSE> can use multithreaded parsing.
3227
3228But in real cases this mode is slower than single mode (threads=0). The resulting speed is very OS-specific and depends on the input html.
3229
3230Not recommended unless you know what you are doing. B<Single mode is faster in 99.9% of cases.>
3231
3232=head4 ignore_whitespace
3233
3234Ignore whitespace tokens (default 0)
3235
3236=head4 ignore_doctype
3237
3238Do not parse DOCTYPE (default 0)
3239
3240=head4 scripts
3241
3242If 1 - <noscript> contents parsed to single text node (default)
3243
3244If 0 - <noscript> contents parsed to child nodes
3245
3246=head4 encoding
3247
3248Encoding of input HTML, if C<auto> - library will try to automatically determine encoding. (default "auto")
3249
3250Both an encoding name and an encoding id are allowed.
3251
3252=head4 default_encoding
3253
3254Default encoding, this is used only if C<encoding> is set to C<auto> and the encoding could not be determined. (default "UTF-8")
3255
3256Both an encoding name and an encoding id are allowed.
3257
3258See for available encodings: L</ENCODINGS>
3259
3260=head4 encoding_use_meta
3261
3262Allow using C<E<lt>metaE<gt>> tags to determine input HTML encoding. (default 1)
3263
3264See L<detectByPrescanStream|/detectByPrescanStream>.
3265
3266=head4 encoding_prescan_limit
3267
3268Limit string length to determine encoding by C<E<lt>metaE<gt>> tags. (default 1024, from spec)
3269
3270See L<detectByPrescanStream|/detectByPrescanStream>.
3271
3272=head4 encoding_use_bom
3273
3274Allow using BOM detection to determine input HTML encoding. (default 1)
3275
3276See L<detectBomAndCut|/detectBomAndCut>.
3277
3278=head4 utf8
3279
3280Default: C<"auto">
3281
3282If 1, then all returned strings have utf8 flag (chars).
3283
3284If 0, then all returned strings don't have utf8 flag (bytes).
3285
3286If C<"auto">, then utf8 flag is detected from the input string. Automatically enables C<utf8=1> if the input string has utf8 flag.
3287
3288C<"auto"> works only in L<parse|/parse>, L<parseChunk|/parseChunk>, L<parseAsync|/parseAsync> methods.
3289
3290
3291=head1 CSS PARSER OPTIONS
3292
3293Options for:
3294
3295=over
3296
3297=item *
3298
3299L<HTML5::DOM::CSS::new|/new>
3300
3301=item *
3302
3303L<HTML5::DOM::CSS::parseSelector|/parseSelector>
3304
3305=back
3306
3307=head4 utf8
3308
3309Default: C<"auto">
3310
3311If 1, then all returned strings have utf8 flag (chars).
3312
3313If 0, then all returned strings don't have utf8 flag (bytes).
3314
3315If C<"auto">, then utf8 flag is detected from the input string. Automatically enables C<utf8=1> if the input string has utf8 flag.
3316
3317
3318=head1 HTML5 SUPPORT
3319
3320Tested with L<html5lib-tests|https://github.com/html5lib/html5lib-tests> (at 2021-06-26)
3321
3322 -------------------------------------------------------------
3323 test                        total    ok      fail    skip
3324 -------------------------------------------------------------
3325 foreign-fragment.dat        66       54      12      0
3326 tests26.dat                 19       16      3       0
3327 menuitem-element.dat        19       16      3       0
3328 tests11.dat                 12       11      1       0
3329 tests1.dat                  112      112     0       0
3330 tests4.dat                  6        6       0       0
3331 tests6.dat                  51       51      0       0
3332 ruby.dat                    20       20      0       0
3333 adoption01.dat              17       17      0       0
3334 tests14.dat                 6        6       0       0
3335 tests19.dat                 104      104     0       0
3336 tests7.dat                  30       30      0       0
3337 noscript01.dat              17       17      0       0
3338 tests17.dat                 12       12      0       0
3339 tests23.dat                 4        4       0       0
3340 pending-spec-changes.dat    2        2       0       0
3341 tables01.dat                16       16      0       0
3342 entities02.dat              25       25      0       0
3343 tests22.dat                 4        4       0       0
3344 tests10.dat                 53       53      0       0
3345 tests15.dat                 13       13      0       0
3346 inbody01.dat                3        3       0       0
3347 template.dat                107      107     0       0
3348 plain-text-unsafe.dat       32       32      0       0
3349 comments01.dat              15       15      0       0
3350 scriptdata01.dat            26       26      0       0
3351 svg.dat                     7        7       0       0
3352 tests25.dat                 25       25      0       0
3353 tests3.dat                  23       23      0       0
3354 tests20.dat                 43       43      0       0
3355 tests12.dat                 1        1       0       0
3356 tests21.dat                 24       24      0       0
3357 math.dat                    7        7       0       0
3358 webkit01.dat                49       49      0       0
3359 main-element.dat            2        2       0       0
3360 adoption02.dat              1        1       0       0
3361 domjs-unsafe.dat            48       48      0       0
3362 tests16.dat                 196      196     0       0
3363 blocks.dat                  47       47      0       0
3364 tests5.dat                  16       16      0       0
3365 tests8.dat                  9        9       0       0
3366 tricky01.dat                8        8       0       0
3367 tests18.dat                 35       35      0       0
3368 webkit02.dat                20       20      0       0
3369 tests24.dat                 7        7       0       0
3370 html5test-com.dat           23       23      0       0
3371 isindex.dat                 3        3       0       0
3372 doctype01.dat               36       36      0       0
3373 entities01.dat              74       74      0       0
3374 tests2.dat                  61       61      0       0
3375 tests9.dat                  26       26      0       0
3376 tests_innerHTML_1.dat       84       84      0       0
3377 summary                     1666     1647    19      0
3378
3379Tested with C<examples/html5lib_tests.pl>
3380
3381 perl examples/html5lib_tests.pl --dir=../html5lib-tests/tree-construction --colordiff
3382
3383Send patches to lexborisov's L<MyHTML|https://github.com/lexborisov/myhtml> if you want to improve this result.
3384
3385=head1 WORK WITH UTF8
3386
3387In normal cases you don't need to care about utf8. Everything works out of the box.
3388
3389By default utf8 mode is enabled automatically if you pass a string with utf8 flag.
3390
3391For example:
3392
3393Works perfectly with C<use utf8>:
3394
3395 use warnings;
3396 use strict;
3397 use HTML5::DOM;
3398 use utf8;
3399
3400 my $parser = HTML5::DOM->new;
3401 my $str = HTML5::DOM->new->parse('<b>тест тест</b>')->at('b')->text;
3402 print "length=".length($str)." [$str]\n"; # length=9 [тест тест]
3403
3404Works perfectly without C<use utf8>:
3405
3406 use warnings;
3407 use strict;
3408 use HTML5::DOM;
3409
3410 # Perfect work with default mode of perl strings (bytes)
3411 my $parser = HTML5::DOM->new;
3412 my $str = HTML5::DOM->new->parse('<b>тест тест</b>')->at('b')->text;
3413 print "length=".length($str)." [$str]\n"; # length=17 [тест тест]
3414
3415 # You can pass string with utf8 flag without "use utf8" and it perfect works
3416 use Encode;
3417 my $test = '<b>тест тест</b>';
3418 Encode::_utf8_on($test);
3419
3420 $str = HTML5::DOM->new->parse($test)->at('b')->text;
3421 print "length=".length($str)." [$str]\n"; # length=9 [тест тест]
3422
3423But you can override this behavior - see L<"PARSER OPTIONS"> for details.
3424
3425Force use bytes:
3426
3427 use warnings;
3428 use strict;
3429 use HTML5::DOM;
3430 use utf8;
3431
3432 my $parser = HTML5::DOM->new({ utf8 => 0 });
3433 my $str = $parser->parse('<b>тест тест</b>')->at('b')->text;
3434 print "length=".length($str)." [$str]\n"; # length=17 [тест тест]
3435
3436Force use utf8:
3437
3438 use warnings;
3439 use strict;
3440 use HTML5::DOM;
3441
3442 my $parser = HTML5::DOM->new({ utf8 => 1 });
3443 my $str = $parser->parse('<b>тест тест</b>')->at('b')->text;
3444 print "length=".length($str)." [$str]\n"; # length=13 [тест тест]
3445
3446=head1 BUGS
3447
3448L<https://github.com/Azq2/perl-html5-dom/issues>
3449
3450=head1 SEE ALSO
3451
3452=over
3453
3454=item *
3455
3456L<HTML::MyHTML|https://metacpan.org/pod/HTML::MyHTML> - more low-level myhtml bindings.
3457
3458=item *
3459
3460L<Mojo::DOM|https://metacpan.org/pod/Mojo::DOM> - pure perl HTML5 DOM library with CSS selectors.
3461
3462=back
3463
3464=head1 AUTHOR
3465
3466Kirill Zhumarin <kirill.zhumarin@gmail.com>
3467
3468=head1 LICENSE
3469
3470=over
3471
3472=item *
3473
3474HTML5::DOM - L<MIT|https://github.com/Azq2/perl-html5-dom/blob/master/LICENSE>
3475
3476=item *
3477
3478Modest - L<LGPL 2.1|https://github.com/lexborisov/Modest/blob/master/LICENSE>
3479
3480=item *
3481
3482MyHTML - L<LGPL 2.1|https://github.com/lexborisov/myhtml/blob/master/LICENSE>
3483
3484=item *
3485
3486MyCSS - L<LGPL 2.1|https://github.com/lexborisov/mycss/blob/master/LICENSE>
3487
3488=back
3489