1use strict; 2use warnings; 3package YAML::PP::Lexer; 4 5our $VERSION = '0.020'; # VERSION 6 7use constant TRACE => $ENV{YAML_PP_TRACE} ? 1 : 0; 8use constant DEBUG => ($ENV{YAML_PP_DEBUG} || $ENV{YAML_PP_TRACE}) ? 1 : 0; 9 10use YAML::PP::Grammar qw/ $GRAMMAR /; 11use Carp qw/ croak /; 12 13sub new { 14 my ($class, %args) = @_; 15 my $self = bless { 16 reader => $args{reader}, 17 }, $class; 18 $self->init; 19 return $self; 20} 21 22sub init { 23 my ($self) = @_; 24 $self->{next_tokens} = []; 25 $self->{next_line} = undef; 26 $self->{line} = 0; 27 $self->{offset} = 0; 28 $self->{flowcontext} = 0; 29} 30 31sub next_line { return $_[0]->{next_line} } 32sub set_next_line { $_[0]->{next_line} = $_[1] } 33sub reader { return $_[0]->{reader} } 34sub set_reader { $_[0]->{reader} = $_[1] } 35sub next_tokens { return $_[0]->{next_tokens} } 36sub line { return $_[0]->{line} } 37sub set_line { $_[0]->{line} = $_[1] } 38sub offset { return $_[0]->{offset} } 39sub set_offset { $_[0]->{offset} = $_[1] } 40sub inc_line { return $_[0]->{line}++ } 41sub context { return $_[0]->{context} } 42sub set_context { $_[0]->{context} = $_[1] } 43sub flowcontext { return $_[0]->{flowcontext} } 44sub set_flowcontext { $_[0]->{flowcontext} = $_[1] } 45 46my $RE_WS = '[\t ]'; 47my $RE_LB = '[\r\n]'; 48my $RE_DOC_END = qr/\A(\.\.\.)(?=$RE_WS|$)/m; 49my $RE_DOC_START = qr/\A(---)(?=$RE_WS|$)/m; 50my $RE_EOL = qr/\A($RE_WS+#.*|$RE_WS+)\z/; 51#my $RE_COMMENT_EOL = qr/\A(#.*)?(?:$RE_LB|\z)/; 52 53#ns-word-char ::= ns-dec-digit | ns-ascii-letter | “-” 54my $RE_NS_WORD_CHAR = '[0-9A-Za-z-]'; 55my $RE_URI_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'. q{[0-9A-Za-z#;/?:@&=+$,_.!*'\(\)\[\]-]} . ')'; 56my $RE_NS_TAG_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'. q{[0-9A-Za-z#;/?:@&=+$_.*'\(\)-]} . ')'; 57 58# [#x21-#x7E] /* 8 bit */ 59# | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */ 60# | [#x10000-#x10FFFF] /* 32 bit */ 61 62#nb-char ::= c-printable - b-char - c-byte-order-mark 63#my $RE_NB_CHAR = '[\x21-\x7E]'; 64my $RE_ANCHOR_CAR = '[\x21-\x2B\x2D-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]'; 65 66my $RE_PLAIN_START = '[\x21\x22\x24-\x39\x3B-\x7E\xA0-\xFF\x{100}-\x{10FFFF}]'; 67my $RE_PLAIN_END = '[\x21-\x39\x3B-\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]'; 68my $RE_PLAIN_FIRST = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7E\xA0-\xFF\x{100}-\x{10FFFF}]'; 69 70my $RE_PLAIN_START_FLOW = '[\x21\x22\x24-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]'; 71my $RE_PLAIN_END_FLOW = '[\x21-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]'; 72my $RE_PLAIN_FIRST_FLOW = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]'; 73# c-indicators 74#! 21 75#" 22 76## 23 77#% 25 78#& 26 79#' 27 80#* 2A 81#, 2C FLOW 82#- 2D XX 83#: 3A XX 84#> 3E 85#? 3F XX 86#@ 40 87#[ 5B FLOW 88#] 5D FLOW 89#` 60 90#{ 7B FLOW 91#| 7C 92#} 7D FLOW 93 94 95my $RE_PLAIN_WORD = "(?::+$RE_PLAIN_END|$RE_PLAIN_START)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*"; 96my $RE_PLAIN_FIRST_WORD = "(?:[:?-]+$RE_PLAIN_END|$RE_PLAIN_FIRST)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*"; 97my $RE_PLAIN_WORDS = "(?:$RE_PLAIN_FIRST_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)"; 98my $RE_PLAIN_WORDS2 = "(?:$RE_PLAIN_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)"; 99 100my $RE_PLAIN_WORD_FLOW = "(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_START_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*"; 101my $RE_PLAIN_FIRST_WORD_FLOW = "(?:[:?-]+$RE_PLAIN_END_FLOW|$RE_PLAIN_FIRST_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*"; 102my $RE_PLAIN_WORDS_FLOW = "(?:$RE_PLAIN_FIRST_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)"; 103my $RE_PLAIN_WORDS_FLOW2 = "(?:$RE_PLAIN_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)"; 104 105 106#c-secondary-tag-handle ::= “!” “!” 107#c-named-tag-handle ::= “!” ns-word-char+ “!” 108#ns-tag-char ::= ns-uri-char - “!” - c-flow-indicator 109#ns-global-tag-prefix ::= ns-tag-char ns-uri-char* 110#c-ns-local-tag-prefix ::= “!” ns-uri-char* 111my $RE_TAG = "!(?:$RE_NS_WORD_CHAR*!$RE_NS_TAG_CHAR+|$RE_NS_TAG_CHAR+|<$RE_URI_CHAR+>|)"; 112 113#c-ns-anchor-property ::= “&” ns-anchor-name 114#ns-char ::= nb-char - s-white 115#ns-anchor-char ::= ns-char - c-flow-indicator 116#ns-anchor-name ::= ns-anchor-char+ 117 118my $RE_SEQSTART = qr/\A(-)(?=$RE_WS|$)/m; 119my $RE_COMPLEX = qr/(\?)(?=$RE_WS|$)/m; 120my $RE_COMPLEXCOLON = qr/\A(:)(?=$RE_WS|$)/m; 121my $RE_ANCHOR = "&$RE_ANCHOR_CAR+"; 122my $RE_ALIAS = "\\*$RE_ANCHOR_CAR+"; 123 124 125my %REGEXES = ( 126 ANCHOR => qr{($RE_ANCHOR)}, 127 TAG => qr{($RE_TAG)}, 128 ALIAS => qr{($RE_ALIAS)}, 129 SINGLEQUOTED => qr{(?:''|[^'\r\n]+)*}, 130); 131 132sub fetch_next_line { 133 my ($self) = @_; 134 my $next_line = $self->next_line; 135 if (defined $next_line ) { 136 return $next_line; 137 } 138 139 my $line = $self->reader->readline; 140 unless (defined $line) { 141 $self->set_next_line(undef); 142 return; 143 } 144 $self->inc_line; 145 $line =~ m/\A( *)([^\r\n]*)([\r\n]|\z)/ or die "Unexpected"; 146 $next_line = [ $1, $2, $3 ]; 147 $self->set_next_line($next_line); 148 # $ESCAPE_CHAR from YAML.pm 149 if ($line =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1f//) { 150 $self->exception("Control characters are not allowed"); 151 } 152 153 return $next_line; 154} 155 156my %TOKEN_NAMES = ( 157 '"' => 'DOUBLEQUOTE', 158 "'" => 'SINGLEQUOTE', 159 '|' => 'LITERAL', 160 '>' => 'FOLDED', 161 '!' => 'TAG', 162 '*' => 'ALIAS', 163 '&' => 'ANCHOR', 164 ':' => 'COLON', 165 '-' => 'DASH', 166 '?' => 'QUESTION', 167 '[' => 'FLOWSEQ_START', 168 ']' => 'FLOWSEQ_END', 169 '{' => 'FLOWMAP_START', 170 '}' => 'FLOWMAP_END', 171 ',' => 'FLOW_COMMA', 172 '---' => 'DOC_START', 173 '...' => 'DOC_END', 174); 175 176 177sub fetch_next_tokens { 178 my ($self) = @_; 179 my $next = $self->next_tokens; 180 return $next if @$next; 181 182 my $next_line = $self->fetch_next_line; 183 if (not $next_line) { 184 return []; 185 } 186 187 my $spaces = $next_line->[0]; 188 my $yaml = \$next_line->[1]; 189 if (not length $$yaml) { 190 $self->push_tokens([ EOL => join('', @$next_line), $self->line ]); 191 $self->set_next_line(undef); 192 return $next; 193 } 194 if (substr($$yaml, 0, 1) eq '#') { 195 $self->push_tokens([ EOL => join('', @$next_line), $self->line ]); 196 $self->set_next_line(undef); 197 return $next; 198 } 199 if (not $spaces and substr($$yaml, 0, 1) eq "%") { 200 $self->_fetch_next_tokens_directive($yaml, $next_line->[2]); 201 $self->set_context(0); 202 $self->set_next_line(undef); 203 return $next; 204 } 205 if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) { 206 $self->push_tokens([ $TOKEN_NAMES{ $1 } => $1, $self->line ]); 207 } 208 else { 209 $self->push_tokens([ SPACE => $spaces, $self->line ]); 210 } 211 212 my $partial = $self->_fetch_next_tokens($next_line); 213 unless ($partial) { 214 $self->set_next_line(undef); 215 } 216 return $next; 217} 218 219my %ANCHOR_ALIAS_TAG = ( '&' => 1, '*' => 1, '!' => 1 ); 220my %BLOCK_SCALAR = ( '|' => 1, '>' => 1 ); 221my %COLON_DASH_QUESTION = ( ':' => 1, '-' => 1, '?' => 1 ); 222my %QUOTED = ( '"' => 1, "'" => 1 ); 223my %FLOW = ( '{' => 1, '[' => 1, '}' => 1, ']' => 1, ',' => 1 ); 224my %CONTEXT = ( '"' => 1, "'" => 1, '>' => 1, '|' => 1 ); 225 226my $RE_ESCAPES = qr{(?: 227 \\([ \\\/_0abefnrtvLNP"]) | \\x([0-9a-fA-F]{2}) 228 | \\u([A-Fa-f0-9]{4}) | \\U([A-Fa-f0-9]{4,8}) 229)}x; 230my %CONTROL = ( 231 '\\' => '\\', '/' => '/', n => "\n", t => "\t", r => "\r", b => "\b", 232 'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f", 'v' => "\x0b", 233 'P' => "\x{2029}", L => "\x{2028}", 'N' => "\x85", 234 '0' => "\0", '_' => "\xa0", ' ' => ' ', q/"/ => q/"/, 235); 236 237sub _fetch_next_tokens { 238 TRACE and warn __PACKAGE__.':'.__LINE__.": _fetch_next_tokens\n"; 239 my ($self, $next_line) = @_; 240 241 my $yaml = \$next_line->[1]; 242 my $eol = $next_line->[2]; 243 244 my @tokens; 245 246 while (1) { 247 unless (length $$yaml) { 248 push @tokens, ( EOL => $eol, $self->line ); 249 $self->push_tokens(\@tokens); 250 return; 251 } 252 my $first = substr($$yaml, 0, 1); 253 my $plain = 0; 254 255 if ($self->context) { 256 if ($$yaml =~ s/\A($RE_WS*)://) { 257 push @tokens, ( WS => $1, $self->line ) if $1; 258 push @tokens, ( COLON => ':', $self->line ); 259 $self->set_context(0); 260 next; 261 } 262 if ($$yaml =~ s/\A($RE_WS*(?: #.*))\z//) { 263 push @tokens, ( EOL => $1 . $eol, $self->line ); 264 $self->push_tokens(\@tokens); 265 return; 266 } 267 $self->set_context(0); 268 } 269 if ($CONTEXT{ $first }) { 270 push @tokens, ( CONTEXT => $first, $self->line ); 271 $self->push_tokens(\@tokens); 272 return 1; 273 } 274 elsif ($COLON_DASH_QUESTION{ $first }) { 275 my $token_name = $TOKEN_NAMES{ $first }; 276 if ($$yaml =~ s/\A\Q$first\E(?:($RE_WS+)|\z)//) { 277 my $token_name = $TOKEN_NAMES{ $first }; 278 push @tokens, ( $token_name => $first, $self->line ); 279 if (not defined $1) { 280 push @tokens, ( EOL => $eol, $self->line ); 281 $self->push_tokens(\@tokens); 282 return; 283 } 284 my $ws = $1; 285 if ($$yaml =~ s/\A(#.*|)\z//) { 286 push @tokens, ( EOL => $ws . $1 . $eol, $self->line ); 287 $self->push_tokens(\@tokens); 288 return; 289 } 290 push @tokens, ( WS => $ws, $self->line ); 291 next; 292 } 293 elsif ($self->flowcontext and $$yaml =~ s/\A:(?=[,\{\}\[\]])//) { 294 push @tokens, ( $token_name => $first, $self->line ); 295 next; 296 } 297 $plain = 1; 298 } 299 elsif ($ANCHOR_ALIAS_TAG{ $first }) { 300 my $token_name = $TOKEN_NAMES{ $first }; 301 my $REGEX = $REGEXES{ $token_name }; 302 if ($$yaml =~ s/\A$REGEX//) { 303 push @tokens, ( $token_name => $1, $self->line ); 304 } 305 else { 306 push @tokens, ( "Invalid $token_name" => $$yaml, $self->line ); 307 $self->push_tokens(\@tokens); 308 return; 309 } 310 } 311 elsif ($first eq ' ' or $first eq "\t") { 312 if ($$yaml =~ s/\A($RE_WS+)//) { 313 my $ws = $1; 314 if ($$yaml =~ s/\A((?:#.*)?\z)//) { 315 push @tokens, ( EOL => $ws . $1 . $eol, $self->line ); 316 $self->push_tokens(\@tokens); 317 return; 318 } 319 push @tokens, ( WS => $ws, $self->line ); 320 } 321 } 322 elsif ($FLOW{ $first }) { 323 push @tokens, ( $TOKEN_NAMES{ $first } => $first, $self->line ); 324 substr($$yaml, 0, 1, ''); 325 my $flowcontext = $self->flowcontext; 326 if ($first eq '{' or $first eq '[') { 327 $self->set_flowcontext(++$flowcontext); 328 } 329 elsif ($first eq '}' or $first eq ']') { 330 $self->set_flowcontext(--$flowcontext); 331 } 332 } 333 else { 334 $plain = 1; 335 } 336 337 if ($plain) { 338 push @tokens, ( CONTEXT => '', $self->line ); 339 $self->push_tokens(\@tokens); 340 return 1; 341 } 342 343 } 344 345 return; 346} 347 348sub fetch_plain { 349 my ($self, $indent, $context) = @_; 350 my $next_line = $self->next_line; 351 my $yaml = \$next_line->[1]; 352 my $eol = $next_line->[2]; 353 my $REGEX = $RE_PLAIN_WORDS; 354 if ($self->flowcontext) { 355 $REGEX = $RE_PLAIN_WORDS_FLOW; 356 } 357 358 my @tokens; 359 unless ($$yaml =~ s/\A($REGEX)//) { 360 $self->push_tokens(\@tokens); 361 $self->exception("Invalid plain scalar"); 362 } 363 my $plain = $1; 364 push @tokens, ( PLAIN => $plain, $self->line ); 365 366 if ($$yaml =~ s/\A(?:($RE_WS+#.*)|($RE_WS*))\z//) { 367 if (defined $1) { 368 push @tokens, ( EOL => $1 . $eol, $self->line ); 369 $self->push_tokens(\@tokens); 370 $self->set_next_line(undef); 371 return; 372 } 373 else { 374 push @tokens, ( EOL => $2. $eol, $self->line ); 375 $self->set_next_line(undef); 376 } 377 } 378 else { 379 $self->push_tokens(\@tokens); 380 my $partial = $self->_fetch_next_tokens($next_line); 381 if (not $partial) { 382 $self->set_next_line(undef); 383 } 384 return; 385 } 386 387 my $RE2 = $RE_PLAIN_WORDS2; 388 if ($self->flowcontext) { 389 $RE2 = $RE_PLAIN_WORDS_FLOW2; 390 } 391 my $fetch_next = 0; 392 my @lines = ($plain); 393 my @next; 394 LOOP: while (1) { 395 $next_line = $self->fetch_next_line; 396 if (not $next_line) { 397 last LOOP; 398 } 399 my $spaces = $next_line->[0]; 400 my $yaml = \$next_line->[1]; 401 my $eol = $next_line->[2]; 402 403 if (not length $$yaml) { 404 push @tokens, ( EOL => $spaces . $eol, $self->line ); 405 $self->set_next_line(undef); 406 push @lines, ''; 407 next LOOP; 408 } 409 410 if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) { 411 push @next, $TOKEN_NAMES{ $1 } => $1, $self->line; 412 $fetch_next = 1; 413 last LOOP; 414 } 415 if ((length $spaces) < $indent) { 416 last LOOP; 417 } 418 419 my $ws = ''; 420 if ($$yaml =~ s/\A($RE_WS+)//) { 421 $ws = $1; 422 } 423 if (not length $$yaml) { 424 push @tokens, ( EOL => $spaces . $ws . $eol, $self->line ); 425 $self->set_next_line(undef); 426 push @lines, ''; 427 next LOOP; 428 } 429 if ($$yaml =~ s/\A(#.*)\z//) { 430 push @tokens, ( EOL => $spaces . $ws . $1 . $eol, $self->line ); 431 $self->set_next_line(undef); 432 last LOOP; 433 } 434 435 if ($$yaml =~ s/\A($RE2)//) { 436 push @tokens, INDENT => $spaces, $self->line; 437 push @tokens, WS => $ws, $self->line; 438 push @tokens, PLAIN => $1, $self->line; 439 push @lines, $1; 440 my $ws = ''; 441 if ($$yaml =~ s/\A($RE_WS+)//) { 442 $ws = $1; 443 } 444 if (not length $$yaml) { 445 push @tokens, EOL => $ws . $eol, $self->line; 446 $self->set_next_line(undef); 447 next LOOP; 448 } 449 450 if ($$yaml =~ s/\A(#.*)\z//) { 451 push @tokens, EOL => $ws . $1 . $eol, $self->line; 452 $self->set_next_line(undef); 453 last LOOP; 454 } 455 else { 456 push @tokens, WS => $ws, $self->line if $ws; 457 $fetch_next = 1; 458 } 459 } 460 else { 461 push @tokens, SPACE => $spaces, $self->line; 462 push @tokens, WS => $ws, $self->line; 463 if ($self->flowcontext) { 464 $fetch_next = 1; 465 } 466 else { 467 push @tokens, ERROR => $$yaml, $self->line; 468 } 469 } 470 471 last LOOP; 472 473 } 474 # remove empty lines at the end 475 while (@lines > 1 and $lines[-1] eq '') { 476 pop @lines; 477 } 478 if (@lines > 1) { 479 my $value = YAML::PP::Render->render_multi_val(\@lines); 480 my @eol; 481 if ($tokens[-3] eq 'EOL') { 482 @eol = splice @tokens, -3; 483 } 484 $self->push_subtokens( { name => 'PLAIN_MULTI', value => $value }, \@tokens); 485 $self->push_tokens([ @eol, @next ]); 486 } 487 else { 488 $self->push_tokens([ @tokens, @next ]); 489 } 490 @tokens = (); 491 if ($fetch_next) { 492 my $partial = $self->_fetch_next_tokens($next_line); 493 if (not $partial) { 494 $self->set_next_line(undef); 495 } 496 } 497 return; 498} 499 500sub fetch_block { 501 my ($self, $indent, $context) = @_; 502 my $next_line = $self->next_line; 503 my $yaml = \$next_line->[1]; 504 my $eol = $next_line->[2]; 505 506 my @tokens; 507 my $token_name = $TOKEN_NAMES{ $context }; 508 $$yaml =~ s/\A\Q$context\E// or die "Unexpected"; 509 push @tokens, ( $token_name => $context, $self->line ); 510 my $current_indent = $indent; 511 my $started = 0; 512 my $set_indent = 0; 513 my $chomp = ''; 514 if ($$yaml =~ s/\A([1-9]\d*)([+-]?)//) { 515 push @tokens, ( BLOCK_SCALAR_INDENT => $1, $self->line ); 516 $set_indent = $1; 517 $chomp = $2 if $2; 518 push @tokens, ( BLOCK_SCALAR_CHOMP => $2, $self->line ) if $2; 519 } 520 elsif ($$yaml =~ s/\A([+-])([1-9]\d*)?//) { 521 push @tokens, ( BLOCK_SCALAR_CHOMP => $1, $self->line ); 522 $chomp = $1; 523 push @tokens, ( BLOCK_SCALAR_INDENT => $2, $self->line ) if $2; 524 $set_indent = $2 if $2; 525 } 526 if ($set_indent) { 527 $started = 1; 528 $current_indent = $set_indent; 529 } 530 if (not length $$yaml) { 531 push @tokens, ( EOL => $eol, $self->line ); 532 } 533 elsif ($$yaml =~ s/\A($RE_WS*(?:$RE_WS#.*|))\z//) { 534 push @tokens, ( EOL => $1 . $eol, $self->line ); 535 } 536 else { 537 $self->push_tokens(\@tokens); 538 $self->exception("Invalid block scalar"); 539 } 540 541 my @lines; 542 while (1) { 543 $self->set_next_line(undef); 544 $next_line = $self->fetch_next_line; 545 if (not $next_line) { 546 last; 547 } 548 my $spaces = $next_line->[0]; 549 my $content = $next_line->[1]; 550 my $eol = $next_line->[2]; 551 if (not $spaces and $content =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) { 552 last; 553 } 554 if ((length $spaces) < $current_indent) { 555 if (length $content) { 556 last; 557 } 558 else { 559 push @lines, ''; 560 push @tokens, ( EOL => $spaces . $eol, $self->line ); 561 next; 562 } 563 } 564 if ((length $spaces) > $current_indent) { 565 if ($started) { 566 ($spaces, my $more_spaces) = unpack "a${current_indent}a*", $spaces; 567 $content = $more_spaces . $content; 568 } 569 } 570 unless (length $content) { 571 push @lines, ''; 572 push @tokens, ( INDENT => $spaces, $self->line, EOL => $eol, $self->line ); 573 unless ($started) { 574 $current_indent = length $spaces; 575 } 576 next; 577 } 578 unless ($started) { 579 $started = 1; 580 $current_indent = length $spaces; 581 } 582 push @lines, $content; 583 push @tokens, ( 584 INDENT => $spaces, $self->line, 585 BLOCK_SCALAR_CONTENT => $content, $self->line, 586 EOL => $eol, $self->line, 587 ); 588 } 589 my $value = YAML::PP::Render->render_block_scalar($context, $chomp, \@lines); 590 my @eol = splice @tokens, -3; 591 $self->push_subtokens( { name => 'BLOCK_SCALAR', value => $value }, \@tokens ); 592 $self->push_tokens([ @eol ]); 593 return 0; 594} 595 596sub fetch_quoted { 597 my ($self, $indent, $context) = @_; 598 my $next_line = $self->next_line; 599 my $yaml = \$next_line->[1]; 600 my $spaces = $next_line->[0]; 601 602 my $token_name = $TOKEN_NAMES{ $context }; 603 $$yaml =~ s/\A\Q$context// or die "Unexpected";; 604 my @tokens = ( $token_name => $context, $self->line ); 605 606 my $start = 1; 607 my @values; 608 while (1) { 609 610 unless ($start) { 611 $next_line = $self->fetch_next_line or do { 612 for (my $i = 0; $i < @tokens; $i+= 3) { 613 my $token = $tokens[ $i + 1 ]; 614 if (ref $token) { 615 $tokens[ $i + 1 ] = $token->{orig}; 616 } 617 } 618 $self->push_tokens(\@tokens); 619 $self->exception("Missing closing quote <$context> at EOF"); 620 }; 621 $start = 0; 622 $spaces = $next_line->[0]; 623 $yaml = \$next_line->[1]; 624 625 if (not length $$yaml) { 626 push @tokens, ( EOL => $spaces . $next_line->[2], $self->line ); 627 $self->set_next_line(undef); 628 push @values, { value => '', orig => '' }; 629 next; 630 } 631 elsif (not $spaces and $$yaml =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) { 632 for (my $i = 0; $i < @tokens; $i+= 3) { 633 my $token = $tokens[ $i + 1 ]; 634 if (ref $token) { 635 $tokens[ $i + 1 ] = $token->{orig}; 636 } 637 } 638 $self->push_tokens(\@tokens); 639 $self->exception("Missing closing quote <$context> or invalid document marker"); 640 } 641 elsif ((length $spaces) < $indent) { 642 for (my $i = 0; $i < @tokens; $i+= 3) { 643 my $token = $tokens[ $i + 1 ]; 644 if (ref $token) { 645 $tokens[ $i + 1 ] = $token->{orig}; 646 } 647 } 648 $self->push_tokens(\@tokens); 649 $self->exception("Wrong indendation or missing closing quote <$context>"); 650 } 651 652 if ($$yaml =~ s/\A($RE_WS+)//) { 653 $spaces .= $1; 654 } 655 push @tokens, ( WS => $spaces, $self->line ); 656 } 657 658 my $v = $self->_read_quoted_tokens($start, $context, $yaml, \@tokens); 659 push @values, $v; 660 if ($tokens[-3] eq $token_name) { 661 if ($start) { 662 $self->push_subtokens( 663 { name => 'QUOTED', value => $v->{value} }, \@tokens 664 ); 665 } 666 else { 667 my $value = YAML::PP::Render->render_quoted($context, \@values); 668 $self->push_subtokens( 669 { name => 'QUOTED_MULTILINE', value => $value }, \@tokens 670 ); 671 } 672 $self->set_context(1) if $self->flowcontext; 673 if (length $$yaml) { 674 my $partial = $self->_fetch_next_tokens($next_line); 675 if (not $partial) { 676 $self->set_next_line(undef); 677 } 678 return 0; 679 } 680 else { 681 @tokens = (); 682 push @tokens, ( EOL => $next_line->[2], $self->line ); 683 $self->push_tokens(\@tokens); 684 $self->set_next_line(undef); 685 return; 686 } 687 } 688 $tokens[-2] .= $next_line->[2]; 689 $self->set_next_line(undef); 690 $start = 0; 691 } 692} 693 694sub _read_quoted_tokens { 695 my ($self, $start, $first, $yaml, $tokens) = @_; 696 my $quoted = ''; 697 my $decoded = ''; 698 my $token_name = $TOKEN_NAMES{ $first }; 699 if ($first eq "'") { 700 my $regex = $REGEXES{SINGLEQUOTED}; 701 if ($$yaml =~ s/\A($regex)//) { 702 $quoted .= $1; 703 $decoded .= $1; 704 $decoded =~ s/''/'/g; 705 } 706 } 707 else { 708 ($quoted, $decoded) = $self->_read_doublequoted($yaml); 709 } 710 my $eol = ''; 711 unless (length $$yaml) { 712 if ($quoted =~ s/($RE_WS+)\z//) { 713 $eol = $1; 714 $decoded =~ s/($eol)\z//; 715 } 716 } 717 my $value = { value => $decoded, orig => $quoted }; 718 719 if ($$yaml =~ s/\A$first//) { 720 if ($start) { 721 push @$tokens, ( $token_name . 'D' => $value, $self->line ); 722 } 723 else { 724 push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line ); 725 } 726 push @$tokens, ( $token_name => $first, $self->line ); 727 return $value; 728 } 729 if (length $$yaml) { 730 push @$tokens, ( $token_name . 'D' => $value->{orig}, $self->line ); 731 $self->push_tokens($tokens); 732 $self->exception("Invalid quoted <$first> string"); 733 } 734 735 push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line ); 736 push @$tokens, ( EOL => $eol, $self->line ); 737 738 return $value; 739} 740 741sub _read_doublequoted { 742 my ($self, $yaml) = @_; 743 my $quoted = ''; 744 my $decoded = ''; 745 while (1) { 746 my $last = 1; 747 if ($$yaml =~ s/\A([^"\\]+)//) { 748 $quoted .= $1; 749 $decoded .= $1; 750 $last = 0; 751 } 752 if ($$yaml =~ s/\A($RE_ESCAPES)//) { 753 $quoted .= $1; 754 my $dec = defined $2 ? $CONTROL{ $2 } 755 : defined $3 ? chr hex $3 756 : defined $4 ? chr hex $4 757 : chr hex $5; 758 $decoded .= $dec; 759 $last = 0; 760 } 761 if ($$yaml =~ s/\A(\\)\z//) { 762 $quoted .= $1; 763 $decoded .= $1; 764 last; 765 } 766 last if $last; 767 } 768 return ($quoted, $decoded); 769} 770 771sub _fetch_next_tokens_directive { 772 my ($self, $yaml, $eol) = @_; 773 my @tokens; 774 775 if ($$yaml =~ s/\A(\s*%YAML)//) { 776 my $dir = $1; 777 if ($$yaml =~ s/\A( )//) { 778 $dir .= $1; 779 if ($$yaml =~ s/\A(1\.[12]$RE_WS*)//) { 780 $dir .= $1; 781 push @tokens, ( YAML_DIRECTIVE => $dir, $self->line ); 782 } 783 else { 784 $$yaml =~ s/\A(.*)//; 785 $dir .= $1; 786 my $warn = $ENV{YAML_PP_RESERVED_DIRECTIVE} || 'warn'; 787 if ($warn eq 'warn') { 788 warn "Found reserved directive '$dir'"; 789 } 790 elsif ($warn eq 'fatal') { 791 die "Found reserved directive '$dir'"; 792 } 793 push @tokens, ( RESERVED_DIRECTIVE => "$dir", $self->line ); 794 } 795 } 796 else { 797 $$yaml =~ s/\A(.*)//; 798 $dir .= $1; 799 push @tokens, ( 'Invalid directive' => $dir, $self->line ); 800 push @tokens, ( EOL => $eol, $self->line ); 801 $self->push_tokens(\@tokens); 802 return; 803 } 804 } 805 elsif ($$yaml =~ s/\A(\s*%TAG +(!$RE_NS_WORD_CHAR*!|!) +(tag:\S+|!$RE_URI_CHAR+)$RE_WS*)//) { 806 push @tokens, ( TAG_DIRECTIVE => $1, $self->line ); 807 # TODO 808 my $tag_alias = $2; 809 my $tag_url = $3; 810 } 811 elsif ($$yaml =~ s/\A(\s*\A%(?:\w+).*)//) { 812 push @tokens, ( RESERVED_DIRECTIVE => $1, $self->line ); 813 my $warn = $ENV{YAML_PP_RESERVED_DIRECTIVE} || 'warn'; 814 if ($warn eq 'warn') { 815 warn "Found reserved directive '$1'"; 816 } 817 elsif ($warn eq 'fatal') { 818 die "Found reserved directive '$1'"; 819 } 820 } 821 else { 822 push @tokens, ( 'Invalid directive' => $$yaml, $self->line ); 823 push @tokens, ( EOL => $eol, $self->line ); 824 $self->push_tokens(\@tokens); 825 return; 826 } 827 if (not length $$yaml) { 828 push @tokens, ( EOL => $eol, $self->line ); 829 } 830 else { 831 push @tokens, ( 'Invalid directive' => $$yaml, $self->line ); 832 push @tokens, ( EOL => $eol, $self->line ); 833 } 834 $self->push_tokens(\@tokens); 835 return; 836} 837 838sub push_tokens { 839 my ($self, $new_tokens) = @_; 840 my $next = $self->next_tokens; 841 my $line = $self->line; 842 my $column = $self->offset; 843 844 for (my $i = 0; $i < @$new_tokens; $i += 3) { 845 my $value = $new_tokens->[ $i + 1 ]; 846 my $name = $new_tokens->[ $i ]; 847 my $line = $new_tokens->[ $i + 2 ]; 848 my $push = { 849 name => $name, 850 line => $line, 851 column => $column, 852 value => $value, 853 }; 854 $column += length $value unless $name eq 'CONTEXT'; 855 push @$next, $push; 856 if ($name eq 'EOL') { 857 $column = 0; 858 } 859 } 860 $self->set_offset($column); 861 return $next; 862} 863 864sub push_subtokens { 865 my ($self, $token, $subtokens) = @_; 866 my $next = $self->next_tokens; 867 my $line = $self->line; 868 my $column = $self->offset; 869 $token->{column} = $column; 870 $token->{subtokens} = \my @sub; 871 872 for (my $i = 0; $i < @$subtokens; $i+=3) { 873 my $name = $subtokens->[ $i ]; 874 my $value = $subtokens->[ $i + 1 ]; 875 my $line = $subtokens->[ $i + 2 ]; 876 my $push = { 877 name => $subtokens->[ $i ], 878 line => $line, 879 column => $column, 880 }; 881 if (ref $value eq 'HASH') { 882 %$push = ( %$push, %$value ); 883 $column += length $value->{orig}; 884 } 885 else { 886 $push->{value} = $value; 887 $column += length $value; 888 } 889 if ($push->{name} eq 'EOL') { 890 $column = 0; 891 } 892 push @sub, $push; 893 } 894 $token->{line} = $sub[0]->{line}; 895 push @$next, $token; 896 $self->set_offset($column); 897 return $next; 898} 899 900sub exception { 901 my ($self, $msg) = @_; 902 my $next = $self->next_tokens; 903 $next = []; 904 my $line = @$next ? $next->[0]->{line} : $self->line; 905 my @caller = caller(0); 906 my $yaml = ''; 907 if (my $nl = $self->next_line) { 908 $yaml = join '', @$nl; 909 $yaml = $nl->[1]; 910 } 911 my $e = YAML::PP::Exception->new( 912 line => $line, 913 column => $self->offset + 1, 914 msg => $msg, 915 next => $next, 916 where => $caller[1] . ' line ' . $caller[2], 917 yaml => $yaml, 918 ); 919 croak $e; 920} 921 9221; 923