1use strict;
2use warnings;
3package YAML::PP::Lexer;
4
5our $VERSION = '0.020'; # VERSION
6
7use constant TRACE => $ENV{YAML_PP_TRACE} ? 1 : 0;
8use constant DEBUG => ($ENV{YAML_PP_DEBUG} || $ENV{YAML_PP_TRACE}) ? 1 : 0;
9
10use YAML::PP::Grammar qw/ $GRAMMAR /;
11use Carp qw/ croak /;
12
13sub new {
14    my ($class, %args) = @_;
15    my $self = bless {
16        reader => $args{reader},
17    }, $class;
18    $self->init;
19    return $self;
20}
21
22sub init {
23    my ($self) = @_;
24    $self->{next_tokens} = [];
25    $self->{next_line} = undef;
26    $self->{line} = 0;
27    $self->{offset} = 0;
28    $self->{flowcontext} = 0;
29}
30
31sub next_line { return $_[0]->{next_line} }
32sub set_next_line { $_[0]->{next_line} = $_[1] }
33sub reader { return $_[0]->{reader} }
34sub set_reader { $_[0]->{reader} = $_[1] }
35sub next_tokens { return $_[0]->{next_tokens} }
36sub line { return $_[0]->{line} }
37sub set_line { $_[0]->{line} = $_[1] }
38sub offset { return $_[0]->{offset} }
39sub set_offset { $_[0]->{offset} = $_[1] }
40sub inc_line { return $_[0]->{line}++ }
41sub context { return $_[0]->{context} }
42sub set_context { $_[0]->{context} = $_[1] }
43sub flowcontext { return $_[0]->{flowcontext} }
44sub set_flowcontext { $_[0]->{flowcontext} = $_[1] }
45
46my $RE_WS = '[\t ]';
47my $RE_LB = '[\r\n]';
48my $RE_DOC_END = qr/\A(\.\.\.)(?=$RE_WS|$)/m;
49my $RE_DOC_START = qr/\A(---)(?=$RE_WS|$)/m;
50my $RE_EOL = qr/\A($RE_WS+#.*|$RE_WS+)\z/;
51#my $RE_COMMENT_EOL = qr/\A(#.*)?(?:$RE_LB|\z)/;
52
53#ns-word-char    ::= ns-dec-digit | ns-ascii-letter | “-”
54my $RE_NS_WORD_CHAR = '[0-9A-Za-z-]';
55my $RE_URI_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'.  q{[0-9A-Za-z#;/?:@&=+$,_.!*'\(\)\[\]-]} . ')';
56my $RE_NS_TAG_CHAR = '(?:' . '%[0-9a-fA-F]{2}' .'|'.  q{[0-9A-Za-z#;/?:@&=+$_.*'\(\)-]} . ')';
57
58#  [#x21-#x7E]          /* 8 bit */
59# | #x85 | [#xA0-#xD7FF] | [#xE000-#xFFFD] /* 16 bit */
60# | [#x10000-#x10FFFF]                     /* 32 bit */
61
62#nb-char ::= c-printable - b-char - c-byte-order-mark
63#my $RE_NB_CHAR = '[\x21-\x7E]';
64my $RE_ANCHOR_CAR = '[\x21-\x2B\x2D-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
65
66my $RE_PLAIN_START = '[\x21\x22\x24-\x39\x3B-\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
67my $RE_PLAIN_END = '[\x21-\x39\x3B-\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]';
68my $RE_PLAIN_FIRST = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
69
70my $RE_PLAIN_START_FLOW = '[\x21\x22\x24-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
71my $RE_PLAIN_END_FLOW = '[\x21-\x2B\x2D-\x39\x3B-\x5A\x5C\x5E-\x7A\x7C\x7E\x85\xA0-\x{D7FF}\x{E000}-\x{FEFE}\x{FF00}-\x{FFFD}\x{10000}-\x{10FFFF}]';
72my $RE_PLAIN_FIRST_FLOW = '[\x24\x28-\x29\x2B\x2E-\x39\x3B-\x3D\x41-\x5A\x5C\x5E-\x5F\x61-\x7A\x7C\x7E\xA0-\xFF\x{100}-\x{10FFFF}]';
73# c-indicators
74#! 21
75#" 22
76## 23
77#% 25
78#& 26
79#' 27
80#* 2A
81#, 2C FLOW
82#- 2D XX
83#: 3A XX
84#> 3E
85#? 3F XX
86#@ 40
87#[ 5B FLOW
88#] 5D FLOW
89#` 60
90#{ 7B FLOW
91#| 7C
92#} 7D FLOW
93
94
95my $RE_PLAIN_WORD = "(?::+$RE_PLAIN_END|$RE_PLAIN_START)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*";
96my $RE_PLAIN_FIRST_WORD = "(?:[:?-]+$RE_PLAIN_END|$RE_PLAIN_FIRST)(?::+$RE_PLAIN_END|$RE_PLAIN_END)*";
97my $RE_PLAIN_WORDS = "(?:$RE_PLAIN_FIRST_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)";
98my $RE_PLAIN_WORDS2 = "(?:$RE_PLAIN_WORD(?:$RE_WS+$RE_PLAIN_WORD)*)";
99
100my $RE_PLAIN_WORD_FLOW = "(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_START_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*";
101my $RE_PLAIN_FIRST_WORD_FLOW = "(?:[:?-]+$RE_PLAIN_END_FLOW|$RE_PLAIN_FIRST_FLOW)(?::+$RE_PLAIN_END_FLOW|$RE_PLAIN_END_FLOW)*";
102my $RE_PLAIN_WORDS_FLOW = "(?:$RE_PLAIN_FIRST_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)";
103my $RE_PLAIN_WORDS_FLOW2 = "(?:$RE_PLAIN_WORD_FLOW(?:$RE_WS+$RE_PLAIN_WORD_FLOW)*)";
104
105
106#c-secondary-tag-handle  ::= “!” “!”
107#c-named-tag-handle  ::= “!” ns-word-char+ “!”
108#ns-tag-char ::= ns-uri-char - “!” - c-flow-indicator
109#ns-global-tag-prefix    ::= ns-tag-char ns-uri-char*
110#c-ns-local-tag-prefix   ::= “!” ns-uri-char*
111my $RE_TAG = "!(?:$RE_NS_WORD_CHAR*!$RE_NS_TAG_CHAR+|$RE_NS_TAG_CHAR+|<$RE_URI_CHAR+>|)";
112
113#c-ns-anchor-property    ::= “&” ns-anchor-name
114#ns-char ::= nb-char - s-white
115#ns-anchor-char  ::= ns-char - c-flow-indicator
116#ns-anchor-name  ::= ns-anchor-char+
117
118my $RE_SEQSTART = qr/\A(-)(?=$RE_WS|$)/m;
119my $RE_COMPLEX = qr/(\?)(?=$RE_WS|$)/m;
120my $RE_COMPLEXCOLON = qr/\A(:)(?=$RE_WS|$)/m;
121my $RE_ANCHOR = "&$RE_ANCHOR_CAR+";
122my $RE_ALIAS = "\\*$RE_ANCHOR_CAR+";
123
124
125my %REGEXES = (
126    ANCHOR => qr{($RE_ANCHOR)},
127    TAG => qr{($RE_TAG)},
128    ALIAS => qr{($RE_ALIAS)},
129    SINGLEQUOTED => qr{(?:''|[^'\r\n]+)*},
130);
131
132sub fetch_next_line {
133    my ($self) = @_;
134    my $next_line = $self->next_line;
135    if (defined $next_line ) {
136        return $next_line;
137    }
138
139    my $line = $self->reader->readline;
140    unless (defined $line) {
141        $self->set_next_line(undef);
142        return;
143    }
144    $self->inc_line;
145    $line =~ m/\A( *)([^\r\n]*)([\r\n]|\z)/ or die "Unexpected";
146    $next_line = [ $1,  $2, $3 ];
147    $self->set_next_line($next_line);
148    # $ESCAPE_CHAR from YAML.pm
149    if ($line =~ tr/\x00-\x08\x0b-\x0c\x0e-\x1f//) {
150        $self->exception("Control characters are not allowed");
151    }
152
153    return $next_line;
154}
155
156my %TOKEN_NAMES = (
157    '"' => 'DOUBLEQUOTE',
158    "'" => 'SINGLEQUOTE',
159    '|' => 'LITERAL',
160    '>' => 'FOLDED',
161    '!' => 'TAG',
162    '*' => 'ALIAS',
163    '&' => 'ANCHOR',
164    ':' => 'COLON',
165    '-' => 'DASH',
166    '?' => 'QUESTION',
167    '[' => 'FLOWSEQ_START',
168    ']' => 'FLOWSEQ_END',
169    '{' => 'FLOWMAP_START',
170    '}' => 'FLOWMAP_END',
171    ',' => 'FLOW_COMMA',
172    '---' => 'DOC_START',
173    '...' => 'DOC_END',
174);
175
176
177sub fetch_next_tokens {
178    my ($self) = @_;
179    my $next = $self->next_tokens;
180    return $next if @$next;
181
182    my $next_line = $self->fetch_next_line;
183    if (not $next_line) {
184        return [];
185    }
186
187    my $spaces = $next_line->[0];
188    my $yaml = \$next_line->[1];
189    if (not length $$yaml) {
190        $self->push_tokens([ EOL => join('', @$next_line), $self->line ]);
191        $self->set_next_line(undef);
192        return $next;
193    }
194    if (substr($$yaml, 0, 1) eq '#') {
195        $self->push_tokens([ EOL => join('', @$next_line), $self->line ]);
196        $self->set_next_line(undef);
197        return $next;
198    }
199    if (not $spaces and substr($$yaml, 0, 1) eq "%") {
200        $self->_fetch_next_tokens_directive($yaml, $next_line->[2]);
201        $self->set_context(0);
202        $self->set_next_line(undef);
203        return $next;
204    }
205    if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) {
206        $self->push_tokens([ $TOKEN_NAMES{ $1 } => $1, $self->line ]);
207    }
208    else {
209        $self->push_tokens([ SPACE => $spaces, $self->line ]);
210    }
211
212    my $partial = $self->_fetch_next_tokens($next_line);
213    unless ($partial) {
214        $self->set_next_line(undef);
215    }
216    return $next;
217}
218
219my %ANCHOR_ALIAS_TAG =    ( '&' => 1, '*' => 1, '!' => 1 );
220my %BLOCK_SCALAR =        ( '|' => 1, '>' => 1 );
221my %COLON_DASH_QUESTION = ( ':' => 1, '-' => 1, '?' => 1 );
222my %QUOTED =              ( '"' => 1, "'" => 1 );
223my %FLOW =                ( '{' => 1, '[' => 1, '}' => 1, ']' => 1, ',' => 1 );
224my %CONTEXT =             ( '"' => 1, "'" => 1, '>' => 1, '|' => 1 );
225
226my $RE_ESCAPES = qr{(?:
227    \\([ \\\/_0abefnrtvLNP"]) | \\x([0-9a-fA-F]{2})
228    | \\u([A-Fa-f0-9]{4}) | \\U([A-Fa-f0-9]{4,8})
229)}x;
230my %CONTROL = (
231    '\\' => '\\', '/' => '/', n => "\n", t => "\t", r => "\r", b => "\b",
232    'a' => "\a", 'b' => "\b", 'e' => "\e", 'f' => "\f", 'v' => "\x0b",
233    'P' => "\x{2029}", L => "\x{2028}", 'N' => "\x85",
234    '0' => "\0", '_' => "\xa0", ' ' => ' ', q/"/ => q/"/,
235);
236
237sub _fetch_next_tokens {
238    TRACE and warn __PACKAGE__.':'.__LINE__.": _fetch_next_tokens\n";
239    my ($self, $next_line) = @_;
240
241    my $yaml = \$next_line->[1];
242    my $eol = $next_line->[2];
243
244    my @tokens;
245
246    while (1) {
247        unless (length $$yaml) {
248            push @tokens, ( EOL => $eol, $self->line );
249            $self->push_tokens(\@tokens);
250            return;
251        }
252        my $first = substr($$yaml, 0, 1);
253        my $plain = 0;
254
255        if ($self->context) {
256            if ($$yaml =~ s/\A($RE_WS*)://) {
257                push @tokens, ( WS => $1, $self->line ) if $1;
258                push @tokens, ( COLON => ':', $self->line );
259                $self->set_context(0);
260                next;
261            }
262            if ($$yaml =~ s/\A($RE_WS*(?: #.*))\z//) {
263                push @tokens, ( EOL => $1 . $eol, $self->line );
264                $self->push_tokens(\@tokens);
265                return;
266            }
267            $self->set_context(0);
268        }
269        if ($CONTEXT{ $first }) {
270            push @tokens, ( CONTEXT => $first, $self->line );
271            $self->push_tokens(\@tokens);
272            return 1;
273        }
274        elsif ($COLON_DASH_QUESTION{ $first }) {
275            my $token_name = $TOKEN_NAMES{ $first };
276            if ($$yaml =~ s/\A\Q$first\E(?:($RE_WS+)|\z)//) {
277                my $token_name = $TOKEN_NAMES{ $first };
278                push @tokens, ( $token_name => $first, $self->line );
279                if (not defined $1) {
280                    push @tokens, ( EOL => $eol, $self->line );
281                    $self->push_tokens(\@tokens);
282                    return;
283                }
284                my $ws = $1;
285                if ($$yaml =~ s/\A(#.*|)\z//) {
286                    push @tokens, ( EOL => $ws . $1 . $eol, $self->line );
287                    $self->push_tokens(\@tokens);
288                    return;
289                }
290                push @tokens, ( WS => $ws, $self->line );
291                next;
292            }
293            elsif ($self->flowcontext and $$yaml =~ s/\A:(?=[,\{\}\[\]])//) {
294                push @tokens, ( $token_name => $first, $self->line );
295                next;
296            }
297            $plain = 1;
298        }
299        elsif ($ANCHOR_ALIAS_TAG{ $first }) {
300            my $token_name = $TOKEN_NAMES{ $first };
301            my $REGEX = $REGEXES{ $token_name };
302            if ($$yaml =~ s/\A$REGEX//) {
303                push @tokens, ( $token_name => $1, $self->line );
304            }
305            else {
306                push @tokens, ( "Invalid $token_name" => $$yaml, $self->line );
307                $self->push_tokens(\@tokens);
308                return;
309            }
310        }
311        elsif ($first eq ' ' or $first eq "\t") {
312            if ($$yaml =~ s/\A($RE_WS+)//) {
313                my $ws = $1;
314                if ($$yaml =~ s/\A((?:#.*)?\z)//) {
315                    push @tokens, ( EOL => $ws . $1 . $eol, $self->line );
316                    $self->push_tokens(\@tokens);
317                    return;
318                }
319                push @tokens, ( WS => $ws, $self->line );
320            }
321        }
322        elsif ($FLOW{ $first }) {
323            push @tokens, ( $TOKEN_NAMES{ $first } => $first, $self->line );
324            substr($$yaml, 0, 1, '');
325            my $flowcontext = $self->flowcontext;
326            if ($first eq '{' or $first eq '[') {
327                $self->set_flowcontext(++$flowcontext);
328            }
329            elsif ($first eq '}' or $first eq ']') {
330                $self->set_flowcontext(--$flowcontext);
331            }
332        }
333        else {
334            $plain = 1;
335        }
336
337        if ($plain) {
338            push @tokens, ( CONTEXT => '', $self->line );
339            $self->push_tokens(\@tokens);
340            return 1;
341        }
342
343    }
344
345    return;
346}
347
348sub fetch_plain {
349    my ($self, $indent, $context) = @_;
350    my $next_line = $self->next_line;
351    my $yaml = \$next_line->[1];
352    my $eol = $next_line->[2];
353    my $REGEX = $RE_PLAIN_WORDS;
354    if ($self->flowcontext) {
355        $REGEX = $RE_PLAIN_WORDS_FLOW;
356    }
357
358    my @tokens;
359    unless ($$yaml =~ s/\A($REGEX)//) {
360        $self->push_tokens(\@tokens);
361        $self->exception("Invalid plain scalar");
362    }
363    my $plain = $1;
364    push @tokens, ( PLAIN => $plain, $self->line );
365
366    if ($$yaml =~ s/\A(?:($RE_WS+#.*)|($RE_WS*))\z//) {
367        if (defined $1) {
368            push @tokens, ( EOL => $1 . $eol, $self->line );
369            $self->push_tokens(\@tokens);
370            $self->set_next_line(undef);
371            return;
372        }
373        else {
374            push @tokens, ( EOL => $2. $eol, $self->line );
375            $self->set_next_line(undef);
376        }
377    }
378    else {
379        $self->push_tokens(\@tokens);
380        my $partial = $self->_fetch_next_tokens($next_line);
381        if (not $partial) {
382            $self->set_next_line(undef);
383        }
384        return;
385    }
386
387    my $RE2 = $RE_PLAIN_WORDS2;
388    if ($self->flowcontext) {
389        $RE2 = $RE_PLAIN_WORDS_FLOW2;
390    }
391    my $fetch_next = 0;
392    my @lines = ($plain);
393    my @next;
394    LOOP: while (1) {
395        $next_line = $self->fetch_next_line;
396        if (not $next_line) {
397            last LOOP;
398        }
399        my $spaces = $next_line->[0];
400        my $yaml = \$next_line->[1];
401        my $eol = $next_line->[2];
402
403        if (not length $$yaml) {
404            push @tokens, ( EOL => $spaces . $eol, $self->line );
405            $self->set_next_line(undef);
406            push @lines, '';
407            next LOOP;
408        }
409
410        if (not $spaces and $$yaml =~ s/\A(---|\.\.\.)(?=$RE_WS|\z)//) {
411            push @next, $TOKEN_NAMES{ $1 } => $1, $self->line;
412            $fetch_next = 1;
413            last LOOP;
414        }
415        if ((length $spaces) < $indent) {
416            last LOOP;
417        }
418
419        my $ws = '';
420        if ($$yaml =~ s/\A($RE_WS+)//) {
421            $ws = $1;
422        }
423        if (not length $$yaml) {
424            push @tokens, ( EOL => $spaces . $ws . $eol, $self->line );
425            $self->set_next_line(undef);
426            push @lines, '';
427            next LOOP;
428        }
429        if ($$yaml =~ s/\A(#.*)\z//) {
430            push @tokens, ( EOL => $spaces . $ws . $1 . $eol, $self->line );
431            $self->set_next_line(undef);
432            last LOOP;
433        }
434
435        if ($$yaml =~ s/\A($RE2)//) {
436            push @tokens, INDENT => $spaces, $self->line;
437            push @tokens, WS => $ws, $self->line;
438            push @tokens, PLAIN => $1, $self->line;
439            push @lines, $1;
440            my $ws = '';
441            if ($$yaml =~ s/\A($RE_WS+)//) {
442                $ws = $1;
443            }
444            if (not length $$yaml) {
445                push @tokens, EOL => $ws . $eol, $self->line;
446                $self->set_next_line(undef);
447                next LOOP;
448            }
449
450            if ($$yaml =~ s/\A(#.*)\z//) {
451                push @tokens, EOL => $ws . $1 . $eol, $self->line;
452                $self->set_next_line(undef);
453                last LOOP;
454            }
455            else {
456                push @tokens, WS => $ws, $self->line if $ws;
457                $fetch_next = 1;
458            }
459        }
460        else {
461            push @tokens, SPACE => $spaces, $self->line;
462            push @tokens, WS => $ws, $self->line;
463            if ($self->flowcontext) {
464                $fetch_next = 1;
465            }
466            else {
467                push @tokens, ERROR => $$yaml, $self->line;
468            }
469        }
470
471        last LOOP;
472
473    }
474    # remove empty lines at the end
475    while (@lines > 1 and $lines[-1] eq '') {
476        pop @lines;
477    }
478    if (@lines > 1) {
479        my $value = YAML::PP::Render->render_multi_val(\@lines);
480        my @eol;
481        if ($tokens[-3] eq 'EOL') {
482            @eol = splice @tokens, -3;
483        }
484        $self->push_subtokens( { name => 'PLAIN_MULTI', value => $value }, \@tokens);
485        $self->push_tokens([ @eol, @next ]);
486    }
487    else {
488        $self->push_tokens([ @tokens, @next ]);
489    }
490    @tokens = ();
491    if ($fetch_next) {
492        my $partial = $self->_fetch_next_tokens($next_line);
493        if (not $partial) {
494            $self->set_next_line(undef);
495        }
496    }
497    return;
498}
499
500sub fetch_block {
501    my ($self, $indent, $context) = @_;
502    my $next_line = $self->next_line;
503    my $yaml = \$next_line->[1];
504    my $eol = $next_line->[2];
505
506    my @tokens;
507    my $token_name = $TOKEN_NAMES{ $context };
508    $$yaml =~ s/\A\Q$context\E// or die "Unexpected";
509    push @tokens, ( $token_name => $context, $self->line );
510    my $current_indent = $indent;
511    my $started = 0;
512    my $set_indent = 0;
513    my $chomp = '';
514    if ($$yaml =~ s/\A([1-9]\d*)([+-]?)//) {
515        push @tokens, ( BLOCK_SCALAR_INDENT => $1, $self->line );
516        $set_indent = $1;
517        $chomp = $2 if $2;
518        push @tokens, ( BLOCK_SCALAR_CHOMP => $2, $self->line ) if $2;
519    }
520    elsif ($$yaml =~ s/\A([+-])([1-9]\d*)?//) {
521        push @tokens, ( BLOCK_SCALAR_CHOMP => $1, $self->line );
522        $chomp = $1;
523        push @tokens, ( BLOCK_SCALAR_INDENT => $2, $self->line ) if $2;
524        $set_indent = $2 if $2;
525    }
526    if ($set_indent) {
527        $started = 1;
528        $current_indent = $set_indent;
529    }
530    if (not length $$yaml) {
531        push @tokens, ( EOL => $eol, $self->line );
532    }
533    elsif ($$yaml =~ s/\A($RE_WS*(?:$RE_WS#.*|))\z//) {
534        push @tokens, ( EOL => $1 . $eol, $self->line );
535    }
536    else {
537        $self->push_tokens(\@tokens);
538        $self->exception("Invalid block scalar");
539    }
540
541    my @lines;
542    while (1) {
543        $self->set_next_line(undef);
544        $next_line = $self->fetch_next_line;
545        if (not $next_line) {
546            last;
547        }
548        my $spaces = $next_line->[0];
549        my $content = $next_line->[1];
550        my $eol = $next_line->[2];
551        if (not $spaces and $content =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) {
552            last;
553        }
554        if ((length $spaces) < $current_indent) {
555            if (length $content) {
556                last;
557            }
558            else {
559                push @lines, '';
560                push @tokens, ( EOL => $spaces . $eol, $self->line );
561                next;
562            }
563        }
564        if ((length $spaces) > $current_indent) {
565            if ($started) {
566                ($spaces, my $more_spaces) = unpack "a${current_indent}a*", $spaces;
567                $content = $more_spaces . $content;
568            }
569        }
570        unless (length $content) {
571            push @lines, '';
572            push @tokens, ( INDENT => $spaces, $self->line, EOL => $eol, $self->line );
573            unless ($started) {
574                $current_indent = length $spaces;
575            }
576            next;
577        }
578        unless ($started) {
579            $started = 1;
580            $current_indent = length $spaces;
581        }
582        push @lines, $content;
583        push @tokens, (
584            INDENT => $spaces, $self->line,
585            BLOCK_SCALAR_CONTENT => $content, $self->line,
586            EOL => $eol, $self->line,
587        );
588    }
589    my $value = YAML::PP::Render->render_block_scalar($context, $chomp, \@lines);
590    my @eol = splice @tokens, -3;
591    $self->push_subtokens( { name => 'BLOCK_SCALAR', value => $value }, \@tokens );
592    $self->push_tokens([ @eol ]);
593    return 0;
594}
595
596sub fetch_quoted {
597    my ($self, $indent, $context) = @_;
598    my $next_line = $self->next_line;
599    my $yaml = \$next_line->[1];
600    my $spaces = $next_line->[0];
601
602    my $token_name = $TOKEN_NAMES{ $context };
603    $$yaml =~ s/\A\Q$context// or die "Unexpected";;
604    my @tokens = ( $token_name => $context, $self->line );
605
606    my $start = 1;
607    my @values;
608    while (1) {
609
610        unless ($start) {
611            $next_line = $self->fetch_next_line or do {
612                    for (my $i = 0; $i < @tokens; $i+= 3) {
613                        my $token = $tokens[ $i + 1 ];
614                        if (ref $token) {
615                            $tokens[ $i + 1 ] = $token->{orig};
616                        }
617                    }
618                    $self->push_tokens(\@tokens);
619                    $self->exception("Missing closing quote <$context> at EOF");
620                };
621            $start = 0;
622            $spaces = $next_line->[0];
623            $yaml = \$next_line->[1];
624
625            if (not length $$yaml) {
626                push @tokens, ( EOL => $spaces . $next_line->[2], $self->line );
627                $self->set_next_line(undef);
628                push @values, { value => '', orig => '' };
629                next;
630            }
631            elsif (not $spaces and $$yaml =~ m/\A(---|\.\.\.)(?=$RE_WS|\z)/) {
632                    for (my $i = 0; $i < @tokens; $i+= 3) {
633                        my $token = $tokens[ $i + 1 ];
634                        if (ref $token) {
635                            $tokens[ $i + 1 ] = $token->{orig};
636                        }
637                    }
638                $self->push_tokens(\@tokens);
639                $self->exception("Missing closing quote <$context> or invalid document marker");
640            }
641            elsif ((length $spaces) < $indent) {
642                for (my $i = 0; $i < @tokens; $i+= 3) {
643                    my $token = $tokens[ $i + 1 ];
644                    if (ref $token) {
645                        $tokens[ $i + 1 ] = $token->{orig};
646                    }
647                }
648                $self->push_tokens(\@tokens);
649                $self->exception("Wrong indendation or missing closing quote <$context>");
650            }
651
652            if ($$yaml =~ s/\A($RE_WS+)//) {
653                $spaces .= $1;
654            }
655            push @tokens, ( WS => $spaces, $self->line );
656        }
657
658        my $v = $self->_read_quoted_tokens($start, $context, $yaml, \@tokens);
659        push @values, $v;
660        if ($tokens[-3] eq $token_name) {
661            if ($start) {
662                $self->push_subtokens(
663                    { name => 'QUOTED', value => $v->{value} }, \@tokens
664                );
665            }
666            else {
667                my $value = YAML::PP::Render->render_quoted($context, \@values);
668                $self->push_subtokens(
669                    { name => 'QUOTED_MULTILINE', value => $value }, \@tokens
670                );
671            }
672            $self->set_context(1) if $self->flowcontext;
673            if (length $$yaml) {
674                my $partial = $self->_fetch_next_tokens($next_line);
675                if (not $partial) {
676                    $self->set_next_line(undef);
677                }
678                return 0;
679            }
680            else {
681                @tokens = ();
682                push @tokens, ( EOL => $next_line->[2], $self->line );
683                $self->push_tokens(\@tokens);
684                $self->set_next_line(undef);
685                return;
686            }
687        }
688        $tokens[-2] .= $next_line->[2];
689        $self->set_next_line(undef);
690        $start = 0;
691    }
692}
693
694sub _read_quoted_tokens {
695    my ($self, $start, $first, $yaml, $tokens) = @_;
696    my $quoted = '';
697    my $decoded = '';
698    my $token_name = $TOKEN_NAMES{ $first };
699    if ($first eq "'") {
700        my $regex = $REGEXES{SINGLEQUOTED};
701        if ($$yaml =~ s/\A($regex)//) {
702            $quoted .= $1;
703            $decoded .= $1;
704            $decoded =~ s/''/'/g;
705        }
706    }
707    else {
708        ($quoted, $decoded) = $self->_read_doublequoted($yaml);
709    }
710    my $eol = '';
711    unless (length $$yaml) {
712        if ($quoted =~ s/($RE_WS+)\z//) {
713            $eol = $1;
714            $decoded =~ s/($eol)\z//;
715        }
716    }
717    my $value = { value => $decoded, orig => $quoted };
718
719    if ($$yaml =~ s/\A$first//) {
720        if ($start) {
721            push @$tokens, ( $token_name . 'D' => $value, $self->line );
722        }
723        else {
724            push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line );
725        }
726        push @$tokens, ( $token_name => $first, $self->line );
727        return $value;
728    }
729    if (length $$yaml) {
730        push @$tokens, ( $token_name . 'D' => $value->{orig}, $self->line );
731        $self->push_tokens($tokens);
732        $self->exception("Invalid quoted <$first> string");
733    }
734
735    push @$tokens, ( $token_name . 'D_LINE' => $value, $self->line );
736    push @$tokens, ( EOL => $eol, $self->line );
737
738    return $value;
739}
740
741sub _read_doublequoted {
742    my ($self, $yaml) = @_;
743    my $quoted = '';
744    my $decoded = '';
745    while (1) {
746        my $last = 1;
747        if ($$yaml =~ s/\A([^"\\]+)//) {
748            $quoted .= $1;
749            $decoded .= $1;
750            $last = 0;
751        }
752        if ($$yaml =~ s/\A($RE_ESCAPES)//) {
753            $quoted .= $1;
754            my $dec = defined $2 ? $CONTROL{ $2 }
755                        : defined $3 ? chr hex $3
756                        : defined $4 ? chr hex $4
757                        : chr hex $5;
758            $decoded .= $dec;
759            $last = 0;
760        }
761        if ($$yaml =~ s/\A(\\)\z//) {
762            $quoted .= $1;
763            $decoded .= $1;
764            last;
765        }
766        last if $last;
767    }
768    return ($quoted, $decoded);
769}
770
771sub _fetch_next_tokens_directive {
772    my ($self, $yaml, $eol) = @_;
773    my @tokens;
774
775    if ($$yaml =~ s/\A(\s*%YAML)//) {
776        my $dir = $1;
777        if ($$yaml =~ s/\A( )//) {
778            $dir .= $1;
779            if ($$yaml =~ s/\A(1\.[12]$RE_WS*)//) {
780                $dir .= $1;
781                push @tokens, ( YAML_DIRECTIVE => $dir, $self->line );
782            }
783            else {
784                $$yaml =~ s/\A(.*)//;
785                $dir .= $1;
786                my $warn = $ENV{YAML_PP_RESERVED_DIRECTIVE} || 'warn';
787                if ($warn eq 'warn') {
788                    warn "Found reserved directive '$dir'";
789                }
790                elsif ($warn eq 'fatal') {
791                    die "Found reserved directive '$dir'";
792                }
793                push @tokens, ( RESERVED_DIRECTIVE => "$dir", $self->line );
794            }
795        }
796        else {
797            $$yaml =~ s/\A(.*)//;
798            $dir .= $1;
799            push @tokens, ( 'Invalid directive' => $dir, $self->line );
800            push @tokens, ( EOL => $eol, $self->line );
801            $self->push_tokens(\@tokens);
802            return;
803        }
804    }
805    elsif ($$yaml =~ s/\A(\s*%TAG +(!$RE_NS_WORD_CHAR*!|!) +(tag:\S+|!$RE_URI_CHAR+)$RE_WS*)//) {
806        push @tokens, ( TAG_DIRECTIVE => $1, $self->line );
807        # TODO
808        my $tag_alias = $2;
809        my $tag_url = $3;
810    }
811    elsif ($$yaml =~ s/\A(\s*\A%(?:\w+).*)//) {
812        push @tokens, ( RESERVED_DIRECTIVE => $1, $self->line );
813        my $warn = $ENV{YAML_PP_RESERVED_DIRECTIVE} || 'warn';
814        if ($warn eq 'warn') {
815            warn "Found reserved directive '$1'";
816        }
817        elsif ($warn eq 'fatal') {
818            die "Found reserved directive '$1'";
819        }
820    }
821    else {
822        push @tokens, ( 'Invalid directive' => $$yaml, $self->line );
823        push @tokens, ( EOL => $eol, $self->line );
824        $self->push_tokens(\@tokens);
825        return;
826    }
827    if (not length $$yaml) {
828        push @tokens, ( EOL => $eol, $self->line );
829    }
830    else {
831        push @tokens, ( 'Invalid directive' => $$yaml, $self->line );
832        push @tokens, ( EOL => $eol, $self->line );
833    }
834    $self->push_tokens(\@tokens);
835    return;
836}
837
838sub push_tokens {
839    my ($self, $new_tokens) = @_;
840    my $next = $self->next_tokens;
841    my $line = $self->line;
842    my $column = $self->offset;
843
844    for (my $i = 0; $i < @$new_tokens; $i += 3) {
845        my $value = $new_tokens->[ $i + 1 ];
846        my $name = $new_tokens->[ $i ];
847        my $line = $new_tokens->[ $i + 2 ];
848        my $push = {
849            name => $name,
850            line => $line,
851            column => $column,
852            value => $value,
853        };
854        $column += length $value unless $name eq 'CONTEXT';
855        push @$next, $push;
856        if ($name eq 'EOL') {
857            $column = 0;
858        }
859    }
860    $self->set_offset($column);
861    return $next;
862}
863
864sub push_subtokens {
865    my ($self, $token, $subtokens) = @_;
866    my $next = $self->next_tokens;
867    my $line = $self->line;
868    my $column = $self->offset;
869    $token->{column} = $column;
870    $token->{subtokens} = \my @sub;
871
872    for (my $i = 0; $i < @$subtokens; $i+=3) {
873        my $name = $subtokens->[ $i ];
874        my $value = $subtokens->[ $i + 1 ];
875        my $line = $subtokens->[ $i + 2 ];
876        my $push = {
877            name => $subtokens->[ $i ],
878            line => $line,
879            column => $column,
880        };
881        if (ref $value eq 'HASH') {
882            %$push = ( %$push, %$value );
883            $column += length $value->{orig};
884        }
885        else {
886            $push->{value} = $value;
887            $column += length $value;
888        }
889        if ($push->{name} eq 'EOL') {
890            $column = 0;
891        }
892        push @sub, $push;
893    }
894    $token->{line} = $sub[0]->{line};
895    push @$next, $token;
896    $self->set_offset($column);
897    return $next;
898}
899
900sub exception {
901    my ($self, $msg) = @_;
902    my $next = $self->next_tokens;
903    $next = [];
904    my $line = @$next ? $next->[0]->{line} : $self->line;
905    my @caller = caller(0);
906    my $yaml = '';
907    if (my $nl = $self->next_line) {
908        $yaml = join '', @$nl;
909        $yaml = $nl->[1];
910    }
911    my $e = YAML::PP::Exception->new(
912        line => $line,
913        column => $self->offset + 1,
914        msg => $msg,
915        next => $next,
916        where => $caller[1] . ' line ' . $caller[2],
917        yaml => $yaml,
918    );
919    croak $e;
920}
921
9221;
923