1package TAP::Parser::YAMLish::Reader;
2
3use strict;
4use warnings;
5
6use base 'TAP::Object';
7
8our $VERSION = '3.48';
9
10                             # No EBCDIC support on early perls
11*to_native = (ord "A" == 65 || $] < 5.008)
12             ? sub { return shift }
13             : sub { utf8::unicode_to_native(shift) };
14
15# TODO:
16#   Handle blessed object syntax
17
18# Printable characters for escapes
19my %UNESCAPES = (
20    z => "\x00", a => "\a",  t    => "\t",
21    n => "\n",   v => "\cK", f    => "\f",
22    r => "\r",   e => "\e",  '\\' => '\\',
23);
24
25my $QQ_STRING    = qr{ " (?:\\. | [^"])* " }x;
26my $HASH_LINE    = qr{ ^ ($QQ_STRING|\S+) \s* : \s* (?: (.+?) \s* )? $ }x;
27my $IS_HASH_KEY  = qr{ ^ [\w\'\"] }x;
28my $IS_END_YAML  = qr{ ^ \.\.\. \s* $ }x;
29my $IS_QQ_STRING = qr{ ^ $QQ_STRING $ }x;
30my $IS_ARRAY_LINE = qr{ ^ - \s* ($QQ_STRING|\S+) }x;
31
32# new() implementation supplied by TAP::Object
33
34sub read {
35    my $self = shift;
36    my $obj  = shift;
37
38    die "Must have a code reference to read input from"
39      unless ref $obj eq 'CODE';
40
41    $self->{reader}  = $obj;
42    $self->{capture} = [];
43
44    # Prime the reader
45    $self->_next;
46    return unless $self->{next};
47
48    my $doc = $self->_read;
49
50    # The terminator is mandatory otherwise we'd consume a line from the
51    # iterator that doesn't belong to us. If we want to remove this
52    # restriction we'll have to implement look-ahead in the iterators.
53    # Which might not be a bad idea.
54    my $dots = $self->_peek;
55    die "Missing '...' at end of YAMLish"
56      unless defined $dots
57          and $dots =~ $IS_END_YAML;
58
59    delete $self->{reader};
60    delete $self->{next};
61
62    return $doc;
63}
64
65sub get_raw { join( "\n", grep defined, @{ shift->{capture} || [] } ) . "\n" }
66
67sub _peek {
68    my $self = shift;
69    return $self->{next} unless wantarray;
70    my $line = $self->{next};
71    $line =~ /^ (\s*) (.*) $ /x;
72    return ( $2, length $1 );
73}
74
75sub _next {
76    my $self = shift;
77    die "_next called with no reader"
78      unless $self->{reader};
79    my $line = $self->{reader}->();
80    $self->{next} = $line;
81    push @{ $self->{capture} }, $line;
82}
83
84sub _read {
85    my $self = shift;
86
87    my $line = $self->_peek;
88
89    # Do we have a document header?
90    if ( $line =~ /^ --- (?: \s* (.+?)? \s* )? $/x ) {
91        $self->_next;
92
93        return $self->_read_scalar($1) if defined $1;    # Inline?
94
95        my ( $next, $indent ) = $self->_peek;
96
97        if ( $next =~ /^ - /x ) {
98            return $self->_read_array($indent);
99        }
100        elsif ( $next =~ $IS_HASH_KEY ) {
101            return $self->_read_hash( $next, $indent );
102        }
103        elsif ( $next =~ $IS_END_YAML ) {
104            die "Premature end of YAMLish";
105        }
106        else {
107            die "Unsupported YAMLish syntax: '$next'";
108        }
109    }
110    else {
111        die "YAMLish document header not found";
112    }
113}
114
115# Parse a double quoted string
116sub _read_qq {
117    my $self = shift;
118    my $str  = shift;
119
120    unless ( $str =~ s/^ " (.*?) " $/$1/x ) {
121        die "Internal: not a quoted string";
122    }
123
124    $str =~ s/\\"/"/gx;
125    $str =~ s/ \\ ( [tartan\\favez] | x([0-9a-fA-F]{2}) )
126                 / (length($1) > 1) ? pack("H2", to_native($2))
127                                    : $UNESCAPES{$1} /gex;
128    return $str;
129}
130
131# Parse a scalar string to the actual scalar
132sub _read_scalar {
133    my $self   = shift;
134    my $string = shift;
135
136    return undef if $string eq '~';
137    return {} if $string eq '{}';
138    return [] if $string eq '[]';
139
140    if ( $string eq '>' || $string eq '|' ) {
141
142        my ( $line, $indent ) = $self->_peek;
143        die "Multi-line scalar content missing" unless defined $line;
144
145        my @multiline = ($line);
146
147        while (1) {
148            $self->_next;
149            my ( $next, $ind ) = $self->_peek;
150            last if $ind < $indent;
151
152            my $pad = $string eq '|' ? ( ' ' x ( $ind - $indent ) ) : '';
153            push @multiline, $pad . $next;
154        }
155
156        return join( ( $string eq '>' ? ' ' : "\n" ), @multiline ) . "\n";
157    }
158
159    if ( $string =~ /^ ' (.*) ' $/x ) {
160        ( my $rv = $1 ) =~ s/''/'/g;
161        return $rv;
162    }
163
164    if ( $string =~ $IS_QQ_STRING ) {
165        return $self->_read_qq($string);
166    }
167
168    if ( $string =~ /^['"]/ ) {
169
170        # A quote with folding... we don't support that
171        die __PACKAGE__ . " does not support multi-line quoted scalars";
172    }
173
174    # Regular unquoted string
175    return $string;
176}
177
178sub _read_nested {
179    my $self = shift;
180
181    my ( $line, $indent ) = $self->_peek;
182
183    if ( $line =~ /^ -/x ) {
184        return $self->_read_array($indent);
185    }
186    elsif ( $line =~ $IS_HASH_KEY ) {
187        return $self->_read_hash( $line, $indent );
188    }
189    else {
190        die "Unsupported YAMLish syntax: '$line'";
191    }
192}
193
194# Parse an array
195sub _read_array {
196    my ( $self, $limit ) = @_;
197
198    my $ar = [];
199
200    while (1) {
201        my ( $line, $indent ) = $self->_peek;
202        last
203          if $indent < $limit
204              || !defined $line
205              || $line =~ $IS_END_YAML;
206
207        if ( $indent > $limit ) {
208            die "Array line over-indented";
209        }
210
211        if ( $line =~ /^ (- \s+) \S+ \s* : (?: \s+ | $ ) /x ) {
212            $indent += length $1;
213            $line =~ s/-\s+//;
214            push @$ar, $self->_read_hash( $line, $indent );
215        }
216        elsif ( $line =~ /^ - \s* (.+?) \s* $/x ) {
217            die "Unexpected start of YAMLish" if $line =~ /^---/;
218            $self->_next;
219            push @$ar, $self->_read_scalar($1);
220        }
221        elsif ( $line =~ /^ - \s* $/x ) {
222            $self->_next;
223            push @$ar, $self->_read_nested;
224        }
225        elsif ( $line =~ $IS_HASH_KEY ) {
226            $self->_next;
227            push @$ar, $self->_read_hash( $line, $indent, );
228        }
229        else {
230            die "Unsupported YAMLish syntax: '$line'";
231        }
232    }
233
234    return $ar;
235}
236
237sub _read_hash {
238    my ( $self, $line, $limit ) = @_;
239
240    my $indent;
241    my $hash = {};
242
243    while (1) {
244        die "Badly formed hash line: '$line'"
245          unless $line =~ $HASH_LINE;
246
247        my ( $key, $value ) = ( $self->_read_scalar($1), $2 );
248        $self->_next;
249
250        my ( $next_line, $next_indent ) = $self->_peek;
251
252        if ( defined $value ) {
253            $hash->{$key} = $self->_read_scalar($value);
254        }
255        elsif (not defined $value                  # no explicit undef ("~") given
256               and $next_indent <= $limit          # next line is same or less indentation
257               and $next_line !~ $IS_ARRAY_LINE)   # arrays can start at same indent
258        {
259            $hash->{$key} = undef;
260        }
261        else {
262            $hash->{$key} = $self->_read_nested;
263        }
264
265        ( $line, $indent ) = $self->_peek;
266        last
267          if $indent < $limit
268              || !defined $line
269              || $line =~ $IS_END_YAML;
270    }
271
272    return $hash;
273}
274
2751;
276
277__END__
278
279=pod
280
281=head1 NAME
282
283TAP::Parser::YAMLish::Reader - Read YAMLish data from iterator
284
285=head1 VERSION
286
287Version 3.48
288
289=head1 SYNOPSIS
290
291=head1 DESCRIPTION
292
293Note that parts of this code were derived from L<YAML::Tiny> with the
294permission of Adam Kennedy.
295
296=head1 METHODS
297
298=head2 Class Methods
299
300=head3 C<new>
301
302The constructor C<new> creates and returns an empty
303C<TAP::Parser::YAMLish::Reader> object.
304
305 my $reader = TAP::Parser::YAMLish::Reader->new;
306
307=head2 Instance Methods
308
309=head3 C<read>
310
311 my $got = $reader->read($iterator);
312
313Read YAMLish from a L<TAP::Parser::Iterator> and return the data structure it
314represents.
315
316=head3 C<get_raw>
317
318 my $source = $reader->get_source;
319
320Return the raw YAMLish source from the most recent C<read>.
321
322=head1 AUTHOR
323
324Andy Armstrong, <andy@hexten.net>
325
326Adam Kennedy wrote L<YAML::Tiny> which provided the template and many of
327the YAML matching regular expressions for this module.
328
329=head1 SEE ALSO
330
331L<YAML::Tiny>, L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>,
332L<http://use.perl.org/~Alias/journal/29427>
333
334=head1 COPYRIGHT
335
336Copyright 2007-2011 Andy Armstrong.
337
338Portions copyright 2006-2008 Adam Kennedy.
339
340This program is free software; you can redistribute
341it and/or modify it under the same terms as Perl itself.
342
343The full text of the license can be found in the
344LICENSE file included with this module.
345
346=cut
347
348