1#!/usr/bin/env perl
2use v5.10;
3use strict;
4use warnings;
5use utf8;
6use open qw/:std :utf8/;
7
8use Getopt::Long;
9use Pod::Usage;
10
11use if $^O eq 'MSWin32', 'Win32::Console::ANSI';
12use Term::ANSIColor;
13
14use constant {
15    NULL              => "\x00",
16    BSON_TYPE         => "C",
17    BSON_ENAME        => "Z*",
18    BSON_TYPE_NAME    => "CZ*",
19    BSON_DOUBLE       => "d",
20    BSON_STRING       => "l/A",
21    BSON_BOOLEAN      => "C",
22    BSON_REGEX        => "Z*Z*",
23    BSON_JSCODE       => "",
24    BSON_INT32        => "l",
25    BSON_INT64        => "q",
26    BSON_TIMESTAMP    => "q",
27    BSON_CODE_W_SCOPE => "l",
28    BSON_REMAINING    => 'a*',
29    BSON_SKIP_4_BYTES => 'x4',
30    BSON_OBJECTID     => 'a12',
31    BSON_BINARY_TYPE  => 'C',
32    BSON_CSTRING      => 'Z*',
33    BSON_BYTES        => 'a*'
34};
35
36my $BOLD = $^O eq 'MSWin32' ? "bold " : "";
37
38# minimum field size
39my %FIELD_SIZES = (
40    0x01 => 8,
41    0x02 => 5,
42    0x03 => 5,
43    0x04 => 5,
44    0x05 => 5,
45    0x06 => 0,
46    0x07 => 12,
47    0x08 => 1,
48    0x09 => 8,
49    0x0A => 0,
50    0x0B => 2,
51    0x0C => 17,
52    0x0D => 5,
53    0x0E => 5,
54    0x0F => 14,
55    0x10 => 4,
56    0x11 => 8,
57    0x12 => 8,
58    0x7F => 0,
59    0xFF => 0,
60);
61
62sub main {
63    my ( $hex, $file, $help );
64    GetOptions(
65        "file=s" => \$file,
66        "x"      => \$hex,
67        "help|h" => \$help,
68    ) or die("Error in command line args");
69    pod2usage( { -exitval => 2, -verbose => 2, } ) if $help;
70
71    if ( $file ) {
72        dump_file($file);
73    }
74    else {
75        dump_stdin($hex);
76    }
77}
78
79sub dump_stdin {
80    my $hex = shift;
81    while ( defined( my $bson = <STDIN> ) ) {
82        chomp $bson;
83        if ( !length($bson) ) {
84            print_error("[ no document ]\n");
85            next;
86        }
87        # in -x mode, treat leading # as a comment
88        if ( $hex && index( $bson, "#" ) == 0 ) {
89            say $bson;
90            next;
91        }
92        $bson =~ s[ ][]g if $hex;
93        $bson = pack( "H*", $bson ) if $hex;
94        dump_document( \$bson );
95        print "\n";
96    }
97}
98
99sub dump_file {
100    my $file = shift;
101    open my $fh, "<", $file;
102    binmode($fh);
103    my $data = do { local $/; <$fh> };
104    while ( length $data ) {
105        my $len = unpack( BSON_INT32, $data );
106        my $bson = substr($data,0,$len,'');
107        dump_document(\$bson);
108        print "\n";
109    }
110}
111
112sub dump_document {
113    my ( $ref, $is_array ) = @_;
114    print $is_array ? " [" : " {" if defined $is_array;
115    dump_header($ref);
116    1 while dump_field($ref);
117    print_error( " " . unpack( "H*", $$ref ) ) if length($$ref);
118    print $is_array ? " ]" : " }" if defined $is_array;
119    return;
120}
121
122sub dump_header {
123    my ($ref) = @_;
124
125    my $len = get_length( $ref, 4 );
126    return unless defined $len;
127
128    if ( $len < 5 || $len < length($$ref) + 4 ) {
129        print_length( $len, 'red' );
130    }
131    else {
132        print_length( $len, 'blue' );
133    }
134}
135
136sub dump_field {
137    my ($ref) = @_;
138
139    # detect end of document
140    if ( length($$ref) < 2 ) {
141        if ( length($$ref) == 0 ) {
142            print_error(" [missing terminator]");
143        }
144        else {
145            my $end = substr( $$ref, 0, 1, '' );
146            print_hex( $end, $end eq NULL ? 'blue' : 'red' );
147        }
148        return;
149    }
150
151    # unpack type
152    my $type = unpack( BSON_TYPE, substr( $$ref, 0, 1, '' ) );
153
154    if ( !exists $FIELD_SIZES{$type} ) {
155        print_type( $type, 'red' );
156        return;
157    }
158
159    print_type($type);
160
161    # check for key termination
162    my $key_end = index( $$ref, NULL );
163    return if $key_end == -1;
164
165    # unpack key
166    my $key = unpack( BSON_CSTRING, substr( $$ref, 0, $key_end + 1, '' ) );
167    print_key($key);
168
169    # Check if there is enough data to complete field for this type
170    # This is greedy, so it checks length, not length -1
171    my $min_size = $FIELD_SIZES{$type};
172    return if length($$ref) < $min_size;
173
174    # fields without payload: 0x06, 0x0A, 0x7F, 0xFF
175    return 1 if $min_size == 0;
176
177    # document or array
178    if ( $type == 0x03 || $type == 0x04 ) {
179        my ($len) = unpack( BSON_INT32, $$ref );
180        my $doc = substr( $$ref, 0, $len, '' );
181        dump_document( \$doc, $type == 0x04 );
182        return 1;
183    }
184
185    # fixed width fields
186    if (   $type == 0x01
187        || $type == 0x07
188        || $type == 0x09
189        || $type == 0x10
190        || $type == 0x11
191        || $type == 0x12 )
192    {
193        my $len = ( $type == 0x10 ? 4 : $type == 0x07 ? 12 : 8 );
194        print_hex( substr( $$ref, 0, $len, '' ) );
195        return 1;
196    }
197
198    # boolean
199    if ( $type == 0x08 ) {
200        my $bool = substr( $$ref, 0, 1, '' );
201        print_hex( $bool, ( $bool eq "\x00" || $bool eq "\x01" ) ? 'green' : 'red' );
202        return 1;
203    }
204
205    # binary field
206    if ( $type == 0x05 ) {
207        my $len = get_length( $ref, -1 );
208        my $subtype = substr( $$ref, 0, 1, '' );
209
210        if ( !defined($len) ) {
211            print_hex($subtype);
212            return;
213        }
214
215        my $binary = substr( $$ref, 0, $len, '' );
216
217        print_length($len);
218        print_hex($subtype);
219
220        if ( $subtype eq "\x02" ) {
221            my $bin_len = get_length( \$binary );
222            if ( !defined($bin_len) ) {
223                print_hex( $binary, 'red' );
224                return;
225            }
226            if ( $bin_len != length($binary) ) {
227                print_length( $bin_len, 'red' );
228                print_hex( $binary, 'red' );
229                return;
230            }
231        }
232
233        print_hex($binary) if length($binary);
234        return 1;
235    }
236
237    # string or symbol or code
238    if ( $type == 0x02 || $type == 0x0e || $type == 0x0d ) {
239        my ( $len, $string ) = get_string($ref);
240        return unless defined $len;
241
242        print_length( $len, 'cyan' );
243        print_string($string);
244        return 1;
245
246    }
247
248    # regex 0x0B
249    if ( $type == 0x0B ) {
250        my ( $pattern, $flag ) = unpack( BSON_CSTRING . BSON_CSTRING, $$ref );
251        substr( $$ref, 0, length($pattern) + length($flag) + 2, '' );
252        print_string($pattern);
253        print_string($flag);
254        return 1;
255    }
256
257    # code with scope 0x0F
258    if ( $type == 0x0F ) {
259        my $len = get_length( $ref, 4 );
260        return unless defined $len;
261
262        # len + string + doc minimum size is 4 + 5 + 5
263        if ( $len < 14 ) {
264            print_length( $len, 'red' );
265            return;
266        }
267
268        print_length($len);
269
270        my $cws = substr( $$ref, 0, $len - 4, '' );
271
272        my ( $strlen, $string ) = get_string( \$cws );
273
274        if ( !defined $strlen ) {
275            print_hex( $cws, 'red' );
276            return;
277        }
278
279        print_length($strlen);
280        print_string($string);
281
282        dump_document( \$cws, 0 );
283
284        return 1;
285    }
286
287    # dbpointer 0x0C
288    if ( $type == 0x0C ) {
289        my ( $len, $string ) = get_string($ref);
290        return unless defined $len;
291
292        print_length($len);
293        print_string($string);
294
295        # Check if there are 12 bytes (plus terminator) or more
296        return if length($$ref) < 13;
297
298        my $oid = substr( $$ref, 0, 12, '' );
299        print_hex($oid);
300
301        return 1;
302    }
303
304    die "Shouldn't reach here";
305}
306
307sub get_length {
308    my ( $ref, $adj ) = @_;
309    $adj ||= 0;
310    my $len = unpack( BSON_INT32, substr( $$ref, 0, 4, '' ) );
311    return unless defined $len;
312
313    # check if requested length is too long
314    if ( $len < 0 || $len > length($$ref) + $adj ) {
315        print_length( $len, 'red' );
316        return;
317    }
318
319    return $len;
320}
321
322sub get_string {
323    my ($ref) = @_;
324
325    my $len = get_length($ref);
326    return unless defined $len;
327
328    # len must be at least 1 for trailing 0x00
329    if ( $len == 0 ) {
330        print_length( $len, 'red' );
331        return;
332    }
333
334    my $string = substr( $$ref, 0, $len, '' );
335
336    # check if null terminated
337    if ( substr( $string, -1, 1 ) ne NULL ) {
338        print_length($len);
339        print_hex( $string, 'red' );
340        return;
341    }
342
343    # remove trailing null
344    chop($string);
345
346    # try to decode to UTF-8
347    if ( !utf8::decode($string) ) {
348        print_length($len);
349        print_hex( $string . "\x00", 'red' );
350        return;
351    }
352
353    return ( $len, $string );
354}
355
356sub print_error {
357    my ($text) = @_;
358    print colored( ["${BOLD}red"], $text );
359}
360
361sub print_type {
362    my ( $type, $color ) = @_;
363    $color ||= 'magenta';
364    print colored( ["$BOLD$color"], sprintf( " %02x", $type ) );
365}
366
367sub print_key {
368    my ($string) = @_;
369    print_string( $string, 'yellow' );
370}
371
372sub print_string {
373    my ( $string, $color ) = @_;
374    $color ||= 'green';
375    $string =~ s{([^[:graph:]])}{sprintf("\\x%02x",ord($1))}ge;
376    print colored( ["$BOLD$color"], qq[ "$string"] . " 00" );
377}
378
379sub print_length {
380    my ( $len, $color ) = @_;
381    $color ||= 'cyan';
382    print colored( ["$BOLD$color"], " " . unpack( "H*", pack( BSON_INT32, $len ) ) );
383}
384
385sub print_hex {
386    my ( $value, $color ) = @_;
387    $color ||= 'green';
388    print colored( ["$BOLD$color"], " " . uc( unpack( "H*", $value ) ) );
389}
390
391main();
392
393__END__
394
395=head1 NAME
396
397bsonview - dump a BSON string with color output showing structure
398
399=head1 SYNOPSIS
400
401    cat file.bson | bsondump
402
403    echo "0500000000" | bsondump -x
404
405=head1 OPTIONS
406
407    -x          input is in hex format (default is 0)
408    --help, -h  show help
409
410=head1 USAGE
411
412Reads from C<STDIN> and dumps colored structures to C<STDOUT>.
413
414=head1 AUTHOR
415
416=over 4
417
418=item *
419
420David Golden <david@mongodb.com>
421
422=back
423
424=head1 COPYRIGHT AND LICENSE
425
426This software is Copyright (c) 2016 by MongoDB, Inc..
427
428This is free software, licensed under:
429
430  The Apache License, Version 2.0, January 2004
431
432=cut
433
434=cut
435