1# vim: set ts=2 sts=2 sw=2 expandtab smarttab:
2#
3# This file is part of Parse-ANSIColor-Tiny
4#
5# This software is copyright (c) 2011 by Randy Stauner.
6#
7# This is free software; you can redistribute it and/or modify it under
8# the same terms as the Perl 5 programming language system itself.
9#
10use strict;
11use warnings;
12
13package Parse::ANSIColor::Tiny;
14# git description: v0.600-2-gba6391f
15
16our $AUTHORITY = 'cpan:RWSTAUNER';
17# ABSTRACT: Determine attributes of ANSI-Colored string
18$Parse::ANSIColor::Tiny::VERSION = '0.601';
19our @COLORS = qw( black red green yellow blue magenta cyan white );
20our %FOREGROUND = (
21  (map { (               $COLORS[$_] =>  30 + $_ ) } 0 .. $#COLORS),
22  (map { (   'bright_' . $COLORS[$_] =>  90 + $_ ) } 0 .. $#COLORS),
23);
24our %BACKGROUND = (
25  (map { (       'on_' . $COLORS[$_] =>  40 + $_ ) } 0 .. $#COLORS),
26  (map { ('on_bright_' . $COLORS[$_] => 100 + $_ ) } 0 .. $#COLORS),
27);
28our %ATTRIBUTES = (
29  clear          => 0,
30  reset          => 0,
31  bold           => 1,
32  dark           => 2,
33  faint          => 2,
34  underline      => 4,
35  underscore     => 4,
36  blink          => 5,
37  reverse        => 7,
38  concealed      => 8,
39  reverse_off    => 27,
40  reset_foreground => 39,
41  reset_background => 49,
42  %FOREGROUND,
43  %BACKGROUND,
44);
45
46# Generating the 256-color codes involves a lot of codes and offsets that are
47# not helped by turning them into constants.
48## no critic (ValuesAndExpressions::ProhibitMagicNumbers)
49
50our @COLORS256;
51
52# The first 16 256-color codes are duplicates of the 16 ANSI colors,
53# included for completeness.
54for my $code (0 .. 15) {
55  my $name = "ansi$code";
56  $ATTRIBUTES{$name}      = "38;5;$code";
57  $ATTRIBUTES{"on_$name"} = "48;5;$code";
58  push @COLORS256, $name;
59}
60
61# 256-color RGB colors.  Red, green, and blue can each be values 0 through 5,
62# and the resulting 216 colors start with color 16.
63for my $r (0 .. 5) {
64  for my $g (0 .. 5) {
65    for my $b (0 .. 5) {
66      my $code = 16 + (6 * 6 * $r) + (6 * $g) + $b;
67      my $name = "rgb$r$g$b";
68      $ATTRIBUTES{$name}    = "38;5;$code";
69      $ATTRIBUTES{"on_$name"} = "48;5;$code";
70      push @COLORS256, $name;
71    }
72  }
73}
74
75# The last 256-color codes are 24 shades of grey.
76for my $n (0 .. 23) {
77  my $code = $n + 232;
78  my $name = "grey$n";
79  $ATTRIBUTES{$name}      = "38;5;$code";
80  $ATTRIBUTES{"on_$name"} = "48;5;$code";
81  push @COLORS256, $name;
82}
83
84# copied from Term::ANSIColor
85  our %ATTRIBUTES_R;
86  # Reverse lookup.  Alphabetically first name for a sequence is preferred.
87  for (reverse sort keys %ATTRIBUTES) {
88      $ATTRIBUTES_R{$ATTRIBUTES{$_}} = $_;
89  }
90
91
92sub new {
93  my $class = shift;
94  my $self = {
95    remove_escapes => 1,
96    @_ == 1 ? %{ $_[0] } : @_,
97  };
98
99  $self->{process} = 1
100    if $self->{auto_reverse};
101
102  # fix incorrectly specified attributes
103  ($self->{background} ||= 'black') =~ s/^(on_)*/on_/;
104  ($self->{foreground} ||= 'white') =~ s/^(on_)*//;
105
106  bless $self, $class;
107}
108
109
110sub colors {
111  return (@COLORS, @COLORS256);
112}
113sub foreground_colors {
114  return (
115    @COLORS,
116    (map { "bright_$_" } @COLORS),
117    @COLORS256,
118  );
119}
120sub background_colors {
121  return (
122    (map { "on_$_" } @COLORS),
123    (map { "on_bright_$_" } @COLORS),
124    (map { "on_$_" } @COLORS256),
125  );
126}
127
128
129sub __separate_and_normalize {
130  my ($codes) = @_;
131
132  # Treat empty as "clear".
133  defined($codes) && length($codes)
134    or return 0;
135
136  # Replace empty (clear) with zero to simplify parsing and return values.
137  $codes =~ s/^;/0;/;
138  $codes =~ s/;$/;0/;
139  # Insert a zero between two semicolons (use look-ahead to get /g to find all).
140  $codes =~ s/;(?=;)/;0/g;
141
142  # Remove any leading zeros from (sections of) codes.
143  $codes =~ s/\b0+(?=\d)//g;
144
145  # Return all matches (of extended sequences or digits).
146  return $codes =~ m{ ( [34]8;5;\d+ | \d+) }xg;
147}
148
149sub identify {
150  my ($self, @codes) = @_;
151  local $_;
152  return
153    grep { defined }
154    map  { $ATTRIBUTES_R{ $_ } }
155    map  { __separate_and_normalize($_) }
156      @codes;
157}
158
159
160sub normalize {
161  my $self = shift;
162  my @norm;
163  foreach my $attr ( @_ ){
164    if( $attr eq 'clear' ){
165      @norm = ();
166    }
167    elsif( $attr eq 'reverse_off' ){
168      # reverse_off cancels reverse
169      @norm = grep { $_ ne 'reverse' } @norm;
170    }
171    elsif( $attr eq 'reset_foreground' ){
172      @norm = grep { !exists $FOREGROUND{$_} } @norm;
173    }
174    elsif( $attr eq 'reset_background' ){
175      @norm = grep { !exists $BACKGROUND{$_} } @norm;
176    }
177    else {
178      # remove previous (duplicate) occurrences of this attribute
179      @norm = grep { $_ ne $attr } @norm;
180      # new fg color overwrites previous fg
181      @norm = grep { !exists $FOREGROUND{$_} } @norm if exists $FOREGROUND{$attr};
182      # new bg color overwrites previous bg
183      @norm = grep { !exists $BACKGROUND{$_} } @norm if exists $BACKGROUND{$attr};
184      push @norm, $attr;
185    }
186  }
187  return @norm;
188}
189
190
191sub parse {
192  my ($self, $orig) = @_;
193
194  my $last_pos = 0;
195  my $last_attr = [];
196  my $processed = [];
197  my $parsed = [];
198
199  # Strip escape sequences that we aren't going to use
200  $orig = $self->remove_escape_sequences($orig)
201    if $self->{remove_escapes};
202
203  while( $orig =~ m/(\e\[([0-9;]*)m)/mg ){
204    my $seq = $1;
205    my $attrs = $2;
206
207    my $cur_pos = pos($orig);
208
209    my $len = ($cur_pos - length($seq)) - $last_pos;
210    push @$parsed, [
211      $processed,
212      substr($orig, $last_pos, $len)
213    ]
214      # don't bother with empty strings
215      if $len;
216
217    $last_pos = $cur_pos;
218    $last_attr = [$self->normalize(@$last_attr, $self->identify($attrs))];
219    $processed = $self->{process} ? [$self->process(@$last_attr)] : $last_attr;
220  }
221
222    push @$parsed, [
223      $processed,
224      substr($orig, $last_pos)
225    ]
226      # if there's any string left
227      if $last_pos < length($orig);
228
229  return $parsed;
230}
231
232
233sub process {
234  my ($self, @attr) = @_;
235  @attr = $self->process_reverse(@attr) if $self->{auto_reverse};
236  return @attr;
237}
238
239
240sub process_reverse {
241  my $self = shift;
242  my ($rev, $fg, $bg, @attr);
243  my $i = 0;
244  foreach my $attr ( @_ ){
245    if( $attr eq 'reverse' ){
246      $rev = 1;
247      next;
248    }
249    elsif( $FOREGROUND{ $attr } ){
250      $fg = $i;
251    }
252    elsif( $BACKGROUND{ $attr } ){
253      $bg = $i;
254    }
255    push @attr, $attr;
256    $i++;
257  }
258  # maintain order for consistency with other methods
259  if( $rev ){
260    # if either color is missing then the default colors should be reversed
261    {
262      $attr[ $fg = $i++ ] = $self->{foreground} if !defined $fg;
263      $attr[ $bg = $i++ ] = $self->{background} if !defined $bg;
264    }
265    $attr[ $fg ] = 'on_' . $attr[ $fg ]      if defined $fg;
266    $attr[ $bg ] = substr( $attr[ $bg ], 3 ) if defined $bg;
267  }
268  return @attr;
269}
270
271
272sub remove_escape_sequences {
273  my ($self, $string) = @_;
274
275  # This is in no way comprehensive or accurate...
276  # it just seems like most of the sequences match this.
277  # We could certainly expand this if the need arises.
278  $string =~ s{
279    \e\[
280      [0-9;]*
281      [a-ln-zA-Z]
282  }{}gx;
283
284  return $string;
285}
286
287
288our @EXPORT_OK;
289BEGIN {
290  my @funcs = qw(identify normalize parse);
291  my $suffix = '_ansicolor';
292  local $_;
293  eval join '', ## no critic (StringyEval)
294    map { "sub ${_}$suffix { __PACKAGE__->new->$_(\@_) }" }
295    @funcs;
296  @EXPORT_OK = map { $_ . $suffix } @funcs;
297}
298
299sub import {
300  my $class = shift;
301  return unless @_;
302
303  my $caller = caller;
304  no strict 'refs'; ## no critic (NoStrict)
305
306  foreach my $arg ( @_ ){
307    die "'$arg' is not exported by $class"
308      unless grep { $arg eq $_ } @EXPORT_OK;
309    *{"${caller}::$arg"} = *{"${class}::$arg"}{CODE};
310  }
311}
312
313# TODO: option for blotting out 'concealed'? s/\S/ /g
314
3151;
316
317# NOTE: this synopsis is tested (eval'ed) in t/synopsis.t
318
319__END__
320
321=pod
322
323=encoding UTF-8
324
325=for :stopwords Randy Stauner ACKNOWLEDGEMENTS cpan testmatrix url annocpan anno bugtracker
326rt cpants kwalitee diff irc mailto metadata placeholders metacpan
327
328=head1 NAME
329
330Parse::ANSIColor::Tiny - Determine attributes of ANSI-Colored string
331
332=head1 VERSION
333
334version 0.601
335
336=for test_synopsis sub h { shift };
337
338=head1 SYNOPSIS
339
340  # output from some command
341  my $output = "foo\e[31mbar\e[00m";
342
343  my $ansi = Parse::ANSIColor::Tiny->new();
344  my $marked = $ansi->parse($output);
345
346  is_deeply
347    $marked,
348    [
349      [ [], 'foo' ],
350      [ ['red'], 'bar' ],
351    ],
352    'parse colored string';
353
354  # don't forget to html-encode the string!
355  my $html = join '',
356    '<div>',
357    (map { '<span class="' . join(' ', @{ $_->[0] }) . '">' . h($_->[1]) . '</span>' } @$marked),
358    '</div>';
359
360  is $html,
361    '<div><span class="">foo</span><span class="red">bar</span></div>',
362    'turned simple ansi into html';
363
364=head1 DESCRIPTION
365
366Parse a string colored with ANSI escape sequences
367into a structure suitable for reformatting (into HTML, for example).
368
369The output of terminal commands can be marked up with colors and formatting
370that in some instances you'd like to preserve.
371
372This module is essentially the inverse of L<Term::ANSIColor>.
373The array refs returned from L</parse>
374can be passed back in to C<Term::ANSIColor::colored>.
375The strings may not match exactly due to different ways the attributes can be specified,
376but the end result should be colored the same.
377
378This is a C<::Tiny> module...
379it attempts to be correct for most cases with a small amount of code.
380It may not be 100% correct, especially in complex cases.
381It only handles the C<m> escape sequence (C<\033[0m>)
382which produces colors and simple attributes (bold, underline)
383(like what can be produced with L<Term::ANSIColor>).
384Other escape sequences are removed by default
385but you can disable this by passing C<< remove_escapes => 0 >> to the constructor.
386
387If you do find bugs please submit tickets (with patches, if possible).
388
389=head1 METHODS
390
391=head2 new
392
393Constructor.
394
395Takes a hash or hash ref of arguments:
396
397=over 4
398
399=item *
400
401C<auto_reverse> - Automatically invert colors when C<reverse> is present; Disabled by default.
402
403=item *
404
405C<background> - Color to assume as background; Black by default. Currently used by L</process_reverse>.
406
407=item *
408
409C<foreground> - Color to assume as foreground; White by default. Currently used by L</process_reverse>.
410
411=item *
412
413C<remove_escapes> - Remove other terminal escape sequences (not related to color).  Passes strings through L</remove_escape_sequences> before parsing.
414
415=back
416
417=head2 colors
418
419Returns a list of the base color names (in numeric escape sequence order).
420
421=head2 foreground_colors
422
423Returns a list of the foreground colors (in numeric escape sequence order).
424
425This includes the base colors, their C<bright_> variants,
426and the names from the 256 palette (prefixes of C<ansi>, C<rgb>, and C<grey>).
427
428=head2 background_colors
429
430Returns a list of the background colors (in numeric escape sequence order).
431
432This includes the C<on_> and C<on_bright_> variants of the base colors
433and the C<on_> names for the 256 palette.
434
435=head2 identify
436
437  my @names = $parser->identify('1;31');
438    # or $parser->identify('1', '31');
439  # returns ('bold', 'red')
440
441Identifies attributes by their number;
442Returns a B<list> of names.
443
444This is similar to C<uncolor()> in L<Term::ANSIColor>.
445
446Unknown codes will be ignored (remove from the output):
447
448  $parser->identify('33', '52');
449  # returns ('yellow') # drops the '52'
450
451=head2 normalize
452
453  my @norm = $parser->normalize(@attributes);
454
455Takes a list of named attributes
456(like those returned from L</identify>)
457and reduces the list to only those that would have effect.
458
459=over 4
460
461=item *
462
463Duplicates will be removed
464
465=item *
466
467a foreground color will overwrite any previous foreground color (and the previous ones will be removed)
468
469=item *
470
471same for background colors
472
473=item *
474
475C<clear> will remove all previous attributes
476
477=back
478
479  my @norm = $parser->normalize(qw(red bold green));
480  # returns ('bold', 'green');
481
482=head2 parse
483
484  my $marked = $parser->parse($output);
485
486Parse the provided string
487and return an array ref of array refs describing the formatting:
488
489  # [
490  #   [ [], 'plain words' ],
491  #   [ ['red'], 'colored words' ],
492  # [
493
494These array refs are consistent with the arguments to
495C<colored()> in L<Term::ANSIColor>:
496
497  Term::ANSIColor::colored( ['red'], 'colored words' );
498
499=head2 process
500
501Performs post-processing on the provided attributes.
502
503This currently includes L</process_reverse>
504if C<auto_reverse> is enabled.
505
506=head2 process_reverse
507
508  my @attr = $parser->process_reverse( $parser->normalize( '31;42;7' ) );
509
510Translates a normalized set of attributes into something easier to process.
511This is called internally when C<auto_reverse> is configured.
512
513If C<reverse> is included in the attributes
514it should invert the foreground and background colors.
515
516This method makes the attributes more straight forward
517and likely easier for other things to process:
518
519  my @norm = $parser->normalize( '1;31;42;7' );
520  # returns qw( bold red on_green reverse );
521
522  my @attr = $parser->process_reverse( @norm );
523  # returns qw( bold on_red green );
524
525This extra step is necessary to maintain state
526and properly handle C<reverse>/C<reverse_off>
527since two C<reverse>s do not cancel each other,
528but rather the second should be ignored.
529
530If no foreground or background color is currently active
531then the colors specified as C<foreground> and C<background>
532will be included (and reversed).
533
534  my @attr = $parser->process_reverse( qw( bold reverse ) );
535  # returns qw( bold on_white black );
536
537  my @attr = $parser->process_reverse( qw( bold reverse red ) );
538  # returns qw( bold on_red   black );
539
540This is consistent with the way it is drawn in the terminal.
541Explicitly specifying both colors should make it easy
542for anything downstream to process and display as intended.
543
544=head2 remove_escape_sequences
545
546  my $clean = $parser->remove_escape_sequences( $string );
547
548Strip other terminal escape sequences (those not relating to color)
549from the string to avoid unexpected characters in the output.
550This method is called from L</parse> if C<remove_escapes> is enabled.
551
552=head1 FUNCTIONS
553
554=head2 identify_ansicolor
555
556Function wrapped around L</identify>.
557
558=head2 normalize_ansicolor
559
560Function wrapped around L</normalize>.
561
562=head2 parse_ansicolor
563
564Function wrapped around L</parse>.
565
566=head1 EXPORTS
567
568Everything listed in L</FUNCTIONS> is also available for export upon request.
569
570=head1 SEE ALSO
571
572=over 4
573
574=item *
575
576L<Term::ANSIColor> - For marking up text that will be printed to the terminal
577
578=item *
579
580L<Image::TextMode> (and L<Image::TextMode::Format::ANSI>) - Successor to C<Image::ANSI>; Specifically designed for parsing ANSI art
581
582=item *
583
584L<Term::VT102> - Handles more than colors and is likely more robust but may be overkill in simple situations (and was difficult to install in the past).
585
586=item *
587
588L<HTML::FromANSI::Tiny> - Uses this module to translate ANSI colored text to simple HTML
589
590=back
591
592=head1 SUPPORT
593
594=head2 Perldoc
595
596You can find documentation for this module with the perldoc command.
597
598  perldoc Parse::ANSIColor::Tiny
599
600=head2 Websites
601
602The following websites have more information about this module, and may be of help to you. As always,
603in addition to those websites please use your favorite search engine to discover more resources.
604
605=over 4
606
607=item *
608
609MetaCPAN
610
611A modern, open-source CPAN search engine, useful to view POD in HTML format.
612
613L<http://metacpan.org/release/Parse-ANSIColor-Tiny>
614
615=back
616
617=head2 Bugs / Feature Requests
618
619Please report any bugs or feature requests by email to C<bug-parse-ansicolor-tiny at rt.cpan.org>, or through
620the web interface at L<https://rt.cpan.org/Public/Bug/Report.html?Queue=Parse-ANSIColor-Tiny>. You will be automatically notified of any
621progress on the request by the system.
622
623=head2 Source Code
624
625
626L<https://github.com/rwstauner/Parse-ANSIColor-Tiny>
627
628  git clone https://github.com/rwstauner/Parse-ANSIColor-Tiny.git
629
630=head1 AUTHOR
631
632Randy Stauner <rwstauner@cpan.org>
633
634=head1 CONTRIBUTOR
635
636=for stopwords Dmitry Fedin
637
638Dmitry Fedin <dmitry.fedin@gmail.com>
639
640=head1 COPYRIGHT AND LICENSE
641
642This software is copyright (c) 2011 by Randy Stauner.
643
644This is free software; you can redistribute it and/or modify it under
645the same terms as the Perl 5 programming language system itself.
646
647=cut
648