1package HTML::FromANSI;
2$HTML::FromANSI::VERSION = '2.03';
3
4use strict;
5use base qw/Exporter/;
6use vars qw/@EXPORT @EXPORT_OK @Color %Options/;
7use Term::VT102::Boundless;
8use HTML::Entities;
9use Scalar::Util qw(blessed reftype);
10use Carp qw(croak);
11
12=head1 NAME
13
14HTML::FromANSI - Mark up ANSI sequences as HTML
15
16=head1 SYNOPSIS
17
18    use HTML::FromANSI (); # avoid exports if using OO
19    use Term::ANSIColor;
20
21    my $h = HTML::FromANSI->new(
22        fill_cols => 1,
23    );
24
25
26    $h->add_text(color('bold blue'), "This text is bold blue.");
27
28    print $h->html;
29
30
31    # you can append text in the new api:
32
33    $h->add_text(color('bold blue'), " still blue.");
34
35    print $h->html
36
37
38
39    # The old API still works:
40
41    $HTML::FromANSI::Options{fill_cols} = 1; # fill all 80 cols
42    print ansi2html(color('bold blue'), "This text is bold blue.");
43
44=head1 DESCRIPTION
45
46This small module converts ANSI text sequences to corresponding HTML
47codes, using stylesheets to control color and blinking properties.
48
49It exports C<ansi2html()> by default, which takes an array, joins it
50it into a single scalar, and returns its HTML rendering.
51
52From version 2.00 an object oriented api which is safer for multiple uses (no
53more manipulation of shared C<%Options>) is available. It is reccomended that
54you no longer import any functions by doing:
55
56    use HTML::FromANSI ();
57
58and use the new documented API instead of the functional one.
59
60The underlying ANSI code processing is done by L<Term::VT102>, a DEC VT102
61terminal emulator. To generate ANSI codes for color changes, cursor movements
62etc, take a look at L<Term::ANSIScreen> and L<Term::ANSIColor>.
63
64=head1 METHODS
65
66=over 4
67
68=item new
69
70The constructor. See L</OPTIONS> for the options it takes.
71
72=item add_text @text
73
74Adds text input to the terminal emulator.
75
76=item html
77
78Renders the screen as computed by C<terminal_object> into HTML.
79
80=item ansi_to_html @text
81
82A convenience method.
83
84Calls C<add_text> and then C<html>.
85
86=item terminal_object
87
88The underlying terminal emulator object.
89
90=back
91
92=head1 OPTIONS
93
94These are parameters you can pass to C<new>.
95
96=over 4
97
98=item linewrap
99
100A boolean value to specify whether to wrap lines that exceeds
101width specified by C<col>, or simply truncate them. Defaults to C<1>.
102
103Only takes effect if you override C<terminal_class> or C<terminal_object> with
104a L<Term::VT102> (instead of L<Term::VT102::Boundless>).
105
106=item lf_to_crlf
107
108A boolean value to specify whether to translate all incoming
109\n into C<\r\n> or not; you generally wants to use this if your
110data is from a file using unix line endings. The default is C<0>
111on MSWin32 and MacOS, and C<1> on other platforms.
112
113=item fill_cols
114
115A boolean value to specify whether to fill empty columns with
116space; use this if you want to maintain a I<screen-like> appearance
117in the resulting HTML, so that each row will be aligned properly.
118Defaults to C<0>.
119
120=item html_entity
121
122A boolean value to specify whether to escape all high-bit characters
123to HTML entities or not; defaults to C<0>, which means only C<E<lt>>,
124C<E<gt>>, C<"> and C<&> will be escaped. (Handy when processing most
125ANSI art entries.)
126
127=item cols
128
129A number specifying the width of the virtual terminal; defaults to 80.
130
131When C<Term::VT102::Boundless> is in use (the default) this specifies the
132minimum number of rows to draw.
133
134When using C<Term::VT102> (by overriding C<terminal_class> or
135C<terminal_object>) then the number of columns is fixed.
136
137=item rows
138
139When C<Term::VT102::Boundless> is in use (the default) this specifies the
140minimum number of rows to draw.
141
142When L<Term::VT102> is in use (by overriding C<terminal_class> or
143L<terminal_object>) then it sets the height of the virtual terminal; rows that
144exceeds this number will be truncated.
145
146=item font_face
147
148A string used as the C<face> attribute to the C<font> tag enclosing the
149HTML text; defaults to C<fixedsys, lucida console, terminal, vga, monospace>.
150
151If this option and the C<style> option are both set to empty strings, the
152C<font> tag will be omitted.
153
154=item style
155
156A string used as the C<style> attribute to the C<font> tag enclosing the
157HTML text; defaults to <line-height: 1; letter-spacing: 0; font-size: 12pt>.
158
159If this option and the C<font_face> option are both set to empty strings, the
160C<font> tag will be omitted.
161
162=item tt
163
164A boolean value specifying whether the HTML text should be enclosed in a
165C<tt> tag or not. Defaults to C<1>.
166
167=item show_cursor
168
169A boolean value to control whether to highlight the character under
170the cursor position, by reversing its background and foregroud color.
171Defaults to C<0>.
172
173If the cursor is on it's own line and C<show_cursor> is set, then that row will
174be omitted.
175
176=item terminal_class
177
178The class to instantiate C<terminal_object> with. Defaults to
179L<Term::VT102::Boundless>.
180
181=item terminal_object
182
183Any L<Term::VT102> compatible object should work here.
184
185If you override it most values like C<cols>, C<rows>, C<terminal_class> etc
186will be ignored.
187
188=cut
189
190@EXPORT = '&ansi2html';
191@EXPORT_OK = qw|@Color %Options|;
192
193@Color = (qw(
194    black   darkred darkgreen),'#8b8b00',qw(darkblue darkmagenta darkcyan gray
195    dimgray     red     green    yellow         blue     magenta     cyan white
196));
197
198%Options = (
199    linewrap	=> 1,		# wrap long lines
200    lf_to_crlf	=> (		# translate \n to \r\n on Unix
201        $^O !~ /^(?:MSWin32|MacOS)$/
202    ),
203    fill_cols	=> 0,		# fill all (80) columns with space
204    html_entity => 0,		# escape all HTML entities
205    cols	=> 80,		# column width
206    rows	=> undef,	# let ansi2html figure it out
207    font_face	=> 'fixedsys, lucida console, terminal, vga, monospace',
208    style	=> 'line-height: 1; letter-spacing: 0; font-size: 12pt',
209    tt		=> 1,
210    show_cursor	=> 0,
211
212    terminal_class => 'Term::VT102::Boundless',
213);
214
215sub import {
216    my $class = shift;
217    while (my ($k, $v) = splice(@_, 0, 2)) {
218        $Options{$k} = $v;
219    }
220    $class->export_to_level(1);
221}
222
223sub new {
224    my ( $class, @args ) = @_;
225
226    if ( @args == 1 && reftype($args[0]) eq 'HASH' ) {
227        return bless { %Options, %{ $args[0] } }, $class;
228    } elsif ( @args % 2 == 0 ) {
229        return bless { %Options, @args }, $class;
230    } else {
231        croak "Constructor arguments must be an even sized list or a hash ref";
232    }
233}
234
235sub _obj_args {
236    if ( blessed($_[0]) and $_[0]->isa(__PACKAGE__) ) {
237        return @_;
238    } else {
239        return ( __PACKAGE__->new(), @_ );
240    }
241}
242
243sub ansi2html {
244    my ( $self, @args ) = _obj_args(@_);
245    $self->ansi_to_html(@args);
246}
247
248sub terminal_object {
249    my ( $self, @args ) = @_;
250    $self->{terminal_object} ||= $self->create_terminal_object(@args);
251}
252
253sub create_terminal_object {
254    my ( $self, %args ) = @_;
255
256    my $class = $self->{terminal_class};
257
258    if ( $class ne 'Term::VT102::Boundless' ) {
259        ( my $file = "${class}.pm" ) =~ s{::}{/}g;
260        require $file;
261    }
262
263    my $vt = $class->new(
264        cols => $self->{cols},
265        ( defined($self->{rows}) ? ( rows => $self->{rows} ) : () ),
266    );
267
268    $vt->option_set(LINEWRAP => $self->{linewrap});
269    $vt->option_set(LFTOCRLF => $self->{lf_to_crlf});
270
271    $vt->_code_DECTCEM( $self->{show_cursor} );
272
273    return $vt;
274}
275
276sub add_text {
277    my ( $self, @lines ) = @_;
278    $self->terminal_object->process($_) for @lines;
279}
280
281sub ansi_to_html {
282    my ( $self, @lines ) = @_;
283
284    $self->add_text(@lines);
285
286    return $self->html;
287}
288
289sub html {
290    my ( $self, @args ) = @_;
291
292    my $result = $self->parse_vt($self->terminal_object);
293
294    if (length $self->{font_face} or length $self->{style}) {
295        $result = "<font face='$self->{font_face}' style='$self->{style}'>".
296        $result."</font>";
297    }
298
299    $result = "<tt>$result</tt>" if $self->{tt};
300
301    return $result;
302}
303
304sub parse_vt {
305    my ( $self, $vt ) = _obj_args(@_);
306
307    my (%prev, %this); # attributes
308    my $out;
309
310    my ($x, $y) = ($vt->x, $vt->y);
311
312    my $total_rows = $vt->rows;
313
314    foreach my $row_num (1 .. $total_rows) {
315        local $SIG{__WARN__} = sub {}; # abandon all hope, ye who enter here
316
317        my $row = $vt->row_text($row_num);
318        my $att = $vt->row_attr($row_num);
319
320        if ( $row_num == $total_rows and $total_rows != ($self->{rows}||0) # this is the last row
321            and $row =~ /^[\s\x00]*$/s # and it's completely empty
322            and !$self->{show_cursor} # and we're not showing a cursor
323        ) { last } # skip it
324
325        foreach my $col_num (0 .. length($row)) {
326            my $text = substr($row, $col_num,, 1);
327
328            @this{qw|fg bg bo fo st ul bl rv|} = $vt->attr_unpack(
329                substr($att, $col_num * 2, 2)
330            );
331
332            if ($y == $row_num and $x == $col_num + 1 and $self->{show_cursor}) {
333                # this block is the cursor
334                @this{qw|fg bg bo bl|} = (@this{qw|bg fg bl bo|});
335                $text = ' ' if $text eq '\000';
336            }
337            elsif ($text eq "\000") {
338                next unless $self->{fill_cols};
339            }
340
341            $out .= $self->diff_attr(\%prev, \%this) . (
342                ($text eq ' ' or $text eq "\000") ? '&nbsp;':
343                $self->{html_entity} ? encode_entities($text)
344                : encode_entities($text, '<>"&')
345            );
346
347            %prev = %this;
348        }
349
350        $out .= "<br>";
351    }
352
353    return "$out</span>";
354}
355
356sub diff_attr {
357    my ($self, $prev, $this) = _obj_args(@_);
358    my $out = '';
359
360    # skip if the attributes remain unchanged
361    return if %{$prev} and not scalar (grep {
362            ($_->[0] ne $_->[1])
363        } map {
364            [ $prev->{$_}, $this->{$_} ]
365        } keys %{$this}
366    );
367
368    # bold, faint, standout, underline, blink and reverse
369    my ($fg, $bg, $bo, $fo, $st, $ul, $bl, $rv)
370        = @{$this}{qw|fg bg bo fo st ul bl rv|};
371
372    ($fg, $bg) = ($bg, $fg) if $rv;
373
374    $out .= "</span>" if %{$prev};
375    $out .= "<span style='";
376    $out .= "color: $Color[$fg + $bo * 8]; ";
377    $out .= "background: $Color[$bg + $bl * 8]; ";
378    $out .= "text-decoration: underline; " if $ul;
379    $out .= "'>";
380
381    return $out;
382}
383
3841;
385
386__END__
387
388=head1 SEE ALSO
389
390L<Term::VT102::Boundless>, L<HTML::Entities>, L<Term::ANSIScreen>
391
392=head1 AUTHORS
393
394Audrey Tang E<lt>audreyt@audreyt.orgE<gt>
395Yuval Kogman E<lt>nothingmuch@woobling.orgE<gt>
396
397=head1 COPYRIGHT
398
399Copyright 2001, 2002, 2003 by Audrey Tang E<lt>audreyt@audreyt.orgE<gt>.
400
401Copyright 2007 Yuval Kogman E<lt>nothingmuch@Woobling.orgE<gt>
402
403This program is free software; you can redistribute it and/or
404modify it under the terms of the MIT license or the same terms as Perl itself.
405
406=cut
407