• Home
  • History
  • Annotate
Name Date Size #Lines LOC

..03-May-2022-

t/H14-Apr-2005-516339

ChangesH A D14-Apr-2005993 3324

Clean.pmH A D14-Apr-20054.3 KiB19649

INSTALLH A D01-Feb-2005812 3824

MANIFESTH A D01-Feb-2005492 3029

META.ymlH A D14-Apr-2005375 1311

Makefile.PLH A D12-Apr-2005457 2212

READMEH A D14-Apr-20054.3 KiB196124

README

1package Apache::Clean;
2
3use 5.008;
4
5use Apache2::Filter ();      # $f
6use Apache2::RequestRec ();  # $r
7use Apache2::RequestUtil (); # $r->dir_config()
8use Apache2::Log ();         # $log->info()
9use APR::Table ();          # dir_config->get() and headers_out->get()
10
11use Apache2::Const -compile => qw(OK DECLINED);
12
13use HTML::Clean ();
14
15use strict;
16
17our $VERSION = '2.00_6';
18
19sub handler {
20
21  my $f   = shift;
22
23  my $r   = $f->r;
24
25  my $log = $r->server->log;
26
27  # we only process HTML documents
28  unless ($r->content_type =~ m!text/html!i) {
29    $log->info('skipping request to ', $r->uri, ' (not an HTML document)');
30
31    return Apache2::Const::DECLINED;
32  }
33
34  my $context;
35
36  unless ($f->ctx) {
37    # these are things we only want to do once no matter how
38    # many times our filter is invoked per request
39
40    # parse the configuration options
41    my $level = $r->dir_config->get('CleanLevel') || 1;
42
43    my %options = map { $_ => 1 } $r->dir_config->get('CleanOption');
44
45    # store the configuration
46    $context = { level   => $level,
47                 options => \%options,
48                 extra   => undef };
49
50    # output filters that alter content are responsible for removing
51    # the Content-Length header, but we only need to do this once.
52    $r->headers_out->unset('Content-Length');
53  }
54
55  # retrieve the filter context, which was set up on the first invocation
56  $context ||= $f->ctx;
57
58  # now, filter the content
59  while ($f->read(my $buffer, 1024)) {
60
61    # prepend any tags leftover from the last buffer or invocation
62    $buffer = $context->{extra} . $buffer if $context->{extra};
63
64    # if our buffer ends in a split tag ('<strong' for example)
65    # save processing the tag for later
66    if (($context->{extra}) = $buffer =~ m/(<[^>]*)$/) {
67      $buffer = substr($buffer, 0, - length($context->{extra}));
68    }
69
70    my $h = HTML::Clean->new(\$buffer);
71
72    $h->level($context->{level});
73
74    $h->strip($context->{options});
75
76    $f->print(${$h->data});
77  }
78
79  if ($f->seen_eos) {
80    # we've seen the end of the data stream
81
82    # print any leftover data
83    $f->print($context->{extra}) if $context->{extra};
84  }
85  else {
86    # there's more data to come
87
88    # store the filter context, including any leftover data
89    # in the 'extra' key
90    $f->ctx($context);
91  }
92
93  return Apache2::Const::OK;
94}
95
961;
97
98__END__
99
100=head1 NAME
101
102Apache::Clean - interface into HTML::Clean for mod_perl 2.0
103
104=head1 SYNOPSIS
105
106httpd.conf:
107
108  PerlModule Apache::Clean
109
110  Alias /clean /usr/local/apache2/htdocs
111  <Location /clean>
112    PerlOutputFilterHandler Apache::Clean
113
114    PerlSetVar CleanOption shortertags
115    PerlAddVar CleanOption whitespace
116  </Location>
117
118=head1 DESCRIPTION
119
120Apache::Clean uses HTML::Clean to tidy up large, messy HTML, saving
121bandwidth.
122
123Only documents with a content type of "text/html" are affected - all
124others are passed through unaltered.
125
126For more information, see
127
128  http://www.perl.com/pub/a/2003/04/17/filters.html
129
130=head1 OPTIONS
131
132Apache::Clean supports few options - all of which are based on
133options from HTML::Clean.  Apache::Clean will only tidy up whitespace
134(via $h->strip) and will not perform other options of HTML::Clean
135(such as browser compatibility).  See the HTML::Clean manpage for
136details.
137
138=over 4
139
140=item CleanLevel
141
142sets the clean level, which is passed to the level() method
143in HTML::Clean.
144
145  PerlSetVar CleanLevel 9
146
147CleanLevel defaults to 1.
148
149=item CleanOption
150
151specifies the set of options which are passed to the options()
152method in HTML::Clean - see the HTML::Clean manpage for a complete
153list of options.
154
155  PerlSetVar CleanOption shortertags
156  PerlAddVar CleanOption whitespace
157
158CleanOption has no default.
159
160=back
161
162=head1 NOTES
163
164This is alpha software, and as such has not been tested on multiple
165platforms or environments.
166
167=head1 FEATURES/BUGS
168
169probably lots - this is the preliminary port to mod_perl 2.0
170
171=head1 SEE ALSO
172
173perl(1), mod_perl(3), Apache(3), HTML::Clean(3)
174
175=head1 AUTHOR
176
177Geoffrey Young <geoff@modperlcookbook.org>
178
179=head1 COPYRIGHT
180
181Copyright (c) 2005, Geoffrey Young
182All rights reserved.
183
184This module is free software.  It may be used, redistributed
185and/or modified under the same terms as Perl itself.
186
187=head1 HISTORY
188
189This code is derived from the Cookbook::Clean and
190Cookbook::TestMe modules available as part of
191"The mod_perl Developer's Cookbook".
192
193For more information, visit http://www.modperlcookbook.org/
194
195=cut
196