README
1package Apache::Clean;
2
3use 5.008;
4
5use Apache2::Filter (); # $f
6use Apache2::RequestRec (); # $r
7use Apache2::RequestUtil (); # $r->dir_config()
8use Apache2::Log (); # $log->info()
9use APR::Table (); # dir_config->get() and headers_out->get()
10
11use Apache2::Const -compile => qw(OK DECLINED);
12
13use HTML::Clean ();
14
15use strict;
16
17our $VERSION = '2.00_6';
18
19sub handler {
20
21 my $f = shift;
22
23 my $r = $f->r;
24
25 my $log = $r->server->log;
26
27 # we only process HTML documents
28 unless ($r->content_type =~ m!text/html!i) {
29 $log->info('skipping request to ', $r->uri, ' (not an HTML document)');
30
31 return Apache2::Const::DECLINED;
32 }
33
34 my $context;
35
36 unless ($f->ctx) {
37 # these are things we only want to do once no matter how
38 # many times our filter is invoked per request
39
40 # parse the configuration options
41 my $level = $r->dir_config->get('CleanLevel') || 1;
42
43 my %options = map { $_ => 1 } $r->dir_config->get('CleanOption');
44
45 # store the configuration
46 $context = { level => $level,
47 options => \%options,
48 extra => undef };
49
50 # output filters that alter content are responsible for removing
51 # the Content-Length header, but we only need to do this once.
52 $r->headers_out->unset('Content-Length');
53 }
54
55 # retrieve the filter context, which was set up on the first invocation
56 $context ||= $f->ctx;
57
58 # now, filter the content
59 while ($f->read(my $buffer, 1024)) {
60
61 # prepend any tags leftover from the last buffer or invocation
62 $buffer = $context->{extra} . $buffer if $context->{extra};
63
64 # if our buffer ends in a split tag ('<strong' for example)
65 # save processing the tag for later
66 if (($context->{extra}) = $buffer =~ m/(<[^>]*)$/) {
67 $buffer = substr($buffer, 0, - length($context->{extra}));
68 }
69
70 my $h = HTML::Clean->new(\$buffer);
71
72 $h->level($context->{level});
73
74 $h->strip($context->{options});
75
76 $f->print(${$h->data});
77 }
78
79 if ($f->seen_eos) {
80 # we've seen the end of the data stream
81
82 # print any leftover data
83 $f->print($context->{extra}) if $context->{extra};
84 }
85 else {
86 # there's more data to come
87
88 # store the filter context, including any leftover data
89 # in the 'extra' key
90 $f->ctx($context);
91 }
92
93 return Apache2::Const::OK;
94}
95
961;
97
98__END__
99
100=head1 NAME
101
102Apache::Clean - interface into HTML::Clean for mod_perl 2.0
103
104=head1 SYNOPSIS
105
106httpd.conf:
107
108 PerlModule Apache::Clean
109
110 Alias /clean /usr/local/apache2/htdocs
111 <Location /clean>
112 PerlOutputFilterHandler Apache::Clean
113
114 PerlSetVar CleanOption shortertags
115 PerlAddVar CleanOption whitespace
116 </Location>
117
118=head1 DESCRIPTION
119
120Apache::Clean uses HTML::Clean to tidy up large, messy HTML, saving
121bandwidth.
122
123Only documents with a content type of "text/html" are affected - all
124others are passed through unaltered.
125
126For more information, see
127
128 http://www.perl.com/pub/a/2003/04/17/filters.html
129
130=head1 OPTIONS
131
132Apache::Clean supports few options - all of which are based on
133options from HTML::Clean. Apache::Clean will only tidy up whitespace
134(via $h->strip) and will not perform other options of HTML::Clean
135(such as browser compatibility). See the HTML::Clean manpage for
136details.
137
138=over 4
139
140=item CleanLevel
141
142sets the clean level, which is passed to the level() method
143in HTML::Clean.
144
145 PerlSetVar CleanLevel 9
146
147CleanLevel defaults to 1.
148
149=item CleanOption
150
151specifies the set of options which are passed to the options()
152method in HTML::Clean - see the HTML::Clean manpage for a complete
153list of options.
154
155 PerlSetVar CleanOption shortertags
156 PerlAddVar CleanOption whitespace
157
158CleanOption has no default.
159
160=back
161
162=head1 NOTES
163
164This is alpha software, and as such has not been tested on multiple
165platforms or environments.
166
167=head1 FEATURES/BUGS
168
169probably lots - this is the preliminary port to mod_perl 2.0
170
171=head1 SEE ALSO
172
173perl(1), mod_perl(3), Apache(3), HTML::Clean(3)
174
175=head1 AUTHOR
176
177Geoffrey Young <geoff@modperlcookbook.org>
178
179=head1 COPYRIGHT
180
181Copyright (c) 2005, Geoffrey Young
182All rights reserved.
183
184This module is free software. It may be used, redistributed
185and/or modified under the same terms as Perl itself.
186
187=head1 HISTORY
188
189This code is derived from the Cookbook::Clean and
190Cookbook::TestMe modules available as part of
191"The mod_perl Developer's Cookbook".
192
193For more information, visit http://www.modperlcookbook.org/
194
195=cut
196