1#!/usr/bin/perl
2#
3# zip_policy.pl 0.02
4#
5# Enforces file name and encrypted content policy inside ZIP archives.
6# Exits with return value of 1 if banned or encrypted content is
7# found. DOES NOT modify the zip file... use this to reject or
8# remove an entire ZIP file attachment that has banned content.
9#
10# For use with Anomy Sanitizer 1.56+ (http://mailtools.anomy.net/)
11#
12#    Copyright 2004 Advosys Consulting Inc., Ottawa
13#
14#    This program is free software; you can redistribute it and/or modify
15#    it under the terms of the GNU General Public License as published by
16#    the Free Software Foundation; either version 2 of the License, or
17#    (at your option) any later version.
18#
19#    This program is distributed in the hope that it will be useful,
20#    but WITHOUT ANY WARRANTY; without even the implied warranty of
21#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
22#    GNU General Public License for more details.
23#
24# Use this as the scanner for attachments named "*.zip"
25#
26# Requires the following Perl modules:
27#	- Archive::Zip v0.12 or later
28#   - Additional modules required by Archive::Zip (see Required Modules
29#     section of Archive::Zip documentation)
30#
31# Example uses:
32#
33# USAGE ONE: You use Anomy only to enforce e-mail attachment policies
34# and rely on an external downstream SMTP anti-virus product to actually
35# scan e-mail for malware:
36#
37#    file_list_1 = (?i)(\.zip\s*)
38#    file_list_1_policy = accept:drop:drop:drop
39#    file_list_1_scanner = 0:::/usr/local/bin/zip_policy.pl %FILENAME
40#
41# Note: policy "accept" tells Anomy to skip all remaining rules.
42#
43# USAGE TWO: You use Anomy to both enforce e-mail attachment policies
# and also to use a command-line antivirus product to look for malware:
45#
46#    file_list_1 = (?i)(\.zip\s*)
47#    file_list_1_policy = unknown:drop:drop:drop
48#    file_list_1_scanner = 0:::/usr/local/bin/zip_policy.pl %FILENAME
49#
50#    file_list_2 = .*
51#    file_list_2_policy = accept:accept:save:save
52#    file_list_2_scanner = 0:5:3,4:/usr/local/bin/avp.sh %FILENAME
53#
54# Note: Policy "unknown" tells Anomy to invoke the next rule, so a zip
55# accepted by the policy filter will still be scanned for viruses. DO
56# NOT USE "accept" for zip_policy.pl or the virus scanner will never be called!
57#
58# IMPLEMENTATION NOTES:
59#
60# Detecting encrypted files:
61# 	If the ZIP uses the newer encryption schemes created by Winzip
62# 	or PKWare's PKZip, this program will either detect it
63# 	or die horribly with exit code 255.
# 	Set up Anomy to reject every non-zero exit code, not just 1, to be safe.
65#
66# Multi-volume archives
# 	ZIP files created as disk-spanning (multi-volume) will
68# 	be rejected. Exit code will be 255
69#
70# Changelog:
71#   v0.01 Initial release
72#   v0.02 Regexp now case insensitive and looks for trailing whitespace.
#         Checks for Archive::Zip > .11 before using eocdOffset.
74
75
76
# Force all variables to be declared and enable warnings. Both pragmas are
# lexically scoped, so they are placed ahead of the settings in order to
# cover those declarations as well:
use strict;
use warnings;

# USER SETTINGS:

# Regular expression of files not permitted in ZIP.
# NOTE: the pattern is not anchored to the end of the member name, so a
# listed extension is rejected wherever it occurs in the name
# (e.g. "evil.exe.txt" matches as well as "evil.exe").
my $filelist = '(?i)\.([23]86|vb[se]|jse|cpl|crt|chm|cpl|in[fsi]|isp|dll|drv|msi|cmd|sc[rt]|sys|bat';
$filelist .= '|pif|lnk|hlp|ms[cip]|reg|asd|sh[bs]|app|ocx|htt|hta|mht|url|exe|ws[cfh]|ops|com';
$filelist .= '|mim|uue?|b64|b[hq]x|xxe)\s*';

# Forbid encrypted (and therefore UNSCANNABLE) content?
# 1 = an archive containing encrypted members is rejected (exit code 1),
# 0 = encrypted members are tolerated.
my $ForbidEncrypted = 1;

# Maximum depth for zips that contain zips.
# 5 is usually enough. Do not set too high
# or you could be DoS'd by a zip bomb.
my $MaxZipDepth = 5;

# Maximum file size to attempt to extract (bytes)
# Again, do not set too high or you can be attacked
# by a zip bomb.
my $MaxFileSize = 104857600;

# Directory for temp files
# (Suggestion: use a tmpfs partition if possible)
my $tmpdir = '/var/tmp';

# Set $DEBUG =1 for verbose output on STDERR:
my $DEBUG = 1;

# Fill in the following to send STDERR to a file (for debugging only):
my $LOGFILE = '';

# ---------------------------------------
# Few user serviceable parts below

# Import the required libraries:
use Archive::Zip qw( :ERROR_CODES :CONSTANTS );

# Declare global variables:
my $curdepth = 0;	# Current recursion depth
my $crypted = 0;	# Total encrypted files seen
my $disallowed = 0;	# Total files matching regular expression

# Get name of zip file from parameter 1 on command line:
my $zipfilename = shift;

# Run the whole scan inside eval so that every failure leaves with the
# documented exit code 255. A bare die() exits with whatever happens to be
# in $! at the time, which can be any value -- including 1, the code that is
# reserved for "banned or encrypted content found".
my $completed = eval {
	# Redirect STDERR to a file if required (three-argument open, so the
	# configured path can never be interpreted as part of the open mode):
	if ($LOGFILE) {
		open(STDERR, '>>', $LOGFILE) or die "Cannot open log file $LOGFILE: $!\n";
	}

	# Do a few basic checks on parameter. defined/length rather than plain
	# truth, so that a file literally named "0" is still accepted:
	die "Usage: $0 /path/to/filename.zip\n"
		unless defined $zipfilename and length $zipfilename;
	die "Cannot find file '$zipfilename'\n" unless -f $zipfilename;

	# Call the main routine:
	checkzip($zipfilename);
	1;
};
unless ($completed) {
	print STDERR ( $@ || "Unknown error\n" );
	exit 255;
}

# Determine results:
print STDERR "Forbidden files = $disallowed, Encrypted = $crypted\n" if $DEBUG;
if ( $disallowed or ($crypted and $ForbidEncrypted) ) {
	exit 1;
}

exit 0;
142
143## Subroutines:
144
sub checkzip {
# Check contents of zip file for:
#   - files matching regular expression ($filelist)
#   - Encrypted files
#   - Multivolume ZIP headers
#
# Members whose name ends in ".zip" are extracted to a temp file in $tmpdir
# and checked by a recursive call, at most $MaxZipDepth levels deep.
#
# Parameter 1: path of the zip file to inspect.
#
# Findings are accumulated in the file-level counters $disallowed and
# $crypted; nothing meaningful is returned.
#
# Dies when the archive cannot be inspected: unreadable file, multi-volume
# archive, embedded zip larger than $MaxFileSize, nesting deeper than
# $MaxZipDepth, or a failed extraction.

	# Grab file name from parameter 1
	my $zipfilename = shift;

	# Increment recursion counter:
	$curdepth++;

	print STDERR "Opening $zipfilename\n" if $DEBUG;
	my $zip = Archive::Zip->new();
	die "Error reading $zipfilename\n" unless $zip->read( $zipfilename ) == AZ_OK;

	# Abort if this is a multi-disk zip:
	die "Multi-volume zip file. Cannot process.\n" if $zip->diskNumberWithStartOfCentralDirectory();

	# Warn about file weirdness (characters prepended to zip, such as a virus would do):
	if ( $zip->VERSION > .11 and $zip->eocdOffset() ) {
		warn "Zip directory offset doesn't match file! Possibly altered.\n";
	}

	# Tally total files matching regexp (scalar context yields the count):
	$disallowed += $zip->membersMatching( $filelist );

	# Now scan through each ZIP file member looking for
	# embedded ZIP files and encrypted files
	foreach my $element ( $zip->members() ) {
		my $elementname = $element->fileName();
		my $encrypted   = $element->isEncrypted();

		# Increment number of encrypted elements:
		$crypted++ if $encrypted;

		# Print internal filename and "(+)" if encrypted:
		print STDERR " $elementname", $encrypted ? ' (+)' : '', "\n" if $DEBUG;

		# Only members whose filename ends in .ZIP are inspected further:
		next unless $elementname =~ /\.zip\s*$/i;

		# An encrypted embedded zip cannot be extracted without its password.
		# It has already been counted in $crypted, which is what decides the
		# outcome when encrypted content is forbidden; attempting the
		# extraction would only abort the scan with an unhelpful "error N".
		next if $encrypted;

		# Refuse to descend further than MaxZipDepth levels:
		die "ZIP file exceeded depth of $MaxZipDepth.\n" unless $curdepth < $MaxZipDepth;

		# Check uncompressed file size:
		die "Embedded ZIP '$elementname' too large to inspect.\n" if $element->uncompressedSize() > $MaxFileSize;

		# Unzip to a temp file:
		# (Yes, using Archive::Zip::tempName would be better but doesn't seem to
		# exist in Archive::Zip 1.09)
		my ($fh, $tmpname) = Archive::Zip::tempFile( $tmpdir );

		# Only the name is needed; the extraction reopens the file itself.
		$fh->close;

		# Extract and recurse inside eval so the temp file is removed even
		# when either step dies. Otherwise every rejected archive would
		# leave up to $MaxFileSize bytes behind in $tmpdir.
		my $inspected = eval {
			my $status = $element->extractToFileNamed( $tmpname );
			die "error $status" unless $status == AZ_OK;

			# Recursively call this subroutine to check this embedded zip:
			checkzip($tmpname);
			1;
		};
		my $failure = $@;

		# ...and finally try to tidy the temp file:
		unlink($tmpname) or warn "Unable to delete tempfile $tmpname\n";

		# Re-raise whatever stopped the nested inspection:
		die $failure unless $inspected;
	}

	$curdepth--;
	return;
}
211