1# MIMEPARSER.RC
2# Process messages that have MIME-Version: 1.0 in the header
3
# Define a per-message temporary directory where we will save files
# during the MIME autopsy.
MESSAGETMPDIR="${SBTEMP}/${LOGNAME}.message.${LOCALRANDOM}"

# You will have to consider /tmp race conditions or disk space
# if you point SBTEMP at a directory other than /tmp.
#
# Create ${MESSAGETMPDIR}.  On any failure (SBTEMP is not a directory, or
# mkdir fails) MESSAGETMPDIR is set to the empty string; the recipes that
# follow test for an empty MESSAGETMPDIR to choose between the MIME autopsy
# and the fallback method.
:0
* $ ? ${TEST} -d ${SBTEMP}
{
 # NOTE(review): this assumes ${UMASK} holds the current umask at this
 # point -- confirm it is set before this file is included.
 OLDUMASK=${UMASK}
 # Make sure the ${MESSAGETMPDIR} directory is private for the user in
 # question.  077 removes every group and other permission bit; the former
 # value 007 left the directory (and the mail saved in it) open to the group.
 UMASK=077
 :0
 * $ ? ${MKDIR} ${MESSAGETMPDIR}
 {
  SBLOG="L5-Creating temp directory: \"${MESSAGETMPDIR}\""
  INCLUDERC=${SBDIR}/functions/loglevel.rc
 }
 :0 E
 {
  SBLOG="L1-Error: Failed to create temp directory:\"${MESSAGETMPDIR}\""
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  MESSAGETMPDIR=''
 }
 UMASK=${OLDUMASK}
}
# SBTEMP is missing or is not a directory, so there is nowhere to keep the
# autopsy files.  Clear MESSAGETMPDIR so the autopsy is skipped; otherwise
# it would run against a directory that does not exist.
:0 E
{
 SBLOG="L1-Error: Temp directory does not exist: \"${SBTEMP}\""
 INCLUDERC=${SBDIR}/functions/loglevel.rc
 MESSAGETMPDIR=''
}
30
# Run the MIME autopsy only when MESSAGETMPDIR is not empty and the message
# carries a "MIME-Version: 1.0" header.  Otherwise MIME stays "no" and the
# fallback method further down is used.
MIME=no
# Extract the MIME-Version header, collapse each run of spaces and tabs into
# a single space and strip a trailing space, so that the anchored match on
# MIMEVERSION in the next recipe sees a normalized value.
MIMEVERSION=`${FORMAIL} -cX "MIME-Version:" |${SED} -e 's/[ 	]\{1,\}/ /g;s/ $//'`
35:0
36* ! MESSAGETMPDIR ?? ^^^^
37* MIMEVERSION ?? ^MIME-Version: 1\.0$
38{
 # Mark the message as MIME.  This is set back to "no" later in this file
 # when the autopsy produces no list of body files to parse.
 MIME=yes
 SBLOG="L5-Begin MIME Message Body Autopsy"
 INCLUDERC=${SBDIR}/functions/loglevel.rc

 # Save a copy of existing spambouncer headers for later restoration.
 # Works on a copy (c) of the header (h): sed -n prints only the lines that
 # start with X-SpamBouncer: or X-SB{Rule,Stop,Pass,Note,Class,Score,Pattern}:
 # and have at least one character after the colon.
 # NOTE(review): the header is still folded at this point and sed works line
 # by line, so continuation lines of a folded X-SB header would not be
 # saved -- confirm these headers are never folded.
 :0hc:
 |${SED} -ne '/^X-S\(pamBouncer\|B\(Rule\|Stop\|Pass\|Note\|Class\|Score\|Pattern\)\):.\{1,\}$/p' >>${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml

 # Strip any existing spambouncer headers.
 # Same pattern as above, used as a header filter (hf) that deletes the
 # matching lines from the message in the pipeline.
 :0hf
 |${SED} -e '/^X-S\(pamBouncer\|B\(Rule\|Stop\|Pass\|Note\|Class\|Score\|Pattern\)\):.\{1,\}$/d'

 # Save a copy of the entire original message before we start the autopsy.
 # The message will be restored from this file later, after
 # the MIME message body autopsy is complete.
 :0c:
 ${MESSAGETMPDIR}/message-original.eml

 # Unfold all the headers, replace runs of tabs and spaces with single spaces,
 # strip trailing spaces, and restore the newline at the end of the headers.
 # Doing this makes header parsing less complicated when using regular expressions.
 # We'll restore the headers later to their original condition from the saved file.
 :0 hf
 |${FORMAIL} -cX "" |${SED} -e 's/[ 	]\{1,\}/ /g;s/ $//;$s/$/\n/'
63
64 # Begin the mime Message body autopsy
65 :0
66 * MIME ?? ^yes$
67 {
  # The autopsy starts at the top level of the message.
  MIMELEVEL=0

  # LOCALLEVEL is the internal representation of the MIME part recursion level.
  LOCALLEVEL=${MIMELEVEL}
  # At level 0 the Content-Type header is read from the message itself.
  :0
  * LOCALLEVEL ?? ^0$
  { LOCALCONTENT=`${FORMAIL} -cX "Content-Type:"` }
  # Any other level would read it from the saved part file MIME_<level>.
  # LOCALLEVEL was just set to 0 above, so this branch is not taken here.
  :0 E
  { LOCALCONTENT=`${CAT} ${MESSAGETMPDIR}/MIME_${LOCALLEVEL} |${FORMAIL} -cX "Content-Type:"` }

  SBLOG="L7-LOCALCONTENT=${LOCALCONTENT}"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
80
  # Extract the multipart boundary from LOCALCONTENT into LOCALBOUNDARY.
  # It stays empty until one of the two recipes below matches.
  LOCALBOUNDARY=''
  # Some MIME messages quote the boundary.
  # Note: Per RFC 2046 para 5.1 MIME 1.0 boundary tags must be 7bit US ASCII.
  # If they contain 8bit characters, they are not following RFC 2046.
  #
  # The first two conditions check the overall shape of the header
  # (multipart type, a quoted boundary parameter, optionally followed by more
  # parameters).  The third repeats the prefix and uses \/ so that procmail
  # stores the text between the quotes in MATCH.
  :0
  * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]=".+"(;.*)?$
  * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"(;.*)?$
  * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="\/[^"]+
  { LOCALBOUNDARY=$MATCH }

  # Some MIME messages don't quote the boundary.
  # Note: Unquoted boundary tags can not contain ; or :
  # Only tried when the quoted form above did not match (E flag); MATCH then
  # holds everything after "boundary=" up to the first ", ; or :.
  :0 E
  * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]=[^"]+(;.*)?$
  * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]=\/[^";:]+
  { LOCALBOUNDARY=$MATCH }

  # No boundary tag was found for the MIME message body parts,
  # so treat the body itself as a MIME part.  The literal value NONE is the
  # marker the following recipes test for.
  :0
  * LOCALBOUNDARY ?? ^^^^
  { LOCALBOUNDARY='NONE' }
103
104  :0
105  * LOCALBOUNDARY ?? ^NONE$
106  {
107   SBLOG="L7-LOCALBOUNDARY=${LOCALBOUNDARY} using \"SpamBouncer_Autopsy_Mime_Boundary\""
108   INCLUDERC=${SBDIR}/functions/loglevel.rc
109  }
110  :0 E
111  {
112   SBLOG="L7-LOCALBOUNDARY=${LOCALBOUNDARY}"
113   INCLUDERC=${SBDIR}/functions/loglevel.rc
114  }
  # Save the message body into its own file for the recursive autopsy.
  # A real boundary was found, so a copy (c) of the body (b) is written
  # unchanged to MIME_1.
  :0 bc:
  * ! LOCALBOUNDARY ?? ^NONE$
  ${MESSAGETMPDIR}/MIME_1

  # The message body was not encapsulated in a MIME boundary, so we'll create
  # one on the fly for the autopsy.  MIME_1 is built in four steps:
  #   1. a blank line and the opening "--boundary" line (this truncates the file),
  #   2. the header fields selected by formail -cX "Content-" plus a blank line,
  #   3. a copy of the message body,
  #   4. the closing "--boundary--" line surrounded by blank lines.
  :0 E
  {
   LOCALBOUNDARY='SpamBouncer_Autopsy_Mime_Boundary'
   LOG=`(${ECHO} "";${ECHO} "--SpamBouncer_Autopsy_Mime_Boundary") >${MESSAGETMPDIR}/MIME_1`
   LOG=`(${FORMAIL} -cX "Content-";${ECHO} "") >>${MESSAGETMPDIR}/MIME_1`

   :0 bc:
   ${MESSAGETMPDIR}/MIME_1

   LOG=`(${ECHO} "";${ECHO} "--SpamBouncer_Autopsy_Mime_Boundary--";${ECHO} "") >>${MESSAGETMPDIR}/MIME_1`
  }
133
  # Named sed script that makes things more readable. Might take it out
  # or move it to sb-config-defaults.rc.
  # It collapses runs of spaces/tabs to one space and strips a trailing space;
  # it is exported to mimeparts.sh below.
  SPACES_TABS_TO_SPACES='s/[ 	]\{1,\}/ /g;s/ $//'

  # Set the initial recursion states: Don't change these.
  PART=1
  OUTLINE=1
  RECURSLEVEL=1

  # This limit can be increased if you want. It's there to cause the recursion to limit itself so
  # it doesn't get stuck in an infinite loop in case something breaks.
  # Note: Each recursion will fork a new process causing the parent process(es) to wait
  # until all the child process(es) are finished.
  # You'll not likely ever see any mime message with more than 10 nested mime parts.
  # NOTE(review): the limit set here is 3, not 10 -- confirm which one is intended.
  RECURSELIMIT=3

  SBLOG="L7-Entering file: ${SBDIR}/functions/mimeparts.sh"
  INCLUDERC=${SBDIR}/functions/loglevel.rc

  # Don't change this unless you know what you are doing or things will likely break.
  # This calls a custom /bin/sh script that recursively calls itself to find all the mime parts.
  # The command line arguments to mimeparts.sh become local variables in each recursion instance.
  # The listed variables are exported so the script can see them; whatever the
  # script prints is captured by the backticks and assigned to LOG.
  SBPID="$$"
  LOG=`export SED ECHO NL DECODEBASE64 SBLOGLEVEL GREP EGREP SBDIR SPACES_TABS_TO_SPACES MESSAGETMPDIR WC SBPID DATE HOST LOGFILE SBLOGFILE SBHEADERS HASHCOMMAND;\
       ${SBDIR}/functions/mimeparts.sh "MIME_${OUTLINE}" "${OUTLINE}" "${PART}" "${RECURSLEVEL}" "${RECURSELIMIT}" "${LOCALBOUNDARY}"
  `
  # procmail strips the last newline from the output of the above script, so
  # add it back (only at the log levels 7-9).
  :0
  * SBLOGLEVEL ?? ^[789]$
  { LOG="${NL}" }
  SBLOG="L7-Leaving file: ${SBDIR}/functions/mimeparts.sh"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
166 }
167
168 # We should have a new file ${MESSAGETMPDIR}/plain_body_files_to_parse.txt
169 # which contains a list of mime body part filenames that were determined
170 # to contain parsable text/html content and if they were base64 encoded
171 # were decoded on the fly.
172
173 # So we are now going to replace the message body
174 # with the contents of these files, send it through the body
175 # pattern matching filters, then restore the original message
176 # from the save. Note: This might break if you run out of disk space!
177 # so disk cleanup is going to have to be done.
178
179 :0
180 * $ ? ${TEST} -f ${MESSAGETMPDIR}/plain_body_files_to_parse.txt
181 {
182  FILELIST=`${CAT} ${MESSAGETMPDIR}/plain_body_files_to_parse.txt`
183  # Note: this filter uses for, do and done which are /bin/sh or /bin/bash
184  # builtin commands. You won't find them on the filesystem.
185  #
186  # This filter rebuilds the message body in X-SpamBouncer-Autopsy: format
187  # by temporarily replacing the original message body in the pipeline.
188  #
189  # The last part of this filter squeezes the parts by
190  # converting LFCR to LF, replaces multiple tabs and
191  # spaces with single spaces, squeezes multiple blank lines one
192  # blank line and removes any leading or trailing spaces from each line.
193  # Then checks for mailbox message delimeters and escapes them.
194  #
195  # This is to simplify message body pattern matching on the autopsy copy.
196  # This could also be used to remove any non-printable characters and/or replace
197  # them with quoted printable characters.
198  :0bf
199  |${SED} '1,$d' |\
200   ${ECHO} "X-SpamBouncer-Autopsy: Begin:";\
201  (for i in ${FILELIST};\
202   do (\
203        ${ECHO};\
204        ${ECHO} "X-SpamBouncer-Autopsy: File: \"${i}\"";\
205	${ECHO};\
206        ${CAT} $i\
207      ) done;\
208   ${ECHO};\
209   ${ECHO} "X-SpamBouncer-Autopsy: End") |\
210   ${TR} "\012\015" "\012" |\
211   ${SED} -e 's/[ 	]\{1,\}/ /g;s/^ $//' |\
212   ${CAT} -s |\
213   ${SED} -e 's/^ //;s/ $//' |\
214   ${SED} -e 's/^From /\\&/;s/^\.$/\\&/'
215
216  # This is for debugging and may be going away or surpressed if
217  # the log level isn't high enough. It's a copy of what is in the
218  # pipeline that will be used next for the message body domain and
219  # IP extraction recipes and the pattern matching recipes.
220  SBLOG="L5-Saving: \"${MESSAGETMPDIR}/message_autopsy.eml\""
221  INCLUDERC=${SBDIR}/functions/loglevel.rc
222  :0c:
223  ${MESSAGETMPDIR}/message_autopsy.eml
224
225  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
226  # Do spambouncer body parsing stuff here on the edited message body.
227  # When all pattern matching and extraction is done, restore the
228  # original message body.
229  # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
230
231  # The following will also call sb6.rc for the body pattern matching recipes.
232  INCLUDERC=${SBDIR}/functions/extract-body-info.rc
233
234  # Split out and save any SpamBouncer headers resulting from the pattern matching
235  # so we can append the new headers to the message in the main pipe.
236  :0
237  * H ?? ^X-S(pamBouncer|B(Rule|Stop|Pass|Note|Class|Score|Pattern)):.+$
238  { LOG=`${FORMAIL} -cX "" |${SED} -e '/^X-S\(pamBouncer\|B\(Rule\|Stop\|Pass\|Note\|Class\|Score\|Pattern\)\):.\{1,\}$/!d' >${MESSAGETMPDIR}/sbheaders-saved-after-autopsy` }
239 }
240
241 # else Nothing was found during the MIME message body autopsy that was worth parsing.
242 :0 E
243 { MIME=no }
244
245 SBLOG="L7-Restoring original message to pipeline from ${MESSAGETMPDIR}/message-original.eml"
246 INCLUDERC=${SBDIR}/functions/loglevel.rc
247 # Restore the message from the file saved copy of the original.
248 :0 hbf
249 |${SED} -e '1,$d';(${CAT} ${MESSAGETMPDIR}/message-original.eml)
250
 # The three recipes below share one pattern: when the named file exists,
 # a header filter (hf) re-emits the current header through formail -X ""
 # and then appends the contents of the file followed by an empty line.

 # Restore the SpamBouncer headers from the file saved copy from before the autopsy.
 :0
 * $ ? ${TEST} -f ${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml
 {
  SBLOG="L7-Restoring saved spambouncer headers from ${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  :0 hf
  |${FORMAIL} -X "";(${CAT} "${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml";${ECHO})
 }

 # Append any new headers added during autopsy.
 # NOTE(review): nothing in this file writes sbheaders-created-during-autopsy;
 # presumably mimeparts.sh creates it -- confirm.
 :0
 * $ ? ${TEST} -f ${MESSAGETMPDIR}/sbheaders-created-during-autopsy
 {
  SBLOG="L7-Adding spambouncer headers from ${MESSAGETMPDIR}/sbheaders-created-during-autopsy"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  :0 hf
  |${FORMAIL} -X "";(${CAT} "${MESSAGETMPDIR}/sbheaders-created-during-autopsy";${ECHO})
 }

 # Append any new headers resulting from the pattern matching filters.
 :0
 * $ ? ${TEST} -f ${MESSAGETMPDIR}/sbheaders-saved-after-autopsy
 {
  SBLOG="L7-Adding spambouncer headers from ${MESSAGETMPDIR}/sbheaders-saved-after-autopsy"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  :0 hf
  |${FORMAIL} -X "";(${CAT} "${MESSAGETMPDIR}/sbheaders-saved-after-autopsy";${ECHO})
 }
280}
281
# Fall back to the old method.
# Either we don't have a valid temp directory (MESSAGETMPDIR is empty),
# or it's not a MIME 1.0 message / the autopsy found nothing (MIME is "no").
# The two conditions are weighted (1^0): each one that matches adds 1 to the
# score and the recipe runs when the score is positive, i.e. a logical OR.
:0
* 1^0 MESSAGETMPDIR ?? ^^^^
* 1^0 MIME ?? ^no$
{
 SBLOG="L3-Using fallback method for body pattern matching.${NL}"
 INCLUDERC=${SBDIR}/functions/loglevel.rc
 # The following will also call sb6.rc.
 INCLUDERC=${SBDIR}/functions/extract-body-info.rc
}
294
# Cleanup: the user may set KEEPAUTOPSIES to a number from 5 to 999.
# Any other value -- unset, empty, out of range or not a number -- does not
# match the pattern below and is replaced with the default of 20.
:0
* ! KEEPAUTOPSIES ?? ^([5-9]|[1-9][0-9]|[1-9][0-9][0-9])$
{ KEEPAUTOPSIES=20 }
305
# This collects the autopsy directory names from SBTEMP (ls -t lists the
# newest first), removes the newest KEEPAUTOPSIES (20 by default) from the
# list, then limits what remains to the 50 oldest directories so that at
# most 50 are deleted at a time.  This keeps the variable from being
# overloaded in situations where there are hundreds of directories, e.g.
# when the user had it set to 999 and then lowered it, and LINEBUF isn't
# set that high.
#
# Note: the last sed command implements the equivalent of
# "tail -n 50" using a sed script in place of tail.
# Not all systems SpamBouncer runs on may have the "tail" command,
# so doing it this way is more portable for legacy systems running older
# versions of Unix. This is documented in the sed info pages under
# examples.
# The script collects lines 1-50 in the pattern space; from then on each
# cycle appends the next line (N) and drops the oldest one (D), and the
# window is printed when the last line is reached ($q).
CLEANUPDIRECTORIES=`${CD} ${SBTEMP} && ${LS} -td ${LOGNAME}.message.*.* 2>/dev/null |\
	${SED} -e "1,${KEEPAUTOPSIES}d" |\
	${SED} -e '1h;2,50 {; H; g; };$q;1,49d;N;D'
	`
323
324# LOG="[$$]`basename $_`:CLEANUPDIRECTORIES=\"${CLEANUPDIRECTORIES}\"${NL}"
325
# Sanity checks
# Since we are doing a recursive rm, double check to make
# sure we are in an acceptable directory for this.
# All of the following must hold before anything is removed:
#   - SBTEMP contains a path component named "tmp" or "temp",
#   - SBTEMP is an existing directory,
#   - MESSAGETMPDIR contains the user's LOGNAME,
#   - CLEANUPDIRECTORIES contains the word "message" and is not empty,
#   - no line of CLEANUPDIRECTORIES is "/", "." or "..", or contains a "/"
#     or a "*" wildcard.
:0
* SBTEMP ?? ^(.*/te?mp/?)
* $ ? ${TEST} -d ${SBTEMP}
* $ MESSAGETMPDIR ?? ^.*${LOGNAME}.*$
* CLEANUPDIRECTORIES ?? ^.*message.*$
* ! CLEANUPDIRECTORIES ?? ^^^^
* ! CLEANUPDIRECTORIES ?? ^(/|\.|\.\.|.*([/]|\*).*)$
{
 SBLOG="L5-Removing stale autopsy directories in ${SBTEMP}"
 INCLUDERC=${SBDIR}/functions/loglevel.rc
 # The message copy fed to the pipe is discarded by sed; the real work is the
 # rm -r run from inside SBTEMP.  "dummy" guarantees rm always has an
 # operand, and all output and errors are thrown away.
 :0 ic
 |${SED} -e '1,$d';(${CD} ${SBTEMP} && ${RM} -r dummy `${ECHO} "${CLEANUPDIRECTORIES}"` 2>/dev/null ) >>/dev/null
}
# The sanity checks failed although there was something to clean up.
:0 E
* ! CLEANUPDIRECTORIES ?? ^^^^
{
 SBLOG="L1-Error: mimeparser.rc: Failed to remove remporary autopsy directorie(s) from ${SBTEMP}"
 INCLUDERC=${SBDIR}/functions/loglevel.rc
}
352
# Debugging aid, likely to be removed eventually.
# For messages that went through the MIME autopsy, and only at log levels
# 7 to 9, keep one more copy of the message as it looks after all the
# restorations above, so their result can be verified.
:0
* MIME ?? ^yes$
* SBLOGLEVEL ?? ^[789]$
{
 SBLOG="L7-Debug[${SBLOGLEVEL}]: Saving ${MESSAGETMPDIR}/message_debug.eml"
 INCLUDERC=${SBDIR}/functions/loglevel.rc

 :0 c:
 ${MESSAGETMPDIR}/message_debug.eml
}
366