# MIMEPARSER.RC
# Process messages that have MIME-Version: 1.0 in the header
#
# Overview of what this rcfile does:
#   1. Creates a per-message scratch directory ${MESSAGETMPDIR} below ${SBTEMP}.
#   2. For MIME 1.0 messages: saves the original message and any existing
#      SpamBouncer headers, hands the body to functions/mimeparts.sh, rebuilds
#      a plain-text "autopsy" body from the files that script lists in
#      plain_body_files_to_parse.txt, and runs functions/extract-body-info.rc
#      against that temporary body.
#   3. Restores the original message and re-appends the SpamBouncer headers.
#   4. Falls back to running extract-body-info.rc on the untouched message
#      when there is no scratch directory or nothing parsable was found.
#   5. Prunes old autopsy directories, keeping the newest ${KEEPAUTOPSIES}.
#
# Variables read here but not set here:
#   SBTEMP SBDIR LOGNAME LOCALRANDOM SBLOGLEVEL KEEPAUTOPSIES NL
#   TEST MKDIR FORMAIL SED CAT ECHO TR LS CD RM
# Logging is done by setting SBLOG and including functions/loglevel.rc.

# Define a temporary directory where we will save files
# during the MIME autopsy.
MESSAGETMPDIR="${SBTEMP}/${LOGNAME}.message.${LOCALRANDOM}"

# You will have to consider /tmp race conditions or disk space
# if you point SBTEMP at a directory other than /tmp
:0
* $ ? ${TEST} -d ${SBTEMP}
{
  # NOTE(review): this assumes UMASK already holds a value at this point;
  # confirm it is assigned earlier, otherwise the restore below assigns an
  # empty value.
  OLDUMASK=${UMASK}
  # Make sure the ${MESSAGETMPDIR} directory is private for the user in question.
  # 077 removes all group and world access; the previous value of 007 left
  # the directory readable and writable by the user's group.
  UMASK=077
  :0
  * $ ? ${MKDIR} ${MESSAGETMPDIR}
  {
    SBLOG="L5-Creating temp directory: \"${MESSAGETMPDIR}\""
    INCLUDERC=${SBDIR}/functions/loglevel.rc
  }
  :0 E
  {
    SBLOG="L1-Error: Failed to create temp directory:\"${MESSAGETMPDIR}\""
    INCLUDERC=${SBDIR}/functions/loglevel.rc
    MESSAGETMPDIR=''
  }
  UMASK=${OLDUMASK}
}

# ${SBTEMP} is not a directory, so nothing was created. Blank MESSAGETMPDIR
# so the autopsy below is skipped and the fallback method is used, instead of
# stripping and unfolding headers and then failing to save/restore the message.
:0 E
{ MESSAGETMPDIR='' }

# If we don't have a /tmp directory and a MIME header, then bail.
# Otherwise proceed with the mime autopsy.
# The bracket expression in the sed script holds a space and a TAB.
MIME=no
MIMEVERSION=`${FORMAIL} -cX "MIME-Version:" |${SED} -e 's/[ 	]\{1,\}/ /g;s/ $//'`
:0
* ! MESSAGETMPDIR ?? ^^^^
* MIMEVERSION ?? ^MIME-Version: 1\.0$
{
  MIME=yes
  SBLOG="L5-Begin MIME Message Body Autopsy"
  INCLUDERC=${SBDIR}/functions/loglevel.rc

  # Save a copy of existing spambouncer headers for later restoration.
  :0hc:
  |${SED} -ne '/^X-S\(pamBouncer\|B\(Rule\|Stop\|Pass\|Note\|Class\|Score\|Pattern\)\):.\{1,\}$/p' >>${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml

  # Strip any existing spambouncer headers.
  :0hf
  |${SED} -e '/^X-S\(pamBouncer\|B\(Rule\|Stop\|Pass\|Note\|Class\|Score\|Pattern\)\):.\{1,\}$/d'

  # Save a copy of the entire original message before we start the autopsy.
  # The message will be restored from this file later after
  # the mime message body autopsy is complete.
  :0c:
  ${MESSAGETMPDIR}/message-original.eml

  # Unfold all the headers, replace multiple tabs and spaces with single spaces,
  # strip all trailing spaces, and restore the newline at end of headers.
  # Doing this makes header parsing less complicated when using regular expressions.
  # We'll restore the headers later to their original condition from the saved file.
  # The bracket expression in the sed script holds a space and a TAB.
  :0 hf
  |${FORMAIL} -cX "" |${SED} -e 's/[ 	]\{1,\}/ /g;s/ $//;$s/$/\n/'

  # Begin the mime Message body autopsy
  :0
  * MIME ?? ^yes$
  {
    MIMELEVEL=0

    # level is the internal representation of mime part recursion
    # At level 0 the Content-Type comes from the message itself, otherwise
    # from the already extracted part file MIME_<level>.
    LOCALLEVEL=${MIMELEVEL}
    :0
    * LOCALLEVEL ?? ^0$
    { LOCALCONTENT=`${FORMAIL} -cX "Content-Type:"` }
    :0 E
    { LOCALCONTENT=`${CAT} ${MESSAGETMPDIR}/MIME_${LOCALLEVEL} |${FORMAIL} -cX "Content-Type:"` }

    SBLOG="L7-LOCALCONTENT=${LOCALCONTENT}"
    INCLUDERC=${SBDIR}/functions/loglevel.rc

    LOCALBOUNDARY=''
    # Some mime messages quote the boundary
    # Note: Per RFC 2046 para 5.1 MIME 1.0 boundary tags must be 7bit US ASCII
    # If they contain 8bit characters, they are not following RFC 2046
    # The last condition captures the text between the quotes into MATCH.
    :0
    * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]=".+"(;.*)?$
    * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="[^"]+"(;.*)?$
    * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]="\/[^"]+
    { LOCALBOUNDARY=$MATCH }

    # Some mime messages don't quote the boundary
    # Note: Unquoted boundary tags can not contain ; or :
    :0 E
    * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]=[^"]+(;.*)?$
    * LOCALCONTENT ?? ^[Cc][Oo][Nn][Tt][Ee][Nn][Tt]-[Tt][Yy][Pp][Ee]: [Mm][Uu][Ll][Tt][Ii][Pp][Aa][Rr][Tt]/.+;( )?[Bb][Oo][Uu][Nn][Dd][Aa][Rr][Yy]=\/[^";:]+
    { LOCALBOUNDARY=$MATCH }

    # No boundary tag was found for the MIME message body parts
    # so treat the body itself as a MIME part.
    :0
    * LOCALBOUNDARY ?? ^^^^
    { LOCALBOUNDARY='NONE' }

    :0
    * LOCALBOUNDARY ?? ^NONE$
    {
      SBLOG="L7-LOCALBOUNDARY=${LOCALBOUNDARY} using \"SpamBouncer_Autopsy_Mime_Boundary\""
      INCLUDERC=${SBDIR}/functions/loglevel.rc
    }
    :0 E
    {
      SBLOG="L7-LOCALBOUNDARY=${LOCALBOUNDARY}"
      INCLUDERC=${SBDIR}/functions/loglevel.rc
    }

    # Save the message body into its own file for recursive autopsy
    :0 bc:
    * ! LOCALBOUNDARY ?? ^NONE$
    ${MESSAGETMPDIR}/MIME_1

    # The message body was not encapsulated in a mime Boundary, so we'll create one
    # on the fly for the autopsy: opening boundary line, the message's Content-*
    # headers, the body, then the closing boundary line.
    :0 E
    {
      LOCALBOUNDARY='SpamBouncer_Autopsy_Mime_Boundary'
      LOG=`(${ECHO} "";${ECHO} "--SpamBouncer_Autopsy_Mime_Boundary") >${MESSAGETMPDIR}/MIME_1`
      LOG=`(${FORMAIL} -cX "Content-";${ECHO} "") >>${MESSAGETMPDIR}/MIME_1`

      :0 bc:
      ${MESSAGETMPDIR}/MIME_1

      LOG=`(${ECHO} "";${ECHO} "--SpamBouncer_Autopsy_Mime_Boundary--";${ECHO} "") >>${MESSAGETMPDIR}/MIME_1`
    }

    # Named sed script that makes things more readable. Might take it out
    # or move it to sb-config-defaults.rc
    # The bracket expression holds a space and a TAB, as the name says.
    SPACES_TABS_TO_SPACES='s/[ 	]\{1,\}/ /g;s/ $//'

    # Set the initial recursion states: Don't change these.
    PART=1
    OUTLINE=1
    RECURSLEVEL=1

    # This limit can be increased if you want. It's there to cause the recursion to limit itself so
    # it doesn't get stuck in an infinite loop in case something breaks.
    # Note: Each recursion will fork a new process causing the parent process(es) to wait
    # until all the child process(es) are finished.
    # You'll not likely ever see any mime message with more than 10 nested mime parts.
    RECURSELIMIT=3

    SBLOG="L7-Entering file: ${SBDIR}/functions/mimeparts.sh"
    INCLUDERC=${SBDIR}/functions/loglevel.rc

    # Don't change this unless you know what you are doing or things will likely break.
    # This calls a custom /bin/sh script that recursively calls itself to find all the mime parts.
    # The command line arguments to mimeparts.sh become local variables in each recursion instance.
    # Whatever the script prints is written to the procmail log via LOG.
    SBPID="$$"
    LOG=`export SED ECHO NL DECODEBASE64 SBLOGLEVEL GREP EGREP SBDIR SPACES_TABS_TO_SPACES MESSAGETMPDIR WC SBPID DATE HOST LOGFILE SBLOGFILE SBHEADERS HASHCOMMAND;\
      ${SBDIR}/functions/mimeparts.sh "MIME_${OUTLINE}" "${OUTLINE}" "${PART}" "${RECURSLEVEL}" "${RECURSELIMIT}" "${LOCALBOUNDARY}"
      `
    # procmail strips the last newline from the above script, so add it back.
    :0
    * SBLOGLEVEL ?? ^[789]$
    { LOG="${NL}" }
    SBLOG="L7-Leaving file: ${SBDIR}/functions/mimeparts.sh"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
  }

  # We should have a new file ${MESSAGETMPDIR}/plain_body_files_to_parse.txt
  # which contains a list of mime body part filenames that were determined
  # to contain parsable text/html content and if they were base64 encoded
  # were decoded on the fly.

  # So we are now going to replace the message body
  # with the contents of these files, send it through the body
  # pattern matching filters, then restore the original message
  # from the save. Note: This might break if you run out of disk space!
  # so disk cleanup is going to have to be done.

  :0
  * $ ? ${TEST} -f ${MESSAGETMPDIR}/plain_body_files_to_parse.txt
  {
    FILELIST=`${CAT} ${MESSAGETMPDIR}/plain_body_files_to_parse.txt`
    # Note: this filter uses for, do and done which are /bin/sh or /bin/bash
    # builtin commands. You won't find them on the filesystem.
    #
    # This filter rebuilds the message body in X-SpamBouncer-Autopsy: format
    # by temporarily replacing the original message body in the pipeline.
    #
    # The last part of this filter squeezes the parts by
    # converting CRLF line endings to LF (tr -d deletes every CR; mapping CR
    # to LF instead would double-space the text), replacing multiple tabs and
    # spaces with single spaces, squeezing multiple blank lines into one
    # blank line and removing any leading or trailing space from each line.
    # Then it checks for mailbox message delimiters and escapes them.
    # The bracket expression in the first sed script holds a space and a TAB.
    #
    # This is to simplify message body pattern matching on the autopsy copy.
    # This could also be used to remove any non-printable characters and/or replace
    # them with quoted printable characters.
    :0bf
    |${SED} '1,$d' |\
     ${ECHO} "X-SpamBouncer-Autopsy: Begin:";\
     (for i in ${FILELIST};\
      do (\
       ${ECHO};\
       ${ECHO} "X-SpamBouncer-Autopsy: File: \"${i}\"";\
       ${ECHO};\
       ${CAT} $i\
      ) done;\
      ${ECHO};\
      ${ECHO} "X-SpamBouncer-Autopsy: End") |\
     ${TR} -d "\015" |\
     ${SED} -e 's/[ 	]\{1,\}/ /g;s/^ $//' |\
     ${CAT} -s |\
     ${SED} -e 's/^ //;s/ $//' |\
     ${SED} -e 's/^From /\\&/;s/^\.$/\\&/'

    # This is for debugging and may be going away or suppressed if
    # the log level isn't high enough. It's a copy of what is in the
    # pipeline that will be used next for the message body domain and
    # IP extraction recipes and the pattern matching recipes.
    SBLOG="L5-Saving: \"${MESSAGETMPDIR}/message_autopsy.eml\""
    INCLUDERC=${SBDIR}/functions/loglevel.rc
    :0c:
    ${MESSAGETMPDIR}/message_autopsy.eml

    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
    # Do spambouncer body parsing stuff here on the edited message body.
    # When all pattern matching and extraction is done, restore the
    # original message body.
    # !!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!

    # The following will also call sb6.rc for the body pattern matching recipes.
    INCLUDERC=${SBDIR}/functions/extract-body-info.rc

    # Split out and save any SpamBouncer headers resulting from the pattern matching
    # so we can append the new headers to the message in the main pipe.
    :0
    * H ?? ^X-S(pamBouncer|B(Rule|Stop|Pass|Note|Class|Score|Pattern)):.+$
    { LOG=`${FORMAIL} -cX "" |${SED} -e '/^X-S\(pamBouncer\|B\(Rule\|Stop\|Pass\|Note\|Class\|Score\|Pattern\)\):.\{1,\}$/!d' >${MESSAGETMPDIR}/sbheaders-saved-after-autopsy` }
  }

  # else Nothing was found during the MIME message body autopsy that was worth parsing.
  # Setting MIME=no makes the fallback recipe further down run instead.
  :0 E
  { MIME=no }

  SBLOG="L7-Restoring original message to pipeline from ${MESSAGETMPDIR}/message-original.eml"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  # Restore the message from the file saved copy of the original.
  # sed discards the current message; cat supplies the saved one.
  :0 hbf
  |${SED} -e '1,$d';(${CAT} ${MESSAGETMPDIR}/message-original.eml)

  # Restore the SpamBouncer headers from the file saved copy from before the autopsy.
  # The trailing echo re-adds the blank line that ends the header.
  :0
  * $ ? ${TEST} -f ${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml
  {
    SBLOG="L7-Restoring saved spambouncer headers from ${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
    :0 hf
    |${FORMAIL} -X "";(${CAT} "${MESSAGETMPDIR}/sbheaders-saved-before-autopsy.eml";${ECHO})
  }

  # Append any new headers added during autopsy
  # (this file is not written by this rcfile; presumably mimeparts.sh creates it)
  :0
  * $ ? ${TEST} -f ${MESSAGETMPDIR}/sbheaders-created-during-autopsy
  {
    SBLOG="L7-Adding spambouncer headers from ${MESSAGETMPDIR}/sbheaders-created-during-autopsy"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
    :0 hf
    |${FORMAIL} -X "";(${CAT} "${MESSAGETMPDIR}/sbheaders-created-during-autopsy";${ECHO})
  }

  # Append any new headers resulting from the pattern matching filters.
  :0
  * $ ? ${TEST} -f ${MESSAGETMPDIR}/sbheaders-saved-after-autopsy
  {
    SBLOG="L7-Adding spambouncer headers from ${MESSAGETMPDIR}/sbheaders-saved-after-autopsy"
    INCLUDERC=${SBDIR}/functions/loglevel.rc
    :0 hf
    |${FORMAIL} -X "";(${CAT} "${MESSAGETMPDIR}/sbheaders-saved-after-autopsy";${ECHO})
  }
}

# Fallback to old method.
# We either don't have a valid /tmp directory,
# or it's not a MIME 1.0 message (or the autopsy found nothing to parse).
# The two weighted conditions act as a logical OR.
:0
* 1^0 MESSAGETMPDIR ?? ^^^^
* 1^0 MIME ?? ^no$
{
  SBLOG="L3-Using fallback method for body pattern matching.${NL}"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  #The following will also call sb6.rc
  INCLUDERC=${SBDIR}/functions/extract-body-info.rc
}

# Cleanup, allow the user to set
# KEEPAUTOPSIES from 5 - 999 otherwise set it to 20
# remove anything else.
:0
* KEEPAUTOPSIES ?? ^^^^
{ KEEPAUTOPSIES=20 }

:0
* ! KEEPAUTOPSIES ?? ^([5-9]|[1-9][0-9]|[1-9][0-9][0-9])$
{ KEEPAUTOPSIES=20 }

# This collects the autopsy directory file names from SBTEMP,
# removes the newest 20 from the list by default, then limits it
# to delete the 50 oldest directories at a time from what remains in the
# list so we don't overload the variable in situations where there
# are hundreds of directories if the user had it set to 999,
# then lowered it, and LINEBUF isn't set that high.
#
# Note: the last sed command implements the equivalent of
# "tail -n 50" using a sed script in place of tail.
# Not all systems SpamBouncer runs on may have the "tail" command
# so doing it this way is more portable for legacy systems running older
# versions of Unix. This is documented in the sed info pages under
# examples.
#
# NOTE(review): the glob ends in "*.*", so it only matches directory names
# whose ${LOCALRANDOM} part contains a dot -- confirm against where
# LOCALRANDOM is generated.
CLEANUPDIRECTORIES=`${CD} ${SBTEMP} && ${LS} -td ${LOGNAME}.message.*.* 2>/dev/null |\
  ${SED} -e "1,${KEEPAUTOPSIES}d" |\
  ${SED} -e '1h;2,50 {; H; g; };$q;1,49d;N;D'
  `

# LOG="[$$]`basename $_`:CLEANUPDIRECTORIES=\"${CLEANUPDIRECTORIES}\"${NL}"

# Sanity checks
# Since we are doing a recursive rm, double check to make
# sure we are in an acceptable directory for this:
#  - SBTEMP contains a tmp or temp path component and is a directory,
#  - MESSAGETMPDIR includes the user's LOGNAME,
#  - CLEANUPDIRECTORIES is not empty and contains the word message,
#  - CLEANUPDIRECTORIES is not "/", "." or ".." and contains no "/" or "*".
# "dummy" keeps rm supplied with an operand even if the list expands to nothing.
:0
* SBTEMP ?? ^(.*/te?mp/?)
* $ ? ${TEST} -d ${SBTEMP}
* $ MESSAGETMPDIR ?? ^.*${LOGNAME}.*$
* CLEANUPDIRECTORIES ?? ^.*message.*$
* ! CLEANUPDIRECTORIES ?? ^^^^
* ! CLEANUPDIRECTORIES ?? ^(/|\.|\.\.|.*([/]|\*).*)$
{
  SBLOG="L5-Removing stale autopsy directories in ${SBTEMP}"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  :0 ic
  |${SED} -e '1,$d';(${CD} ${SBTEMP} && ${RM} -r dummy `${ECHO} "${CLEANUPDIRECTORIES}"` 2>/dev/null ) >>/dev/null
}
# There was something to remove but the sanity checks above refused it.
:0 E
* ! CLEANUPDIRECTORIES ?? ^^^^
{
  SBLOG="L1-Error: mimeparser.rc: Failed to remove remporary autopsy directorie(s) from ${SBTEMP}"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
}

# This will likely be removed eventually. It's for debugging and only
# triggers if the SBLOGLEVEL is high enough.
# Save another copy of the message after the restorations are complete.
# This is to verify the above operations performed as intended.
:0
* SBLOGLEVEL ?? ^[789]$
* MIME ?? ^yes$
{
  SBLOG="L7-Debug[${SBLOGLEVEL}]: Saving ${MESSAGETMPDIR}/message_debug.eml"
  INCLUDERC=${SBDIR}/functions/loglevel.rc
  :0c:
  ${MESSAGETMPDIR}/message_debug.eml
}