1# SPAMBODY-PATTERNS.RC
2#
3#  Recipes to catch spam with reliably detectable message body
4#  patterns.
5#
6#  Last updated: 2/17/2011
7
8LOCALSCORE=0   # score accumulator for this file; each matching recipe below adds 5 to it
9
10# CID embedded image, no URL or email address: runs when LEANTAG is "no" and the Content-Type header is multipart/related with type="multipart/alternative".
11#  Adds 5 to LOCALSCORE if the body has no "<A HREF=" and no e-mail address but holds the quoted-printable <DIV><IMG ... src=3D"cid:..."> block.  NOTE(review): \f" on the src= line looks like a typo for \" -- confirm.
12:0
13* LEANTAG ?? ^no$
14* ^Content-Type: multipart/related;$[^0-9a-z]*type=\"multipart/alternative\";
15{
16 :0 B
17 * ! ()<A HREF=
18 * ! (^|[^0-9a-z])[0-9a-z][-_0-9a-z]*@([0-9a-z][-_0-9a-z]*(�|\.|[=%]2E))+[a-z][a-z][a-z]?[a-z]?([^0-9a-z.]|$)
19 * ()<DIV><IMG alt=3D\"\" hspace=3D0=20$\
20         src=3D\"cid:[0-9a-z$@]*\f" =$\
21         align=3Dbaseline=20$\
22         border=3D0></DIV>
23 {
24  SBLOG="C3T-${TESTNAME} (CID image/no URL or Email Address)"
25  INCLUDERC=${SBDIR}/functions/loglevel.rc
26
27  :0
28  * $ ${LOCALSCORE}^0
29  * 5^0
30  { LOCALSCORE=$= }
31 }
32}
33
34# Fast-Flux URL Hosting
35#  Runs ${SBHOST} on FIRSTBODYHOST and adds 5 to LOCALSCORE when the output has five consecutive lines that each end in a dotted-quad IP address.
36#  1/03/2007:
37#   Fast-flux hosting is a type of web hosting where DNS is provided
38#   through trojaned computers.  Typically, a fast-flux web site has
39#   five different IPs assigned to it, normally not from the same
40#   provider or in the same netblock.  It is easy to spot and unlike
41#   the type of DNS service that a legitimate site would have.
42#  NOTE(review): the lookup still runs when FIRSTBODYHOST is empty, and the B flag on the nested recipe has no effect on a "LOCALBUFFER ??" condition -- confirm both are intended.
43:0
44* LEANTAG ?? ^no$
45* ! FIRSTBODYHOST ?? ^host\.example\.com$
46* ^Content-Type: multipart/related;$[^0-9a-z]*type=\"multipart/alternative\";
47{
48 LOCALBUFFER='zilch'
49 LOCALHOST=${FIRSTBODYHOST}
50 LOCALBUFFER=`${SBHOST} ${LOCALHOST} ${SBNAMESRVR}`
51
52 :0 B
53 * LOCALBUFFER ?? ^.* [0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?$\
54                   .* [0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?$\
55                   .* [0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?$\
56                   .* [0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?$\
57                   .* [0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?\.[0-9][0-9]?[0-9]?$
58 {
59  SBLOG="C3T-${TESTNAME} (Host ${FIRSTBODYHOST} uses fast-flux hosting)"
60  INCLUDERC=${SBDIR}/functions/loglevel.rc
61
62  :0
63  * $ ${LOCALSCORE}^0
64  * 5^0
65  { LOCALSCORE=$= }
66 }
67}
68
69# HTML attached file in message body.
70#  Body test: a "--_mixed..." text/plain 7bit part followed later by a "--_mixed..." text/html part named *.htm or *.html, base64-encoded, with an attachment disposition; adds 5 to LOCALSCORE.
71:0
72* LEANTAG ?? ^no$
73*   B ?? ^--_mixed[0-9a-z]*$\
74          Content-Type: text/plain; charset=\"[0-9a-z][-_0-9a-z]*\"$\
75          Content-Transfer-Encoding: 7bit$\
76          (.*$)*$\
77          ^--_mixed[0-9a-z]*$\
78          Content-Type: text/html; name=\"[0-9a-z][-_0-9a-z]*\.html?\"$\
79          Content-Transfer-Encoding: base64$\
80          Content-Disposition: attachment; filename=\"[0-9a-z][-_0-9a-z]*\.html?\"$
81{
82 SBLOG="C3T-${TESTNAME} (Attached HTML file in message body)"
83 INCLUDERC=${SBDIR}/functions/loglevel.rc
84
85 :0
86 * $ ${LOCALSCORE}^0
87 * 5^0
88 { LOCALSCORE=$= }
89}
90
91# Nonexistent domain in message body.
92#  Body test (B flag) for the listed unused domain names; the dot may be literal, "=2E", "%2E" or the raw non-ASCII byte in the patterns.  Starting from -1000, any single hit (+1100) makes the total positive and adds 5 to LOCALSCORE.
93:0 B
94* LEANTAG ?? ^no$
95* -1000^0
96*  1100^0    (^|[^-_0-9a-z])altavista(�|\.|=2E|%2E)(com|net)([^a-z0-9.]|\. |\.$|$)
97*  1100^0    (^|[^-_0-9a-z])angelfire(�|\.|=2E|%2E)com([^a-z0-9.]|\. |\.$|$)
98*  1100^0    (^|[^-_0-9a-z])goatse(�|\.|=2E|%2E)cx([^a-z0-9.]|\. |\.$|$)
99*  1100^0    (^|[^-_0-9a-z])home(�|\.|=2E|%2E)com([^a-z0-9.]|\. |\.$|$)
100*  1100^0    (^|[^-_0-9a-z])work(�|\.|=2E|%2E)com([^a-z0-9.]|\. |\.$|$)
101*  1100^0    (^|[^-_0-9a-z])ybecker(�|\.|=2E|%2E)net([^a-z0-9.]|\. |\.$|$)
102{
103 SBLOG="C3T-${TESTNAME} (Unused Domain in Message Body)"
104 INCLUDERC=${SBDIR}/functions/loglevel.rc
105
106 :0
107 * $ ${LOCALSCORE}^0
108 * 5^0
109 { LOCALSCORE=$= }
110}
111
112# Decimal URL
113#
114#  This catches http://99999999/ urls.
115#  Body test: "://" (optionally preceded by "=3D" and by any of the letters of "http", each optional), then 8 or more digits, an optional :port and "/"; any match adds 5 to LOCALSCORE.
116:0 B
117* LEANTAG ?? ^no$
118* -1000^0
119*  1100^1    (^|[^0-9a-z])(=3D)?h?t?t?p?://[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]+(:[0-9][0-9][0-9]?[0-9]?[0-9]?)?/
120{
121 SBLOG="C3T-${TESTNAME} (Decimal URL)"
122 INCLUDERC=${SBDIR}/functions/loglevel.rc
123
124 :0
125 * $ ${LOCALSCORE}^0
126 * 5^0
127 { LOCALSCORE=$= }
128}
129
130# Numeric URL with non-standard port
131#
132#  This catches http://208.134.56.32:1234 urls.
133#  Body test; the port is any 2-5 digit number, so an explicit :80 or :443 matches too.  Octet separators may be ".", "=2E" or the raw non-ASCII byte in the pattern.  Any match adds 5 to LOCALSCORE.
134:0 B
135* LEANTAG ?? ^no$
136* -1000^0
137*  1100^1    (^|[^0-9a-z])(=3D)?h?t?t?p?://[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(:[0-9][0-9][0-9]?[0-9]?[0-9]?)([^0-9a-z:.]|$)
138{
139 SBLOG="C3T-${TESTNAME} (Numeric URL with non-standard port)"
140 INCLUDERC=${SBDIR}/functions/loglevel.rc
141
142 :0
143 * $ ${LOCALSCORE}^0
144 * 5^0
145 { LOCALSCORE=$= }
146}
147
148# URLs with Bogus Queries
149#
150#  This catches http://example.com?zzzz type URLs -- a URL hostname should end
151#  either with a space (nothing), or with a forward slash, NEVER with
152#  a query/question mark.
153#  Skipped entirely when the body mentions trueswitch.com; otherwise any http(s)://host.tld?alnum match (tld of 2-4 letters) adds 5 to LOCALSCORE.
154:0 B
155* LEANTAG ?? ^no$
156* ! (^|[^-_0-9a-z]|=2E)trueswitch(�|\.|=2E)com([^a-z0-9.]|\. |\.$|$)
157* -1000^0
158*  1100^1    (^|[^0-9a-z])(=3D)?https?://([0-9a-z][-_0-9a-z]+\.)+[a-z][a-z][a-z]?[a-z]?\?[0-9a-z]+
159{
160 SBLOG="C3T-${TESTNAME} (URL with bogus query)"
161 INCLUDERC=${SBDIR}/functions/loglevel.rc
162
163 :0
164 * $ ${LOCALSCORE}^0
165 * 5^0
166 { LOCALSCORE=$= }
167}
168
169# URL with illegal characters
170#  Body test: http(s): followed by //, /\, \/ or \\, then optional host labels, then "<" or ">" directly before a dot or an alphanumeric; a match adds 5 to LOCALSCORE.
171:0 B
172* LEANTAG ?? ^no$
173* -1000^0
174* B ??  1100^0  (^|[^-_0-9a-z]|[=%]20)https?:(//|/\\|\\/|\\\\)\
175                ([0-9a-z][-_0-9a-z]+(�|\.|[=%]2E))*\
176                [<>]((�|\.|[=%]2E)|[0-9a-z])
177{
178 SBLOG="C3T-${TESTNAME} (URL with illegal character)"
179 INCLUDERC=${SBDIR}/functions/loglevel.rc
180
181 :0
182 * $ ${LOCALSCORE}^0
183 * 5^0
184 { LOCALSCORE=$= }
185}
186
187# URL with invalid protocol statement
188#  Body test: http(s): followed by %3a, /\, \/ or \\ (i.e. not the regular "//") and then an alphanumeric; a match adds 5 to LOCALSCORE.
189:0 B
190* LEANTAG ?? ^no$
191* -1000^0
192* B ??  1100^0  (^|[^-_0-9a-z]|[=%]20)https?:(%3a|/\\|\\/|\\\\)[0-9a-z]
193{
194 SBLOG="C3T-${TESTNAME} (URL with invalid protocol)"
195 INCLUDERC=${SBDIR}/functions/loglevel.rc
196
197 :0
198 * $ ${LOCALSCORE}^0
199 * 5^0
200 { LOCALSCORE=$= }
201}
202
203# Obfuscated URLs
204#  Weighted body test: base -1000, each [a-z].msn.com URL -1100, each line starting with ":" or ">" -300, each obfuscation hit +1100 (the final %XX-only href pattern counts once); a positive total adds 5 to LOCALSCORE.
205#  This catches those http://203948023 URLs, urls of the form
206#  http://anydomain@realdomain-or-IP/, and urls with embedded
207#  spaces and newline characters.
208#  Not run when the body contains a Received: line followed later by another Received: (presumably an embedded bounce or forward -- confirm).
209:0 B
210* LEANTAG ?? ^no$
211* ! ^[^0-9a-z]*Received:(.*$)+Received:
212* -1000^0
213* -1100^1    (^|[^0-9a-z])http://[a-z]\.msn\.com/
214*  -300^1    ^[:>]
215*  1100^1    (^|[^0-9a-z])(=3D)?https?://(%[0-9][0-9]?[a-z]?)(%[0-9][0-9]?[a-z]?)*
216*  1100^1    (^|[^0-9a-z])(=3D)?https?://[0-9a-z][^ /]*(@|=40|\*)
217*  1100^1    (^|[^0-9a-z])(=3D)?https?://([0-9a-z][-_0-9a-z]+\.)*(&#[0-9]+;)+([0-9a-z][-_0-9a-z]+\.)*( |/|$)
218*  1100^1    (@|=40)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?/
219*  1100^1    (@|=40)([0-9a-z][-_0-9a-z]*\.)+[a-z][a-z][a-z]?[a-z]?/
220*  1100^1    (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z\.]+(@|=40)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?(�|\.|=2E)[0-9][0-9]?[0-9]?([^0-9a-z\.]|$)
221*  1100^1    (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z\.]+(@|=40)([0-9a-z][-_0-9a-z]*\.)+[a-z][a-z][a-z]?[a-z]?([^0-9a-z\.]|$)
222*  1100^1    (^|[^0-9a-z])(=3D)?https?://[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]([^0-9a-z]|$)
223*  1100^1    (^|[^0-9a-z])(=3D)?https?://[01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01][01]*/\?
224*  1100^1    (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z][-_0-9a-z]+\.[a-z][a-z]+([^0-9a-z\.]|$)
225*  1100^1    ()<a href[0-9a-z]+=
226*  1100^1    (^|[^0-9a-z])(=3D)?https?://[0-9a-z][-_0-9a-z]+(�|\.|[=%]2E)[a-z][a-z][a-z]?[a-z]?&
227*  1100^0    ()<a href=\"https?://((%[0-9A-F][0-9A-F])+/)+((%[0-9A-F][0-9A-F])+)?\">
228{
229 SBLOG="C3T-${TESTNAME} (Obfuscated URL)"
230 INCLUDERC=${SBDIR}/functions/loglevel.rc
231
232 :0
233 * $ ${LOCALSCORE}^0
234 * 5^0
235 { LOCALSCORE=$= }
236}
237
238# Encoded URL in A HREF Tag
239#  Body test for <A HREF="..."> whose entire value is either a run of &#...; character references or bare alphanumerics (no scheme, dot or slash); a match adds 5 to LOCALSCORE.
240:0 B
241* LEANTAG ?? ^no$
242* ()<A HREF=\"((&#[0-9a-f]+;)+|[0-9a-z]+)\">
243{
244 SBLOG="C3T-${TESTNAME} (Encoded URL in A HREF Tag)"
245 INCLUDERC=${SBDIR}/functions/loglevel.rc
246
247 :0
248 * $ ${LOCALSCORE}^0
249 * 5^0
250 { LOCALSCORE=$= }
251}
252
253# Encoded ASCII entities in URL
254#  Exempt: FROMEMAIL at a subdomain of amazon.com (tested with "FROMEMAIL ??" so the variable, not the header, is matched) and Content-Type charsets from the CJK / UTF-7 / UTF-8 list below.
255#   Lots of spammers using this to evade SURBL and other message
256#   body URI patterns.
257#  Weighted body test: base -1000; an http(s) host directly followed by =20/=2E/=3D or %20/%2E/%3D scores -1100; %xx escapes inside an http(s), mailto: or <a href= host score +1100 each pattern; a positive total adds 5 to LOCALSCORE.
258:0
259* LEANTAG ?? ^no$
260* ! FROMEMAIL ?? ^([0-9a-z][-_0-9a-z]+@([0-9a-z][-_0-9a-z]+\.)+amazon\.com)$
261* H ?? ! ^Content-Type: text/(plain|html); charset=.?(big5|chinesebig5|euc.(jp|kr)|gb-?(1988|2312|18030)|iso-10646|iso-2022-(cn|jp|kr)|jis.C622[06]|jis.x02(01|08|12)|shift.jis|ks.c.5601-198[79]|utf.?[78]|windows.31j).?$
262* -1000^0
263* B ?? -1100^0  (^|[^0-9a-z]|=3D)https?://([0-9a-z][-_0-9a-z]+(�|\.|[=%]2E))*\
264                                [0-9a-z][-_0-9a-z]+[=%](20|2E|3D)
265* B ?? 1100^0  (^|[^0-9a-z]|=3D)https?://([0-9a-z][-_0-9a-z]+(�|\.|[=%]2E))*\
266                                ([0-9a-z]|%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)\
267                                [-_0-9a-z]*(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)+\
268                                ([-_0-9a-z]|(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?))*([^0-9a-z]|$)
269* B ?? 1100^0  (^|[^0-9a-z]|=3D)mailto:([0-9a-z][-_0-9a-z]+(�|\.|[=%]2E))*\
270                                ([0-9a-z.@]|%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)\
271                                [-_0-9a-z.@]*(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)+\
272                                ([-_0-9a-z.@]|(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?))*([^0-9a-z]|$)
273* B ?? 1100^0  ()<a href=(3D)?\"([0-9a-z][-_0-9a-z]+(�|\.|[=%]2E))*\
274                                ([0-9a-z]|%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)\
275                                [-_0-9a-z]*(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?)+\
276                                ([-_0-9a-z]|(%[0-9a-f][0-9a-f][0-9a-f]?[0-9a-f]?[0-9a-f]?))*([^0-9a-z]|$)
277{
278 SBLOG="C3T-${TESTNAME} (Encoded ASCII Entity in URI)"
279 INCLUDERC=${SBDIR}/functions/loglevel.rc
280
281 :0
282 * $ ${LOCALSCORE}^0
283 * 5^0
284 { LOCALSCORE=$= }
285}
286# Set LT4=yes when LOCALSCORE minus 4 is still positive, i.e. at least one 5-point recipe in this file has scored.
287:0
288* -4^0
289* $ ${LOCALSCORE}^0
290{ LT4=yes }
291