1# xxxswf.py was created by alexander dot hanel at gmail dot com
2# version 0.1
3# Date - 12-07-2011
4# To do list
5#   - Tag Parser
6#   - ActionScript Decompiler
7
8# 2016-11-01 PL: - A few changes for Python 2+3 compatibility
9
10import fnmatch
11import hashlib
12import imp
13import math
14import os
15import re
16import struct
17import sys
18import time
19from io import BytesIO
20from optparse import OptionParser
21import zlib
22
def checkMD5(md5):
    """Report whether *md5* is listed in the known-bad MD5 table.

    The table maps an MD5 hex digest to the CVE associated with it, in the
    form {'MD5': 'CVE', 'MD5-1': 'CVE-1', 'MD5-2': 'CVE-2'}.  A match is
    printed to stdout; the function always returns None.
    """
    known_bad = {'c46299a5015c6d31ad5766cb49e4ab4b': 'CVE-XXXX-XXXX'}
    cve = known_bad.get(md5)
    if not cve:
        return
    print('\t[BAD] MD5 Match on', cve)
31
def bad(f):
    """Run the Yara and MD5 checks on every SWF embedded in *f*.

    f is an open binary file object.  Each signature offset reported by
    findSWF() is validated with verifySWF(); offsets that do not hold a
    usable SWF are skipped.
    """
    for offset in findSWF(f):
        swf = verifySWF(f, offset)
        if swf is None:
            continue
        yaraScan(swf)
        checkMD5(hashBuff(swf))
39
def yaraScan(d):
    """Scan the SWF buffer *d* with the Yara rules stored in ``rules.yar``.

    d is the in-memory SWF data handed to ``match(data=...)``.  Every rule
    that matches is printed to stdout.  If the yara module is missing, or
    the rule file cannot be compiled, an [ERROR] line is printed and the
    scan is skipped.  Always returns None.
    """
    # A plain import is enough to detect whether yara is installed; the
    # ``imp`` module used previously is deprecated and gone in Python 3.12.
    try:
        import yara
    except ImportError:
        print('\t[ERROR] Yara module not installed - aborting scan')
        return
    # Compile the rules; a missing or malformed rules.yar aborts the scan.
    try:
        rules = yara.compile(r'rules.yar')
    except Exception:
        # NOTE(review): yara's exact exception classes are not visible from
        # this file, so Exception is caught rather than a bare ``except``
        # (which would also swallow KeyboardInterrupt/SystemExit).
        print('\t[ERROR] Yara compile error - aborting scan')
        return
    for hit in rules.match(data=d):
        print('\t[BAD] Yara Signature Hit: %s' % hit)
    return
64
def findSWF(d):
    """Locate SWF header signatures in the readable buffer *d*.

    d must provide ``read()`` returning bytes.  Everything from the current
    read position onward is consumed, and the offset (relative to that
    position) of every ``CWS`` or ``FWS`` occurrence is returned as a list.
    """
    data = d.read()
    offsets = []
    for match in re.finditer(b'CWS|FWS', data):
        offsets.append(match.start())
    return offsets
69
def hashBuff(d):
    """Return the MD5 hex digest of *d*.

    d is either raw bytes (e.g. a carved SWF) or a binary file-like object,
    which is read from its current position to EOF.

    The previous ``type(d) is str`` test only recognised byte strings on
    Python 2; on Python 3 a ``bytes`` buffer fell through and crashed on
    ``d.read``.  Bytes-like input is now wrapped on both versions.
    """
    # source: http://stackoverflow.com/q/5853830
    if isinstance(d, (bytes, bytearray)):
        d = BytesIO(d)
    md5 = hashlib.md5()
    while True:
        chunk = d.read(65536)
        if not chunk:
            break
        md5.update(chunk)
    return md5.hexdigest()
83
def verifySWF(f, addr):
    """Validate and carve the SWF that starts at offset *addr* of *f*.

    f is a seekable binary file-like object.  The 8-byte header (3-byte
    signature, 1-byte version, 4-byte little-endian length) is parsed, then
    up to ``length`` bytes are read starting at addr.

    Returns the SWF as bytes - for a ``CWS`` movie the zlib body is inflated
    and the signature rewritten to ``FWS`` - or None (after printing an
    error) when the header is truncated, the length or version is
    implausible, or decompression fails.

    Fixes over the previous version:
      * the carved data is always parsed from its own buffer; on Python 3
        the ``type(t) is str`` test never matched, so the body was read
        from the wrong position of the source file;
      * version and length are unpacked unsigned;
      * a header cut off by EOF no longer raises struct.error;
      * only the expected exceptions are caught instead of bare ``except``.
    """
    f.seek(addr)
    header = f.read(8)
    if len(header) < 8:
        # A signature right at the end of the file cannot be a real SWF.
        print(' - [ERROR] Truncated SWF Header')
        return None
    sig = header[:3]
    ver, size = struct.unpack('<BI', header[3:8])

    # The length field counts the 8 header bytes, so anything smaller is bogus.
    if size < 8:
        print(' - [ERROR] Invalid SWF Size')
        return None

    f.seek(addr)
    try:
        # For CWS the header length is the uncompressed size; reading that
        # many bytes may run past the compressed stream, and the trailing
        # bytes are left to zlib.decompress below.
        raw = f.read(size)
    except (IOError, OSError, MemoryError, OverflowError, ValueError):
        # A garbage length from a false-positive signature can be too large
        # to read.
        print(' - [ERROR] Invalid SWF Size')
        return None

    # Error check for version above 20
    if ver > 20:
        print(' - [ERROR] Invalid SWF Version')
        return None

    if sig == b'CWS':
        try:
            tmp = b'FWS' + raw[3:8] + zlib.decompress(raw[8:])
        except zlib.error:
            print('- [ERROR]: Zlib decompression error. Invalid CWS SWF')
            return None
        print(' - CWS Header')
        return tmp

    if sig == b'FWS':
        print(' - FWS Header')
        return raw

    print(' - [Error] Logic Error Blame Programmer')
    return None
136
def headerInfo(f):
    """Print the SWF header fields of *f* in an SWFDump-like layout.

    f is either the raw SWF bytes or a binary file-like object positioned at
    the start of the SWF.  For a compressed movie (signature starting with
    ``C``) the data is inflated through verifySWF(f, 0) before the frame
    rectangle, frame rate and frame count are read.

    Lines written to stdout, each prefixed with a tab and ``[HEADER]``:
        File header, (File is zlib compressed.), File version, File size,
        Rect Nbit, Rect Xmin, Rect Xmax, Rect Ymin, Rect Ymax,
        Frame Rate, Frame Count

    Returns None.  If the data ends before the header is complete, or a
    compressed movie cannot be inflated, an error line is printed (by this
    function or by verifySWF) and the function returns early.

    Fixes over the previous version:
      * bytes input works on Python 3 (``type(f) is str`` never matched);
      * the RECT is decoded for any Nbits, with sign extension, instead of
        assuming exactly 8 bytes and crashing when the last bit was set;
      * the frame rate is decoded as 8.8 fixed point, not a raw integer;
      * version and size are unpacked unsigned.
    """
    if isinstance(f, (bytes, bytearray)):
        f = BytesIO(f)

    fixed = f.read(8)
    if len(fixed) < 8:
        print('\t[ERROR] Truncated SWF header')
        return
    sig = fixed[:3]
    compressed = sig[:1] == b'C'
    print('\t[HEADER] File header: %s' % sig.decode('latin-1'))
    if compressed:
        print('\t[HEADER] File is zlib compressed.')
    version, size = struct.unpack('<BI', fixed[3:8])
    print('\t[HEADER] File version: %d' % version)
    print('\t[HEADER] File size: %d' % size)

    # Everything after the first 8 bytes is deflated in a compressed SWF.
    if compressed:
        inflated = verifySWF(f, 0)
        if inflated is None:
            return
        f = BytesIO(inflated)
        f.seek(8)

    # RECT: a 5-bit field width (Nbits) followed by four signed Nbits-wide
    # fields - Xmin, Xmax, Ymin, Ymax - padded to a whole number of bytes.
    first = bytearray(f.read(1))
    if not first:
        print('\t[ERROR] Truncated SWF header')
        return
    nbit = first[0] >> 3
    print('\t[HEADER] Rect Nbit: %d' % nbit)
    rect_len = (5 + 4 * nbit + 7) // 8
    rect = first + bytearray(f.read(rect_len - 1))
    tail = f.read(4)
    if len(rect) < rect_len or len(tail) < 4:
        print('\t[ERROR] Truncated SWF header')
        return

    # Fold the RECT bytes into one big-endian integer and slice the fields
    # out with shifts, so no particular byte count has to be assumed.
    bits = 0
    for byte in rect:
        bits = (bits << 8) | byte
    total_bits = rect_len * 8
    mask = (1 << nbit) - 1
    fields = []
    for index in range(4):
        shift = total_bits - 5 - (index + 1) * nbit
        value = (bits >> shift) & mask
        # Sign-extend: the top bit of each field is its sign.
        if nbit and value >> (nbit - 1):
            value -= 1 << nbit
        fields.append(value)
    xmin, xmax, ymin, ymax = fields
    print('\t[HEADER] Rect Xmin: %d' % xmin)
    print('\t[HEADER] Rect Xmax: %d' % xmax)
    print('\t[HEADER] Rect Ymin: %d' % ymin)
    print('\t[HEADER] Rect Ymax: %d' % ymax)

    # Frame rate is 8.8 fixed point (low byte = fraction, high byte = integer).
    rate_raw, framecount = struct.unpack('<HH', tail)
    print('\t[HEADER] Frame Rate: %.2f' % (rate_raw / 256.0))
    print('\t[HEADER] Frame Count: %d' % framecount)
193
def walk4SWF(path):
    """Recursively collect the files under *path* that contain SWF signatures.

    Returns a list of ``[file_path, [offset1, offset2, ...]]`` entries, one
    per file in which findSWF() reported at least one signature.

    If path is neither a directory nor the empty string, an error is printed
    and an empty list is returned (previously None, which made the caller's
    ``for`` loop raise TypeError).  Files that cannot be opened or read are
    skipped individually; the old ``break`` abandoned every remaining file
    in the same directory.
    """
    if path != '' and not os.path.isdir(path):
        print('\t[ERROR] walk4SWF path must be a dir.')
        return []
    found = []
    for root, _dirs, files in os.walk(path):
        for name in files:
            full_path = os.path.join(root, name)
            try:
                # ``with`` closes the handle even if the scan itself fails.
                with open(full_path, 'rb') as handle:
                    offsets = findSWF(handle)
            except (IOError, OSError):
                continue
            if offsets:
                found.append([full_path, offsets])
    return found
220
def tagsInfo(f):
    """Placeholder for the tag parser from the to-do list; does nothing yet."""
    return None
223
def fileExist(n, ext):
    """Choose an output name for ``n.ext`` that avoids an existing file.

    If ``n.ext`` does not exist it is returned unchanged.  Otherwise a
    counter starting at 2 is inserted (``n.2.ext``, ``n.3.ext``, ...) until
    an unused name is found.  When the counter reaches 50 an error is
    printed and ``n.50.ext`` is returned without checking that name.
    """
    if not os.path.exists('.'.join((n, ext))):
        return n + '.' + ext

    counter = 2
    while os.path.exists('.'.join((n, str(counter), ext))):
        counter += 1
        if counter == 50:
            print('\t[ERROR] Skipped 50 Matching MD5 SWFs')
            break
    return '.'.join((n, str(counter), ext))
239
def CWSize(f):
    """Placeholder for estimating the size of a compressed SWF; does nothing.

    Planned approach: the size stored in the header is that of the
    uncompressed SWF, so read that many bytes, inflate the data, compress
    it again and take len() of the result to get the compressed length.
    """
    return None
247
def compressSWF(f):
    """Return a zlib-compressed (CWS) copy of the SWF in *f*.

    f is either the raw SWF bytes or a binary file-like object positioned at
    the start of the SWF.  The 3-byte signature is replaced with ``CWS``,
    the next 5 header bytes are copied unchanged and the remainder is
    zlib-compressed.

    Returns the compressed SWF as bytes, or None after printing an error
    when the input cannot be read or compressed.

    The previous ``type(f) is str`` test never matched ``bytes`` on
    Python 3, so every in-memory SWF failed with the error below.
    """
    if isinstance(f, (bytes, bytearray)):
        f = BytesIO(f)
    try:
        f.read(3)  # discard the original signature
        return b'CWS' + f.read(5) + zlib.compress(f.read())
    except (AttributeError, TypeError, ValueError, IOError, OSError, zlib.error):
        print('\t[ERROR] SWF Zlib Compression Failed')
        return None
259
def _saveSWF(swf, label):
    """Write *swf* to ``<md5>.swf`` in the working dir; return True on success.

    label is the word shown in the [FILE] line ('Carved' or 'Compressed').
    Only a failure to create the file is reported and turned into False.
    """
    name = fileExist(hashBuff(swf), 'swf')
    print('\t\t[FILE] %s SWF MD5: %s' % (label, name))
    try:
        out = open(name, 'wb+')
    except IOError as e:
        print('\t[ERROR] Could Not Create %s ' % e)
        return False
    # ``with`` guarantees the handle is closed even if the write fails.
    with out:
        out.write(swf)
    return True


def disneyland(f, filename, options):
    """Find every SWF in *f* and apply the actions selected in *options*.

    f is an open, seekable binary file; filename is only used in the summary
    line.  options is the optparse result: each of extract, yara, md5scan,
    decompress, header and compress is acted on when it is not None.

    For each signature offset the SWF is validated with verifySWF(); invalid
    candidates are skipped.  If an output file cannot be created, the
    remaining actions for that SWF are skipped as well.
    """
    # because this is where the magic happens
    offsets = findSWF(f)
    f.seek(0)
    print('\n[SUMMARY] %d SWF(s) in MD5:%s:%s' % (len(offsets), hashBuff(f), filename))
    for idx, addr in enumerate(offsets):
        print('\t[ADDR] SWF %d at %s' % (idx + 1, hex(addr)))
        # verifySWF seeks to addr itself, so no positioning is needed here.
        swf = verifySWF(f, addr)
        if swf is None:
            continue
        if options.extract is not None:
            if not _saveSWF(swf, 'Carved'):
                continue
        if options.yara is not None:
            yaraScan(swf)
        if options.md5scan is not None:
            checkMD5(hashBuff(swf))
        if options.decompress is not None:
            if not _saveSWF(swf, 'Carved'):
                continue
        if options.header is not None:
            headerInfo(swf)
        if options.compress is not None:
            swf = compressSWF(swf)
            if swf is None:
                continue
            if not _saveSWF(swf, 'Compressed'):
                continue
314
def main():
    """Command-line entry point: parse the options and scan the target.

    Scenarios:
      * scan a file for SWF(s), optionally extracting them and/or scanning
        them with Yara or the MD5 list;
      * with -r/--recdir, scan a directory recursively for files that
        contain SWF(s) and process each of them.

    Without -r the last positional argument is the file to scan.  Help is
    printed when no arguments, or no file argument, are given.

    Changes over the previous version:
      * input files are opened read-only ('rb' instead of 'rb+'), so
        read-only samples can be scanned;
      * file handles are closed via ``with``, also in recursive mode;
      * an empty last argument no longer raises IndexError;
      * a None result from walk4SWF is treated as "nothing found".
    """
    usage = 'usage: %prog [options] <file.bad>'
    parser = OptionParser(usage=usage)
    parser.add_option('-x', '--extract', action='store_true', dest='extract', help='Extracts the embedded SWF(s), names it MD5HASH.swf & saves it in the working dir. No addition args needed')
    parser.add_option('-y', '--yara', action='store_true', dest='yara', help='Scans the SWF(s) with yara. If the SWF(s) is compressed it will be deflated. No addition args needed')
    parser.add_option('-s', '--md5scan', action='store_true', dest='md5scan', help='Scans the SWF(s) for MD5 signatures. Please see func checkMD5 to define hashes. No addition args needed')
    parser.add_option('-H', '--header', action='store_true', dest='header', help='Displays the SWFs file header. No addition args needed')
    parser.add_option('-d', '--decompress', action='store_true', dest='decompress', help='Deflates compressed SWFS(s)')
    parser.add_option('-r', '--recdir', dest='PATH', type='string', help='Will recursively scan a directory for files that contain SWFs. Must provide path in quotes')
    parser.add_option('-c', '--compress', action='store_true', dest='compress', help='Compresses the SWF using Zlib')

    (options, args) = parser.parse_args()

    # Print help if no arguments are passed
    if len(sys.argv) < 2:
        parser.print_help()
        return

    # Recursive search
    if options.PATH is not None:
        for entry in walk4SWF(options.PATH) or []:
            path = entry[0]
            try:
                with open(path, 'rb') as t:
                    disneyland(t, path, options)
            except IOError as e:
                # Best effort: report the file and keep going with the rest.
                print('[ERROR] Could not process %s: %s' % (path, e))
        return

    # A file argument is required; options alone are not enough.
    if not args:
        parser.print_help()
        return

    filename = args[-1]
    try:
        f = open(filename, 'rb')
    except (IOError, OSError, ValueError):
        print('[ERROR] File can not be opended/accessed')
        return

    with f:
        disneyland(f, filename, options)
    return
370
# Run the command-line interface only when executed as a script.
if __name__ == '__main__':
    main()
373
374