1#!/usr/local/bin/python
2# -*- coding: UTF-8 -*-
3"""
4-----------------------------------------------------------------------------
5
6 Vinetto : a forensics tool to examine Thumbs.db files
7 Copyright (C) 2005, 2006 by Michel Roukine
8
9This file is part of Vinetto.
10
11 Vinetto is free software; you can redistribute it and/or
12 modify it under the terms of the GNU General Public License as published
13 by the Free Software Foundation; either version 2 of the License, or (at
14 your option) any later version.
15
16 Vinetto is distributed in the hope that it will be
17 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
19 General Public License for more details.
20
21 You should have received a copy of the GNU General Public License along
22 with the vinetto package; if not, write to the Free Software
23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
24
25-----------------------------------------------------------------------------
26"""
27
28__revision__ = "$Revision: 59 $"
29__version__ = "0.06"
30__author__ = 'Michel Roukine'
31
32import sys
33import os
34import StringIO
35import md5
36import vinreport
37
38from optparse import OptionParser
39from struct  import unpack
40from binascii import unhexlify
41from time    import ctime
42from vinutils import addCatEntry, nbCatEnt, nbTNstr, TNfname, catIndxOutOfSeq, \
43                     tnStreamOutOfSeq, addTNStream, extractStats
44
45PPS_TYPES = ["undefined", "storage", "stream", "undefined", "undefined", "root"]
46EOI = unhexlify('ffd9')
47HEADER1 = unhexlify('0c00')
48PIL = ""
49NUMBERED_THUMBS_DIR = ".thumbs"
50
51
def getargs():
    """Return arguments passed to vinetto on the command line.

    Returns the tuple (file, outputdir, htmlrep, encoding, symlinks): the
    single positional argument, the value of -o (None when -o is absent)
    and the -H, -U and -s flags as booleans.

    parser.error() terminates the program when the number of positional
    arguments is not exactly one, or when -H is given without -o.

    Side effect: when -o DIR is given, DIR/<NUMBERED_THUMBS_DIR> is created
    if it is missing (together with DIR itself when necessary).
    """
    verstr = "%prog " + __version__ + " (r" + __revision__[11:-2] +")\n\n" +\
             "Copyright (C) 2005-2006 Michel Roukine.\n" + \
             "Vinetto is open source software," + \
             " see http://vinetto.sourceforge.net/"
    parser = OptionParser(usage="%prog [OPTIONS] [-s] [-U] [-o DIR] file", version=verstr)
    parser.add_option("-o", dest="outputdir",
                      help="write thumbnails to DIR", metavar="DIR")
    parser.set_defaults(htmlrep=False)
    parser.add_option("-H", action="store_true", dest="htmlrep",
                      help="write html report to DIR")
    parser.set_defaults(encoding=False)
    parser.add_option("-U", action="store_true", dest="encoding",
                      help="use utf8 encodings")
    parser.set_defaults(symlinks=False)
    parser.add_option("-s", action="store_true", dest="symlinks",
                      help="create symlink of the image realname to the numbered name in DIR/.thumbs")
    opts, pargs = parser.parse_args()

    if len(pargs) != 1:
        parser.error("incorrect number of arguments")

    if opts.outputdir is None and opts.htmlrep:
        parser.error("-H option requires -o with a directory name")

    # Only touch the filesystem when an output directory was requested;
    # without -o there is nothing to create (and no string to build a path
    # from).
    if opts.outputdir is not None:
        # Use os calls instead of a shell command line: the directory name
        # comes straight from the user and must not be interpreted by a
        # shell.
        thumbsdir = os.path.join(opts.outputdir, NUMBERED_THUMBS_DIR)
        if not os.path.isdir(thumbsdir):
            try:
                os.makedirs(thumbsdir)
            except OSError:
                # Not fatal here: the caller validates DIR afterwards and
                # reports a precise error if it is unusable.
                sys.stderr.write(" ** Warning: cannot create " +
                                 thumbsdir + "\n")
    return (pargs[0], opts.outputdir, opts.htmlrep, opts.encoding, opts.symlinks)
79
80
def getencodings():
    """Return the codec name used for thumbnail file names.

    'utf8' when the module-level utf8encoding flag (the -U option) is set,
    'iso-8859-1' otherwise.
    """
    if not utf8encoding:
        return 'iso-8859-1'
    return 'utf8'
88
def encodefilename(originame):
    """Re-encode a raw UTF-16-LE name into the codec given by getencodings().

    Characters the target codec cannot represent are substituted (the
    'replace' error handler) instead of raising.
    """
    decoded = unicode(originame, 'utf-16-le')
    return decoded.encode(getencodings(), 'replace')
94
def conv2pytime (win32filetime):
    """Convert win32 timestamp to python time.

    win32filetime is an integer tick count with 10,000,000 ticks per second;
    the result is a whole number of seconds shifted by the 11,644,473,600
    second gap between the two epochs, suitable for time.ctime().

    A zero timestamp is passed through as 0 instead of being shifted.
    """
    SECS_BETWEEN_EPOCHS = 11644473600
    SECS_TO_100NS = 10000000

    if win32filetime == 0:
        return 0
    # Floor division is spelled out so the result stays a whole second count
    # whatever division semantics are in effect for "/".
    return (win32filetime // SECS_TO_100NS) - SECS_BETWEEN_EPOCHS
104
105
def nextBlock (TDB, Table, indx):
    """Return the allocation-table entry stored for block number indx.

    TDB   -- the raw file contents
    Table -- block numbers of the table blocks; entry k covers block
             numbers 128*k .. 128*k+127
    indx  -- block number whose successor is wanted

    The entry is read as a signed little-endian 32-bit integer; callers in
    this file treat -2 as the end-of-chain marker.
    """
    # 128 four-byte entries per 512-byte table block.  divmod keeps both
    # values integral regardless of how "/" divides integers.
    iFAT, iSECT = divmod(indx, 128)
    # Block b starts at file offset 0x200 + b*512 (the first 512 bytes are
    # the file header).
    offst = Table[iFAT] * 512 + 0x200 + iSECT * 4
    return unpack("<l", TDB[offst:offst+4])[0]
112
# Beginning ...
# Command-line values become module-level names used by the rest of the script.
tDBfname, outputdir, htmlrep, utf8encoding, symlinks = getargs()

# Testing file and DIR parameters
# The first failing check (existence, regular file, readability) decides the
# message; any failure ends the program with status 1.
if not os.access(tDBfname, os.F_OK):
    inputProblem = "does not exist"
elif not os.path.isfile(tDBfname):
    inputProblem = "not a file"
elif not os.access(tDBfname, os.R_OK):
    inputProblem = "not readable"
else:
    inputProblem = None

if inputProblem is not None:
    print >> sys.stderr, "Error: ", tDBfname, inputProblem
    sys.exit(1)
126
# Opening Thumbs.db file
# The whole file is read into memory; the handle is closed explicitly rather
# than left to garbage collection.
tDBfile = open(tDBfname, "rb")
try:
    thumbsDB = tDBfile.read()
finally:
    tDBfile.close()

# Blocks are addressed in 512-byte units further down, so a length that is
# not a multiple of 512 is worth a warning (processing continues).
longueur = len(thumbsDB)
if (longueur % 512 ) != 0:
    print >> sys.stderr, " ** Warning: length " + tDBfname + " : " \
                        + str(longueur) + ", non multiple 512"
133
134# Initializing extraction and optional html report
135if outputdir != None :
136    if not os.path.exists(outputdir):
137        try :
138            os.mkdir(outputdir)
139            print 'Note: ', outputdir, ' was created'
140        except EnvironmentError, e:
141            print >> sys.stderr, "Error creating", outputdir
142            sys.exit(1)
143    elif not os.path.isdir(outputdir):
144        print >> sys.stderr, 'Error: ', outputdir, ' is not a directory'
145        sys.exit(1)
146    elif not os.access(outputdir, os.W_OK):
147        print >> sys.stderr, 'Error: ', outputdir, ' not writable'
148        sys.exit(1)
149    outputdir += "/"
150
151    try:
152        from PIL import Image
153        PIL = "imported"
154    except ImportError, e:
155        print >> sys.stderr, ""
156        print >> sys.stderr, " ** Warning: Cannot find \"Image\" module."
157        print >> sys.stderr, "             Vinetto will only extract Type 2 thumbnails."
158        print >> sys.stderr, ""
159
160    header       = open("/usr/local/share/py38-vinetto/header","rb").read()
161    quantization = open("/usr/local/share/py38-vinetto/quantization","rb").read()
162    huffman      = open("/usr/local/share/py38-vinetto/huffman","rb").read()
163
164    if htmlrep == True:
165        report = vinreport.HtRep(tDBfname, outputdir, getencodings(), __version__ + " (r" + \
166                                 __revision__[11:-2] +")")
167        md5tDB = md5.new(thumbsDB).hexdigest()
168        report.SetFileSection(longueur, md5tDB)
169
# -----------------------------------------------------------------------------
# Analyzing header block ...

# The file must start with this 8-byte signature; anything else ends the
# program with status 8.
if thumbsDB[0:8] != "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1":
    print >> sys.stderr, " *** Error  " + tDBfname + ": header signature error "
    sys.exit(8)

# Signed little-endian 32-bit header fields.
nbFATblocks, = unpack("<l", thumbsDB[0x2c:0x30])

REfirstBlock, = unpack("<l", thumbsDB[0x30:0x34])  # Root directory 1st block

firstMiniFATblock, = unpack("<l", thumbsDB[0x3c:0x40])

# Block numbers of the FAT blocks, stored as consecutive 4-byte entries
# starting at header offset 0x4c.
FATblocks = [unpack("<l", thumbsDB[pos:pos + 4])[0]
             for pos in range(0x4c, 0x4c + nbFATblocks * 4, 4)]
187
# -----------------------------------------------------------------------------
# Analyzing Root Entry directory ...

def _followChain(firstBlock):
    """Return the list of block numbers chained from firstBlock.

    The chain is followed through nextBlock() until the end marker -2.
    A damaged file can contain a chain that loops back on itself or that
    runs into another negative marker; in both cases the walk stops with a
    warning instead of never terminating.
    """
    chain = []
    seen = set()
    blk = firstBlock
    while blk != -2:
        if blk < 0 or blk in seen:
            print >> sys.stderr, " ** Warning: ", tDBfname, \
                                 "corrupted block chain, truncated at", blk
            break
        seen.add(blk)
        chain.append(blk)
        blk = nextBlock (thumbsDB, FATblocks, blk)
    return chain

currentBlock = REfirstBlock

MiniFATblocks = _followChain(firstMiniFATblock)

# Start-block field (offset 0x74, the same field read as pps_sb in the
# directory loop) of the first entry in the first directory block.
offset = 0x200 + currentBlock * 0x200
firstMiniFATstreamBlock = unpack("<l", thumbsDB[offset+0x74:offset+0x78])[0]

MiniFATstreamBlocks = _followChain(firstMiniFATstreamBlock)
207
208SID = 0
209
210while currentBlock != -2:
211    offset = 0x200 + currentBlock * 0x200
212    for i in range (offset, offset+0x200, 0x80):
213        pps_rawname     = unpack("64s", thumbsDB[i     :i+0x40])[0]
214        pps_sizeofname  = unpack("<h",  thumbsDB[i+0x40:i+0x42])[0]
215        pps_type        = unpack("b",   thumbsDB[i+0x42:i+0x43])[0]
216        pps_ts1         = unpack("<Q",  thumbsDB[i+0x64:i+0x6c])[0]
217        pps_ts2         = unpack("<Q",  thumbsDB[i+0x6c:i+0x74])[0]
218        pps_sb          = unpack("<l",  thumbsDB[i+0x74:i+0x78])[0]
219        pps_size        = unpack("<l",  thumbsDB[i+0x78:i+0x7c])[0]
220
221        if pps_type == 2: # stream files extraction
222            rawname = unicode(pps_rawname,"utf-16-le")[0:(pps_sizeofname/2 - 1)]
223            #SIDstr  = "%04i" % SID
224            SIDstr = rawname [::-1]
225            if len(SIDstr) < 4:
226                SIDstr = "%04i" % int(SIDstr)
227            bytesToWrite = pps_size
228
229            if pps_size >= 4096 : # stream located in the FAT
230                sr = ""
231                currentStreamBlock = pps_sb
232                while currentStreamBlock != -2:
233                    sOffset = 0x200 + currentStreamBlock * 0x200
234                    if bytesToWrite >= 512:
235                        sr = sr + thumbsDB[sOffset:sOffset + 512]
236                    else:
237                        sr = sr + thumbsDB[sOffset:sOffset + bytesToWrite]
238                    bytesToWrite = bytesToWrite - 512
239                    currentStreamBlock = nextBlock (thumbsDB, FATblocks,
240                                                    currentStreamBlock)
241
242            else:                # stream located in the MiniFAT
243                sr = ""
244                currentStreamMiniBlock = pps_sb
245                while currentStreamMiniBlock != -2 :
246                    # computing offset of the miniBlock to copy
247                    # 1 : which block of the miniFATstream ?
248                    nb = currentStreamMiniBlock / 8
249                    # 2 : where is this block ?
250                    bl = MiniFATstreamBlocks[nb]
251                    # 3 : which offset from the start of block ?
252                    ioffset = (currentStreamMiniBlock % 8) * 64
253
254                    sOffset = 0x200 + bl*0x200 + ioffset
255
256                    if bytesToWrite >= 64:
257                        sr = sr + thumbsDB[sOffset:sOffset + 64 ]
258                    else:
259                        sr = sr + thumbsDB[sOffset:sOffset + bytesToWrite]
260                    bytesToWrite = bytesToWrite - 64
261                    # computing next currentStreamMiniBlock
262                    currentStreamMiniBlock = nextBlock (thumbsDB,
263                                                        MiniFATblocks,
264                                                        currentStreamMiniBlock)
265
266            # extraction stream processing ... ---------------------------------
267
268            longueur = len(sr)
269
270            # is this a Catalog ?
271            if rawname == "Catalog" :
272                # -------------------------------------------------------------
273                # Skipping catalog header block ...
274
275                recordLen = unpack("<h", sr[0:2])[0]
276                indcat = recordLen
277                SID = SID - 1
278
279                # -------------------------------------------------------------
280                # Analyzing Catalog entries ...
281
282                while indcat < longueur :
283                    recordLen   = unpack("<h", sr[indcat   :indcat+2])[0]
284                    num         = unpack("<l", sr[indcat+4 :indcat+8])[0]
285                    timestamp   = unpack("<Q", sr[indcat+8 :indcat+16])[0]
286                    nameLen     = recordLen - 0x14
287
288                    originame   = sr[indcat+16 :indcat+16+nameLen]
289                    TNid = "%04i" % num
290                    TNtimestamp = ctime(conv2pytime(timestamp))
291                    TNname = encodefilename(originame)
292                    if symlinks:
293                        os.system( "ln -fs " + NUMBERED_THUMBS_DIR + "/" + TNid + ".jpg " + "\"" + \
294                                    outputdir + "/" + TNname + "\"" )
295                    print  " " + TNid, " ", TNtimestamp, " ", TNname
296                    addCatEntry(num, TNtimestamp, TNname)
297                    indcat = indcat + recordLen
298
299            else :
300
301                # is EOI = 0xffd9 ?
302                if sr[longueur-2:longueur] != EOI:
303                    print >> sys.stderr, " *** Err: missing EOI in stream", SID
304                    sys.exit(2)
305                # --------------------------- header 1 ------------------------
306                # is first header OK ?
307                if sr[0:2] != HEADER1:
308                    print >> sys.stderr, \
309                          " *** Err: unrecognized header in stream", SID
310                    sys.exit(3)
311
312                # is length OK ?
313                if  unpack("<H", sr[8:10])[0] != (longueur - 0x0c) :
314                    print >> sys.stderr, " *** Err: length error in stream", SID
315                    sys.exit(4)
316                # --------------------------- header 2 ------------------------
317                # is it a type 2 thumbnail ? (full jpeg)
318                if  sr[0x0c:0x10] == "\xff\xd8\xff\xe0" :
319                    if outputdir != None :
320                        open(outputdir + "/" + NUMBERED_THUMBS_DIR + "/" + TNfname(SIDstr, "2") + ".jpg", \
321                             "wb").write(sr[0x0c:])
322                elif  unpack("<L",sr[0x0c:0x10])[0] == 1 :
323                    # is second header OK ?
324                    if  unpack("<H", sr[0x0c+4:0x0c+6])[0] != (longueur -
325                                                              0x0c - 0x10):
326                        print >> sys.stderr, \
327                              " *** Err : length(2) error in stream", SID
328                        sys.exit(5)
329                    # Type 1 TN processing ...
330                    if (PIL == "imported") and (outputdir != None):
331                        type1sr = header[:0x14]
332                        type1sr = type1sr + quantization
333                        type1sr = type1sr + sr[0x1e:0x34]
334                        type1sr = type1sr + huffman
335                        type1sr = type1sr + sr[0x34:]
336
337                        im = Image.open(StringIO.StringIO(type1sr))
338                        r, g, b, a = im.split()
339                        im = Image.merge("RGB", (b, g, r))
340                        im = im.transpose(Image.FLIP_TOP_BOTTOM)
341                        im.save(outputdir + TNfname(SIDstr, "1") + ".jpg", \
342                                "JPEG", quality=100)
343                    elif outputdir != None: # Cannot extract : PIL not imported
344                        addTNStream(int(SIDstr), "1", "")
345                else :
346                    print >> sys.stderr, \
347                          " *** Err : header (2) error in stream", SID
348                    sys.exit(6)
349
350            # -----------------------------------------------------------------
351
352        elif pps_type == 5: # Root Entry
353            REtimestamp = pps_ts2
354            print
355            print " Root Entry modify timestamp :", \
356                  ctime(conv2pytime(REtimestamp))
357            if htmlrep == True:
358                report.SetREtst (ctime(conv2pytime(REtimestamp)))
359            print
360            print " ------------------------------------------------------"
361            print
362
363        SID = SID + 1
364
365    currentBlock = nextBlock(thumbsDB, FATblocks, currentBlock)
366
367print
368print " ------------------------------------------------------"
369print
370
371if catIndxOutOfSeq() == True:
372    print >> sys.stderr, " * Info: ", tDBfname, "Catalog",\
373                         " index number out of usual sequence"
374
375if tnStreamOutOfSeq() == True:
376    print >> sys.stderr, " * Info: ", tDBfname, \
377                         "thumbnail stream index number out of usual sequence"
378
379if  (outputdir != None) and (nbCatEnt() != nbTNstr()):
380    print >> sys.stderr, " ** Warning: ", tDBfname, " -- number of extracted ", \
381                         "thumbnails does not match number of Catalog entries"
382
383statstring = extractStats(outputdir)
384print statstring
385
386if htmlrep == True:
387    report.flush(statstring)
388
389