1#!/usr/local/bin/python 2# -*- coding: UTF-8 -*- 3""" 4----------------------------------------------------------------------------- 5 6 Vinetto : a forensics tool to examine Thumbs.db files 7 Copyright (C) 2005, 2006 by Michel Roukine 8 9This file is part of Vinetto. 10 11 Vinetto is free software; you can redistribute it and/or 12 modify it under the terms of the GNU General Public License as published 13 by the Free Software Foundation; either version 2 of the License, or (at 14 your option) any later version. 15 16 Vinetto is distributed in the hope that it will be 17 useful, but WITHOUT ANY WARRANTY; without even the implied warranty of 18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 19 General Public License for more details. 20 21 You should have received a copy of the GNU General Public License along 22 with the vinetto package; if not, write to the Free Software 23 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 24 25----------------------------------------------------------------------------- 26""" 27 28__revision__ = "$Revision: 59 $" 29__version__ = "0.06" 30__author__ = 'Michel Roukine' 31 32import sys 33import os 34import StringIO 35import md5 36import vinreport 37 38from optparse import OptionParser 39from struct import unpack 40from binascii import unhexlify 41from time import ctime 42from vinutils import addCatEntry, nbCatEnt, nbTNstr, TNfname, catIndxOutOfSeq, \ 43 tnStreamOutOfSeq, addTNStream, extractStats 44 45PPS_TYPES = ["undefined", "storage", "stream", "undefined", "undefined", "root"] 46EOI = unhexlify('ffd9') 47HEADER1 = unhexlify('0c00') 48PIL = "" 49NUMBERED_THUMBS_DIR = ".thumbs" 50 51 52def getargs(): 53 """Return arguments passed to vinetto on the command line. """ 54 verstr = "%prog " + __version__ + " (r" + __revision__[11:-2] +")\n\n" +\ 55 "Copyright (C) 2005-2006 Michel Roukine.\n" + \ 56 "Vinetto is open source software," + \ 57 " see http://vinetto.sourceforge.net/" 58 parser = OptionParser(usage="%prog [OPTIONS] [-s] [-U] [-o DIR] file", version=verstr) 59 parser.add_option("-o", dest="outputdir", 60 help="write thumbnails to DIR", metavar="DIR") 61 parser.set_defaults(htmlrep=False) 62 parser.add_option("-H", action="store_true", dest="htmlrep", 63 help="write html report to DIR") 64 parser.set_defaults(encoding=False) 65 parser.add_option("-U", action="store_true", dest="encoding", 66 help="use utf8 encodings") 67 parser.set_defaults(symlinks=False) 68 parser.add_option("-s", action="store_true", dest="symlinks", 69 help="create symlink of the image realname to the numbered name in DIR/.thumbs") 70 opts, pargs = parser.parse_args() 71 72 if len(pargs) != 1: 73 parser.error("incorrect number of arguments") 74 75 if (opts.outputdir == None) and (opts.htmlrep == True): 76 parser.error("-H option requires -o with a directory name") 77 os.system("mkdir " + opts.outputdir + "/" + NUMBERED_THUMBS_DIR) 78 return (pargs[0], opts.outputdir, opts.htmlrep, opts.encoding, opts.symlinks) 79 80 81def getencodings(): 82 """What encoding we use?""" 83 if utf8encoding: 84 fileencoding = 'utf8' 85 else: 86 fileencoding = 'iso-8859-1' 87 return fileencoding 88 89def encodefilename(originame): 90 """Convert filename to the global encoding. """ 91 TNname = unicode(originame, 92 'utf-16-le').encode(getencodings(), 'replace') 93 return TNname 94 95def conv2pytime (win32filetime): 96 """Convert win32 timestamp to python time. """ 97 SECS_BETWEEN_EPOCHS = 11644473600 98 SECS_TO_100NS = 10000000 99 100 if win32filetime == 0: 101 return 0 102 else: 103 return (win32filetime / SECS_TO_100NS) - SECS_BETWEEN_EPOCHS 104 105 106def nextBlock (TDB, Table, indx): 107 """Return next block. """ 108 iFAT = indx / 128 # FAT block number to search in 109 iSECT = indx % 128 # SECTor to search in the FAT block 110 offst = Table [iFAT] * 512 + 0x200 + iSECT*4 111 return unpack("<l", TDB[offst:offst+4])[0] 112 113# Beginning ... 114tDBfname, outputdir, htmlrep, utf8encoding, symlinks = getargs() 115 116# Testing file and DIR parameters 117if not os.access(tDBfname, os.F_OK): 118 print >> sys.stderr, "Error: ", tDBfname, "does not exist" 119 sys.exit(1) 120elif not os.path.isfile(tDBfname): 121 print >> sys.stderr, "Error: ", tDBfname, "not a file" 122 sys.exit(1) 123elif not os.access(tDBfname, os.R_OK): 124 print >> sys.stderr, "Error: ", tDBfname, "not readable" 125 sys.exit(1) 126 127# Opening Thumbs.db file 128thumbsDB = open(tDBfname,"rb").read() 129longueur = len(thumbsDB) 130if (longueur % 512 ) != 0: 131 print >> sys.stderr, " ** Warning: length " + tDBfname + " : " \ 132 + str(longueur) + ", non multiple 512" 133 134# Initializing extraction and optional html report 135if outputdir != None : 136 if not os.path.exists(outputdir): 137 try : 138 os.mkdir(outputdir) 139 print 'Note: ', outputdir, ' was created' 140 except EnvironmentError, e: 141 print >> sys.stderr, "Error creating", outputdir 142 sys.exit(1) 143 elif not os.path.isdir(outputdir): 144 print >> sys.stderr, 'Error: ', outputdir, ' is not a directory' 145 sys.exit(1) 146 elif not os.access(outputdir, os.W_OK): 147 print >> sys.stderr, 'Error: ', outputdir, ' not writable' 148 sys.exit(1) 149 outputdir += "/" 150 151 try: 152 from PIL import Image 153 PIL = "imported" 154 except ImportError, e: 155 print >> sys.stderr, "" 156 print >> sys.stderr, " ** Warning: Cannot find \"Image\" module." 157 print >> sys.stderr, " Vinetto will only extract Type 2 thumbnails." 158 print >> sys.stderr, "" 159 160 header = open("/usr/local/share/py38-vinetto/header","rb").read() 161 quantization = open("/usr/local/share/py38-vinetto/quantization","rb").read() 162 huffman = open("/usr/local/share/py38-vinetto/huffman","rb").read() 163 164 if htmlrep == True: 165 report = vinreport.HtRep(tDBfname, outputdir, getencodings(), __version__ + " (r" + \ 166 __revision__[11:-2] +")") 167 md5tDB = md5.new(thumbsDB).hexdigest() 168 report.SetFileSection(longueur, md5tDB) 169 170# ----------------------------------------------------------------------------- 171# Analyzing header block ... 172 173if "\xd0\xcf\x11\xe0\xa1\xb1\x1a\xe1" != thumbsDB[0:8] : 174 print >> sys.stderr, " *** Error " + tDBfname + ": header signature error " 175 sys.exit(8) 176 177nbFATblocks = unpack("<l", thumbsDB[0x2c:0x2c+4])[0] 178 179REfirstBlock = unpack("<l", thumbsDB[0x30:0x30+4])[0] #Root directory 1st block 180 181firstMiniFATblock = unpack("<l", thumbsDB[0x3c:0x3c+4])[0] 182 183FATblocks = [] 184for i in range(nbFATblocks): 185 offset = 0x4c + i*4 186 FATblocks.append(unpack("<l", thumbsDB[offset:offset + 4])[0]) 187 188# ----------------------------------------------------------------------------- 189# Analyzing Root Entry directory ... 190 191currentBlock = REfirstBlock 192 193i = firstMiniFATblock 194MiniFATblocks = [] 195while i != -2: 196 MiniFATblocks.append(i) 197 i = nextBlock (thumbsDB, FATblocks, i) 198 199offset = 0x200 + currentBlock * 0x200 200firstMiniFATstreamBlock = unpack("<l", thumbsDB[offset+0x74:offset+0x78])[0] 201 202i = firstMiniFATstreamBlock 203MiniFATstreamBlocks = [] 204while i != -2: 205 MiniFATstreamBlocks.append(i) 206 i = nextBlock (thumbsDB, FATblocks, i) 207 208SID = 0 209 210while currentBlock != -2: 211 offset = 0x200 + currentBlock * 0x200 212 for i in range (offset, offset+0x200, 0x80): 213 pps_rawname = unpack("64s", thumbsDB[i :i+0x40])[0] 214 pps_sizeofname = unpack("<h", thumbsDB[i+0x40:i+0x42])[0] 215 pps_type = unpack("b", thumbsDB[i+0x42:i+0x43])[0] 216 pps_ts1 = unpack("<Q", thumbsDB[i+0x64:i+0x6c])[0] 217 pps_ts2 = unpack("<Q", thumbsDB[i+0x6c:i+0x74])[0] 218 pps_sb = unpack("<l", thumbsDB[i+0x74:i+0x78])[0] 219 pps_size = unpack("<l", thumbsDB[i+0x78:i+0x7c])[0] 220 221 if pps_type == 2: # stream files extraction 222 rawname = unicode(pps_rawname,"utf-16-le")[0:(pps_sizeofname/2 - 1)] 223 #SIDstr = "%04i" % SID 224 SIDstr = rawname [::-1] 225 if len(SIDstr) < 4: 226 SIDstr = "%04i" % int(SIDstr) 227 bytesToWrite = pps_size 228 229 if pps_size >= 4096 : # stream located in the FAT 230 sr = "" 231 currentStreamBlock = pps_sb 232 while currentStreamBlock != -2: 233 sOffset = 0x200 + currentStreamBlock * 0x200 234 if bytesToWrite >= 512: 235 sr = sr + thumbsDB[sOffset:sOffset + 512] 236 else: 237 sr = sr + thumbsDB[sOffset:sOffset + bytesToWrite] 238 bytesToWrite = bytesToWrite - 512 239 currentStreamBlock = nextBlock (thumbsDB, FATblocks, 240 currentStreamBlock) 241 242 else: # stream located in the MiniFAT 243 sr = "" 244 currentStreamMiniBlock = pps_sb 245 while currentStreamMiniBlock != -2 : 246 # computing offset of the miniBlock to copy 247 # 1 : which block of the miniFATstream ? 248 nb = currentStreamMiniBlock / 8 249 # 2 : where is this block ? 250 bl = MiniFATstreamBlocks[nb] 251 # 3 : which offset from the start of block ? 252 ioffset = (currentStreamMiniBlock % 8) * 64 253 254 sOffset = 0x200 + bl*0x200 + ioffset 255 256 if bytesToWrite >= 64: 257 sr = sr + thumbsDB[sOffset:sOffset + 64 ] 258 else: 259 sr = sr + thumbsDB[sOffset:sOffset + bytesToWrite] 260 bytesToWrite = bytesToWrite - 64 261 # computing next currentStreamMiniBlock 262 currentStreamMiniBlock = nextBlock (thumbsDB, 263 MiniFATblocks, 264 currentStreamMiniBlock) 265 266 # extraction stream processing ... --------------------------------- 267 268 longueur = len(sr) 269 270 # is this a Catalog ? 271 if rawname == "Catalog" : 272 # ------------------------------------------------------------- 273 # Skipping catalog header block ... 274 275 recordLen = unpack("<h", sr[0:2])[0] 276 indcat = recordLen 277 SID = SID - 1 278 279 # ------------------------------------------------------------- 280 # Analyzing Catalog entries ... 281 282 while indcat < longueur : 283 recordLen = unpack("<h", sr[indcat :indcat+2])[0] 284 num = unpack("<l", sr[indcat+4 :indcat+8])[0] 285 timestamp = unpack("<Q", sr[indcat+8 :indcat+16])[0] 286 nameLen = recordLen - 0x14 287 288 originame = sr[indcat+16 :indcat+16+nameLen] 289 TNid = "%04i" % num 290 TNtimestamp = ctime(conv2pytime(timestamp)) 291 TNname = encodefilename(originame) 292 if symlinks: 293 os.system( "ln -fs " + NUMBERED_THUMBS_DIR + "/" + TNid + ".jpg " + "\"" + \ 294 outputdir + "/" + TNname + "\"" ) 295 print " " + TNid, " ", TNtimestamp, " ", TNname 296 addCatEntry(num, TNtimestamp, TNname) 297 indcat = indcat + recordLen 298 299 else : 300 301 # is EOI = 0xffd9 ? 302 if sr[longueur-2:longueur] != EOI: 303 print >> sys.stderr, " *** Err: missing EOI in stream", SID 304 sys.exit(2) 305 # --------------------------- header 1 ------------------------ 306 # is first header OK ? 307 if sr[0:2] != HEADER1: 308 print >> sys.stderr, \ 309 " *** Err: unrecognized header in stream", SID 310 sys.exit(3) 311 312 # is length OK ? 313 if unpack("<H", sr[8:10])[0] != (longueur - 0x0c) : 314 print >> sys.stderr, " *** Err: length error in stream", SID 315 sys.exit(4) 316 # --------------------------- header 2 ------------------------ 317 # is it a type 2 thumbnail ? (full jpeg) 318 if sr[0x0c:0x10] == "\xff\xd8\xff\xe0" : 319 if outputdir != None : 320 open(outputdir + "/" + NUMBERED_THUMBS_DIR + "/" + TNfname(SIDstr, "2") + ".jpg", \ 321 "wb").write(sr[0x0c:]) 322 elif unpack("<L",sr[0x0c:0x10])[0] == 1 : 323 # is second header OK ? 324 if unpack("<H", sr[0x0c+4:0x0c+6])[0] != (longueur - 325 0x0c - 0x10): 326 print >> sys.stderr, \ 327 " *** Err : length(2) error in stream", SID 328 sys.exit(5) 329 # Type 1 TN processing ... 330 if (PIL == "imported") and (outputdir != None): 331 type1sr = header[:0x14] 332 type1sr = type1sr + quantization 333 type1sr = type1sr + sr[0x1e:0x34] 334 type1sr = type1sr + huffman 335 type1sr = type1sr + sr[0x34:] 336 337 im = Image.open(StringIO.StringIO(type1sr)) 338 r, g, b, a = im.split() 339 im = Image.merge("RGB", (b, g, r)) 340 im = im.transpose(Image.FLIP_TOP_BOTTOM) 341 im.save(outputdir + TNfname(SIDstr, "1") + ".jpg", \ 342 "JPEG", quality=100) 343 elif outputdir != None: # Cannot extract : PIL not imported 344 addTNStream(int(SIDstr), "1", "") 345 else : 346 print >> sys.stderr, \ 347 " *** Err : header (2) error in stream", SID 348 sys.exit(6) 349 350 # ----------------------------------------------------------------- 351 352 elif pps_type == 5: # Root Entry 353 REtimestamp = pps_ts2 354 print 355 print " Root Entry modify timestamp :", \ 356 ctime(conv2pytime(REtimestamp)) 357 if htmlrep == True: 358 report.SetREtst (ctime(conv2pytime(REtimestamp))) 359 print 360 print " ------------------------------------------------------" 361 print 362 363 SID = SID + 1 364 365 currentBlock = nextBlock(thumbsDB, FATblocks, currentBlock) 366 367print 368print " ------------------------------------------------------" 369print 370 371if catIndxOutOfSeq() == True: 372 print >> sys.stderr, " * Info: ", tDBfname, "Catalog",\ 373 " index number out of usual sequence" 374 375if tnStreamOutOfSeq() == True: 376 print >> sys.stderr, " * Info: ", tDBfname, \ 377 "thumbnail stream index number out of usual sequence" 378 379if (outputdir != None) and (nbCatEnt() != nbTNstr()): 380 print >> sys.stderr, " ** Warning: ", tDBfname, " -- number of extracted ", \ 381 "thumbnails does not match number of Catalog entries" 382 383statstring = extractStats(outputdir) 384print statstring 385 386if htmlrep == True: 387 report.flush(statstring) 388 389