1# Copyright (c) 2003-2016 CORE Security Technologies
2#
# This software is provided under a slightly modified version
4# of the Apache Software License. See the accompanying LICENSE file
5# for more information.
6#
7# Description:
#             Microsoft Extensible Storage Engine parser, just focused on trying
9#             to parse NTDS.dit files (not meant as a full parser, although it might work)
10#
11# Author:
12#  Alberto Solino (@agsolino)
13#
14# Reference for:
15#  Structure.
16#
17# Excellent reference done by Joachim Metz
18# http://forensic-proof.com/wp-content/uploads/2011/07/Extensible-Storage-Engine-ESE-Database-File-EDB-format.pdf
19#
20# ToDo:
21# [ ] Parse multi-values properly
22# [ ] Support long values properly
23
24from impacket import LOG
25try:
26    from collections import OrderedDict
27except:
28    try:
29        from ordereddict.ordereddict import OrderedDict
30    except:
31        from ordereddict import OrderedDict
32from impacket.structure import Structure
33from struct import unpack
34from binascii import hexlify
35
36# Constants
37
# File types
FILE_TYPE_DATABASE = 0
FILE_TYPE_STREAMING_FILE = 1

# Database state (consecutive values starting at 1)
(JET_dbstateJustCreated,
 JET_dbstateDirtyShutdown,
 JET_dbstateCleanShutdown,
 JET_dbstateBeingConverted,
 JET_dbstateForceDetach) = range(1, 6)

# Page Flags (bit masks tested against the page header 'PageFlags' field)
FLAGS_ROOT = 1 << 0
FLAGS_LEAF = 1 << 1
FLAGS_PARENT = 1 << 2
FLAGS_EMPTY = 1 << 3
FLAGS_SPACE_TREE = 1 << 5
FLAGS_INDEX = 1 << 6
FLAGS_LONG_VALUE = 1 << 7
FLAGS_NEW_FORMAT = 1 << 13
# Same bit (0x2000) as FLAGS_NEW_FORMAT
FLAGS_NEW_CHECKSUM = 1 << 13

# Tag Flags (bit masks tested against the per-tag flags)
TAG_UNKNOWN = 1 << 0
TAG_DEFUNCT = 1 << 1
TAG_COMMON = 1 << 2

# Fixed Page Numbers
DATABASE_PAGE_NUMBER = 1
CATALOG_PAGE_NUMBER = 4
CATALOG_BACKUP_PAGE_NUMBER = 24

# Fixed FatherDataPages
DATABASE_FDP, CATALOG_FDP, CATALOG_BACKUP_FDP = 1, 2, 3

# Catalog Types (value of the 'Type' field of a catalog entry)
(CATALOG_TYPE_TABLE,
 CATALOG_TYPE_COLUMN,
 CATALOG_TYPE_INDEX,
 CATALOG_TYPE_LONG_VALUE,
 CATALOG_TYPE_CALLBACK) = range(1, 6)
80
81# Column Types
JET_coltypNil          = 0
JET_coltypBit          = 1
JET_coltypUnsignedByte = 2
JET_coltypShort        = 3
JET_coltypLong         = 4
JET_coltypCurrency     = 5
JET_coltypIEEESingle   = 6
JET_coltypIEEEDouble   = 7
JET_coltypDateTime     = 8
JET_coltypBinary       = 9
JET_coltypText         = 10
JET_coltypLongBinary   = 11
JET_coltypLongText     = 12
JET_coltypSLV          = 13
JET_coltypUnsignedLong = 14
JET_coltypLongLong     = 15
JET_coltypGUID         = 16
JET_coltypUnsignedShort= 17
JET_coltypMax          = 18

# Human readable name for every column type (used when printing the catalog)
ColumnTypeToName = {
    JET_coltypNil          : 'NULL',
    JET_coltypBit          : 'Boolean',
    # Was labelled 'Signed byte'; the type is an unsigned 1-byte integer
    JET_coltypUnsignedByte : 'Unsigned byte',
    JET_coltypShort        : 'Signed short',
    JET_coltypLong         : 'Signed long',
    JET_coltypCurrency     : 'Currency',
    JET_coltypIEEESingle   : 'Single precision FP',
    JET_coltypIEEEDouble   : 'Double precision FP',
    JET_coltypDateTime     : 'DateTime',
    JET_coltypBinary       : 'Binary',
    JET_coltypText         : 'Text',
    JET_coltypLongBinary   : 'Long Binary',
    JET_coltypLongText     : 'Long Text',
    JET_coltypSLV          : 'Obsolete',
    JET_coltypUnsignedLong : 'Unsigned long',
    JET_coltypLongLong     : 'Long long',
    JET_coltypGUID         : 'GUID',
    JET_coltypUnsignedShort: 'Unsigned short',
    JET_coltypMax          : 'Max',
}

# For every column type: (size in bytes, struct format string), or None when
# the type has no fixed size / is not unpacked.
# NOTE(review): Currency, DateTime and LongLong are unpacked as unsigned
# 64-bit ('<Q'); kept as-is because callers may rely on it - confirm whether
# signed ('<q') was intended.
ColumnTypeSize = {
    JET_coltypNil          : None,
    JET_coltypBit          : (1,'B'),
    JET_coltypUnsignedByte : (1,'B'),
    JET_coltypShort        : (2,'<h'),
    JET_coltypLong         : (4,'<l'),
    JET_coltypCurrency     : (8,'<Q'),
    JET_coltypIEEESingle   : (4,'<f'),
    JET_coltypIEEEDouble   : (8,'<d'),
    JET_coltypDateTime     : (8,'<Q'),
    JET_coltypBinary       : None,
    JET_coltypText         : None,
    JET_coltypLongBinary   : None,
    JET_coltypLongText     : None,
    JET_coltypSLV          : None,
    JET_coltypUnsignedLong : (4,'<L'),
    JET_coltypLongLong     : (8,'<Q'),
    JET_coltypGUID         : (16,'16s'),
    JET_coltypUnsignedShort: (2,'<H'),
    JET_coltypMax          : None,
}
145
146# Tagged Data Type Flags
TAGGED_DATA_TYPE_VARIABLE_SIZE = 0x01
TAGGED_DATA_TYPE_COMPRESSED = 0x02
TAGGED_DATA_TYPE_STORED = 0x04
TAGGED_DATA_TYPE_MULTI_VALUE = 0x08
# NOTE(review): decimal 10, not 0x10 - unlike its neighbours this is not a
# single bit; confirm the intended value.
TAGGED_DATA_TYPE_WHO_KNOWS = 10

# Code pages
CODEPAGE_UNICODE = 1200
CODEPAGE_ASCII = 20127
CODEPAGE_WESTERN = 1252

# Python codec name for each supported code page
StringCodePages = dict([
    (CODEPAGE_UNICODE, 'utf-16le'),
    (CODEPAGE_ASCII, 'ascii'),
    (CODEPAGE_WESTERN, 'cp1252'),
])
163
164# Structures
165
# Template of the cursor dictionary handed out by ESENT_DB.openTable():
# the table's catalog data, its father data page number, the page currently
# being read and the index of the current tag inside that page.
TABLE_CURSOR = dict(
    TableData='',
    FatherDataPageNumber=0,
    CurrentPageData='',
    CurrentTag=0,
)
172
class ESENT_JET_SIGNATURE(Structure):
    """Signature record embedded in ESENT_DB_HEADER.

    Used there for both the 'DBSignature' and the 'LogSignature' fields.
    """
    structure = (
        ('Random', '<L=0'),
        ('CreationTime', '<Q=0'),
        ('NetBiosName', '16s=""'),
    )
179
class ESENT_DB_HEADER(Structure):
    """Layout of the database header stored at the very start of the file.

    ESENT_DB.mountDB() parses it and takes the page size, version and
    file format revision from it.
    """
    structure = (
        ('CheckSum', '<L=0'),
        ('Signature', '"\xef\xcd\xab\x89'),
        ('Version', '<L=0'),
        ('FileType', '<L=0'),
        ('DBTime', '<Q=0'),
        ('DBSignature', ':', ESENT_JET_SIGNATURE),
        ('DBState', '<L=0'),
        ('ConsistentPosition', '<Q=0'),
        ('ConsistentTime', '<Q=0'),
        ('AttachTime', '<Q=0'),
        ('AttachPosition', '<Q=0'),
        ('DetachTime', '<Q=0'),
        ('DetachPosition', '<Q=0'),
        ('LogSignature', ':', ESENT_JET_SIGNATURE),
        ('Unknown', '<L=0'),
        ('PreviousBackup', '24s=""'),
        ('PreviousIncBackup', '24s=""'),
        ('CurrentFullBackup', '24s=""'),
        ('ShadowingDisables', '<L=0'),
        ('LastObjectID', '<L=0'),
        ('WindowsMajorVersion', '<L=0'),
        ('WindowsMinorVersion', '<L=0'),
        ('WindowsBuildNumber', '<L=0'),
        ('WindowsServicePackNumber', '<L=0'),
        ('FileFormatRevision', '<L=0'),
        ('PageSize', '<L=0'),
        ('RepairCount', '<L=0'),
        ('RepairTime', '<Q=0'),
        ('Unknown2', '28s=""'),
        ('ScrubTime', '<Q=0'),
        ('RequiredLog', '<Q=0'),
        ('UpgradeExchangeFormat', '<L=0'),
        ('UpgradeFreePages', '<L=0'),
        ('UpgradeSpaceMapPages', '<L=0'),
        ('CurrentShadowBackup', '24s=""'),
        ('CreationFileFormatVersion', '<L=0'),
        ('CreationFileFormatRevision', '<L=0'),
        ('Unknown3', '16s=""'),
        ('OldRepairCount', '<L=0'),
        ('ECCCount', '<L=0'),
        ('LastECCTime', '<Q=0'),
        ('OldECCFixSuccessCount', '<L=0'),
        ('ECCFixErrorCount', '<L=0'),
        ('LastECCFixErrorTime', '<Q=0'),
        ('OldECCFixErrorCount', '<L=0'),
        ('BadCheckSumErrorCount', '<L=0'),
        ('LastBadCheckSumTime', '<Q=0'),
        ('OldCheckSumErrorCount', '<L=0'),
        ('CommittedLog', '<L=0'),
        ('PreviousShadowCopy', '24s=""'),
        ('PreviousDifferentialBackup', '24s=""'),
        ('Unknown4', '40s=""'),
        ('NLSMajorVersion', '<L=0'),
        ('NLSMinorVersion', '<L=0'),
        ('Unknown5', '148s=""'),
        ('UnknownFlags', '<L=0'),
    )
239
class ESENT_PAGE_HEADER(Structure):
    """Page header whose layout depends on the database version/revision.

    The constructor picks one of the leading checksum layouts, appends the
    fields common to all formats and, for the newest format with pages larger
    than 8192 bytes, the extended trailer.
    """
    # Leading fields, one variant per format generation
    structure_2003_SP0 = (
        ('CheckSum', '<L=0'),
        ('PageNumber', '<L=0'),
    )
    structure_0x620_0x0b = (
        ('CheckSum', '<L=0'),
        ('ECCCheckSum', '<L=0'),
    )
    structure_win7 = (
        ('CheckSum', '<Q=0'),
    )
    # Fields shared by every format
    common = (
        ('LastModificationTime', '<Q=0'),
        ('PreviousPageNumber', '<L=0'),
        ('NextPageNumber', '<L=0'),
        ('FatherDataPage', '<L=0'),
        ('AvailableDataSize', '<H=0'),
        ('AvailableUncommittedDataSize', '<H=0'),
        ('FirstAvailableDataOffset', '<H=0'),
        ('FirstAvailablePageTag', '<H=0'),
        ('PageFlags', '<L=0'),
    )
    # Extra fields appended only for the newest format with pageSize > 8192
    extended_win7 = (
        ('ExtendedCheckSum1', '<Q=0'),
        ('ExtendedCheckSum2', '<Q=0'),
        ('ExtendedCheckSum3', '<Q=0'),
        ('PageNumber', '<Q=0'),
        ('Unknown', '<Q=0'),
    )

    def __init__(self, version, revision, pageSize=8192, data=None):
        """Select the layout for (version, revision, pageSize) and parse data."""
        isOldFormat = version < 0x620 or (version == 0x620 and revision < 0x0b)
        if isOldFormat:
            # For sure the old format
            layout = self.structure_2003_SP0 + self.common
        elif version == 0x620 and revision < 0x11:
            # Exchange 2003 SP1 and Windows Vista and later
            layout = self.structure_0x620_0x0b + self.common
        else:
            # Windows 7 and later
            layout = self.structure_win7 + self.common
            if pageSize > 8192:
                layout = layout + self.extended_win7

        self.structure = layout
        Structure.__init__(self, data)
284
class ESENT_ROOT_HEADER(Structure):
    """Header parsed from tag 0 of a page that has FLAGS_ROOT set."""
    structure = (
        ('InitialNumberOfPages', '<L=0'),
        ('ParentFatherDataPage', '<L=0'),
        ('ExtentSpace', '<L=0'),
        ('SpaceTreePageNumber', '<L=0'),
    )
292
class ESENT_BRANCH_HEADER(Structure):
    """Header parsed from tag 0 of a non-root branch page (whole tag = key)."""
    structure = (
        ('CommonPageKey', ':'),
    )
297
class ESENT_BRANCH_ENTRY(Structure):
    """Entry of a branch page: a local key followed by a child page number.

    When the tag flags carry TAG_COMMON the entry is preceded by a
    'CommonPageKeySize' field.
    """
    common = (
        ('CommonPageKeySize', '<H=0'),
    )
    structure = (
        ('LocalPageKeySize', '<H=0'),
        ('_LocalPageKey', '_-LocalPageKey', 'self["LocalPageKeySize"]'),
        ('LocalPageKey', ':'),
        ('ChildPageNumber', '<L=0'),
    )

    def __init__(self, flags, data=None):
        """Build the layout according to the tag flags and parse data."""
        hasCommonKey = (flags & TAG_COMMON) != 0
        if hasCommonKey:
            # Include the common header
            self.structure = self.common + self.structure
        Structure.__init__(self, data)
313
class ESENT_LEAF_HEADER(Structure):
    """Header parsed from tag 0 of a non-root leaf page (whole tag = key)."""
    structure = (
        ('CommonPageKey', ':'),
    )
318
class ESENT_LEAF_ENTRY(Structure):
    """Entry of a leaf page: a local key followed by the entry data.

    When the tag flags carry TAG_COMMON the entry is preceded by a
    'CommonPageKeySize' field.
    """
    common = (
        ('CommonPageKeySize', '<H=0'),
    )
    structure = (
        ('LocalPageKeySize', '<H=0'),
        ('_LocalPageKey', '_-LocalPageKey', 'self["LocalPageKeySize"]'),
        ('LocalPageKey', ':'),
        ('EntryData', ':'),
    )

    def __init__(self, flags, data=None):
        """Build the layout according to the tag flags and parse data."""
        hasCommonKey = (flags & TAG_COMMON) != 0
        if hasCommonKey:
            # Include the common header
            self.structure = self.common + self.structure
        Structure.__init__(self, data)
334
class ESENT_SPACE_TREE_HEADER(Structure):
    """Header parsed from tag 0 of a leaf page flagged FLAGS_SPACE_TREE."""
    structure = (
        ('Unknown', '<Q=0'),
    )
339
class ESENT_SPACE_TREE_ENTRY(Structure):
    """Entry of a leaf page flagged FLAGS_SPACE_TREE."""
    structure = (
        ('PageKeySize', '<H=0'),
        ('LastPageNumber', '<L=0'),
        ('NumberOfPages', '<L=0'),
    )
346
class ESENT_INDEX_ENTRY(Structure):
    """Entry of a leaf page flagged FLAGS_INDEX (whole tag = record key)."""
    structure = (
        ('RecordPageKey', ':'),
    )
351
class ESENT_DATA_DEFINITION_HEADER(Structure):
    """Small header found at the start of a leaf entry's 'EntryData'.

    Gives the last fixed-size and last variable-size column identifiers
    present, and the offset of the variable-size area.
    """
    structure = (
        ('LastFixedSize', '<B=0'),
        ('LastVariableDataType', '<B=0'),
        ('VariableSizeOffset', '<H=0'),
    )
358
class ESENT_CATALOG_DATA_DEFINITION_ENTRY(Structure):
    """Catalog entry whose layout depends on its 'Type' field.

    The constructor peeks at the 16-bit type stored at offset 4 of the data
    and assembles the structure from the pieces below; whatever is not
    described ends up in 'Trailing'.
    """
    # Present in every entry
    fixed = (
        ('FatherDataPageID', '<L=0'),
        ('Type', '<H=0'),
        ('Identifier', '<L=0'),
    )

    # Column entries
    column_stuff = (
        ('ColumnType', '<L=0'),
        ('SpaceUsage', '<L=0'),
        ('ColumnFlags', '<L=0'),
        ('CodePage', '<L=0'),
    )

    # Table, index and long value entries start with this field
    other = (
        ('FatherDataPageNumber', '<L=0'),
    )

    # Table entries (presumably followed by TableFlags and
    # InitialNumberOfPages, which are left unparsed in 'Trailing')
    table_stuff = (
        ('SpaceUsage', '<L=0'),
    )

    # Index entries
    index_stuff = (
        ('SpaceUsage', '<L=0'),
        ('IndexFlags', '<L=0'),
        ('Locale', '<L=0'),
    )

    # Long value entries (presumably followed by LVFlags and
    # InitialNumberOfPages, which are left unparsed in 'Trailing')
    lv_stuff = (
        ('SpaceUsage', '<L=0'),
    )

    # Catch-all for the rest of the entry (presumably RootFlag, RecordOffset,
    # LCMapFlags and KeyMost, none of which are parsed)
    common = (
        ('Trailing', ':'),
    )

    def __init__(self, data):
        """Pick the layout matching the entry type and parse data.

        Raises Exception for callback entries, which are not supported.
        """
        # Depending on the type of data we'll end up building a different struct
        dataType = unpack('<H', data[4:6])[0]

        if dataType == CATALOG_TYPE_CALLBACK:
            raise Exception('CallBack types not supported!')

        typeSpecific = {
            CATALOG_TYPE_TABLE: self.other + self.table_stuff,
            CATALOG_TYPE_COLUMN: self.column_stuff,
            CATALOG_TYPE_INDEX: self.other + self.index_stuff,
            CATALOG_TYPE_LONG_VALUE: self.other + self.lv_stuff,
        }

        if dataType in typeSpecific:
            self.structure = self.fixed + typeSpecific[dataType]
        else:
            LOG.error('Unknown catalog type 0x%x' % dataType)
            # Unknown type: start from an empty layout, so that only
            # 'Trailing' ends up being parsed below
            self.structure = ()
            Structure.__init__(self, data)

        self.structure += self.common

        Structure.__init__(self, data)
425
426
def pretty_print(x):
    """Return x unchanged if it is found in the printable set, else '.'."""
    printable = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ '
    return x if x in printable else '.'
432
433def hexdump(data):
434    x=str(data)
435    strLen = len(x)
436    i = 0
437    while i < strLen:
438        print "%04x  " % i,
439        for j in range(16):
440            if i+j < strLen:
441                print "%02X" % ord(x[i+j]),
442
443            else:
444                print "  ",
445            if j%16 == 7:
446                print "",
447        print " ",
448        print ''.join(pretty_print(x) for x in x[i:i+16] )
449        i += 16
450
def getUnixTime(t):
    """Convert a timestamp in 100ns ticks into whole Unix seconds.

    Subtracts the constant 116444736000000000 and divides by 10000000.
    Floor division is used so the result stays an integer even when true
    division is in effect (Python 3 or ``from __future__ import division``).
    """
    t -= 116444736000000000
    t //= 10000000
    return t
455
class ESENT_PAGE:
    """One database page: its parsed header plus access to its tags.

    ``db`` is the database header (indexed by 'Version', 'FileFormatRevision'
    and 'PageSize') and ``data`` the raw content of the page.  When ``data``
    is None no header is parsed and ``record`` stays None.

    Tags are 4-byte entries stored at the very end of the page, tag 0 being
    the last one; each holds a 16-bit size word followed by a 16-bit offset
    word.
    """
    def __init__(self, db, data=None):
        self.__DBHeader = db
        self.data = data
        self.record = None
        if data is not None:
            self.record = ESENT_PAGE_HEADER(self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision'], self.__DBHeader['PageSize'], data)

    def _usesLargePageTags(self):
        """Tell whether tags use the layout of the newest format with big pages.

        Single source of truth for getTag() and dump(), which used to disagree
        on the revision threshold (decimal 11 vs 17).
        """
        return (self.__DBHeader['Version'] == 0x620 and
                self.__DBHeader['FileFormatRevision'] >= 0x11 and
                self.__DBHeader['PageSize'] > 8192)

    def _rawTag(self, tagNum):
        """Return the (size word, offset word) pair of tag number tagNum."""
        end = len(self.data) - 4 * tagNum
        return unpack('<HH', self.data[end - 4:end])

    def printFlags(self):
        """Print one line per property derived from the page flags."""
        flags = self.record['PageFlags']
        if flags & FLAGS_EMPTY:
            print("\tEmpty")
        if flags & FLAGS_INDEX:
            print("\tIndex")
        if flags & FLAGS_LEAF:
            print("\tLeaf")
        else:
            print("\tBranch")
        if flags & FLAGS_LONG_VALUE:
            print("\tLong Value")
        if flags & FLAGS_NEW_CHECKSUM:
            print("\tNew Checksum")
        if flags & FLAGS_NEW_FORMAT:
            print("\tNew Format")
        if flags & FLAGS_PARENT:
            print("\tParent")
        if flags & FLAGS_ROOT:
            print("\tRoot")
        if flags & FLAGS_SPACE_TREE:
            print("\tSpace Tree")

    def dump(self):
        """Print the page header, its flags, every tag and the parsed entries.

        Raises Exception on long value leaf pages, which are not supported.
        """
        baseOffset = len(self.record)
        self.record.dump()
        tagCount = self.record['FirstAvailablePageTag']
        pageFlags = self.record['PageFlags']
        largeTags = self._usesLargePageTags()

        print("FLAGS: ")
        self.printFlags()

        print("")

        for i in range(tagCount):
            sizeWord, offsetWord = self._rawTag(i)
            if largeTags:
                valueSize = sizeWord & 0x7fff
                valueOffset = offsetWord & 0x7fff
                hexdump((self.data[baseOffset+valueOffset:][:6]))
                # The flags live in the top 3 bits of the value's second byte
                tagFlags = bytearray(self.data[baseOffset+valueOffset:][:2])[1] >> 5
            else:
                valueSize = sizeWord & 0x1fff
                # The flags live in the top 3 bits of the offset word
                tagFlags = (offsetWord & 0xe000) >> 13
                valueOffset = offsetWord & 0x1fff

            print("TAG %-8d offset:0x%-6x flags:0x%-4x valueSize:0x%x" % (i,valueOffset,tagFlags,valueSize))

        # Tag 0 holds the page level header
        if pageFlags & FLAGS_ROOT:
            rootHeader = ESENT_ROOT_HEADER(self.getTag(0)[1])
            rootHeader.dump()
        elif pageFlags & FLAGS_LEAF == 0:
            # Branch Header
            flags, data = self.getTag(0)
            branchHeader = ESENT_BRANCH_HEADER(data)
            branchHeader.dump()
        else:
            # Leaf Header
            flags, data = self.getTag(0)
            if pageFlags & FLAGS_SPACE_TREE:
                # Space Tree
                spaceTreeHeader = ESENT_SPACE_TREE_HEADER(data)
                spaceTreeHeader.dump()
            else:
                leafHeader = ESENT_LEAF_HEADER(data)
                leafHeader.dump()

        # Print the leaf/branch tags
        for tagNum in range(1, tagCount):
            flags, data = self.getTag(tagNum)
            if pageFlags & FLAGS_LEAF == 0:
                # Branch page
                branchEntry = ESENT_BRANCH_ENTRY(flags, data)
                branchEntry.dump()
            elif pageFlags & FLAGS_SPACE_TREE:
                # Space Tree: parsed (so malformed data still raises), not dumped
                ESENT_SPACE_TREE_ENTRY(data)
            elif pageFlags & FLAGS_INDEX:
                # Index Entry: parsed (so malformed data still raises), not dumped
                ESENT_INDEX_ENTRY(data)
            elif pageFlags & FLAGS_LONG_VALUE:
                # Long Page Value
                raise Exception('Long value still not supported')
            else:
                # Table Value
                leafEntry = ESENT_LEAF_ENTRY(flags, data)
                dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(leafEntry['EntryData'])
                dataDefinitionHeader.dump()
                catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(leafEntry['EntryData'][len(dataDefinitionHeader):])
                catalogEntry.dump()
                hexdump(leafEntry['EntryData'])

    def getTag(self, tagNum):
        """Return (flags, data) for tag number tagNum.

        Valid tag numbers are 0 .. FirstAvailablePageTag - 1; anything at or
        past FirstAvailablePageTag raises Exception.
        """
        # '<=' (not '<'): FirstAvailablePageTag is the first tag that does
        # NOT exist, so it has to be rejected as well
        if self.record['FirstAvailablePageTag'] <= tagNum:
            raise Exception('Trying to grab an unknown tag 0x%x' % tagNum)

        baseOffset = len(self.record)
        sizeWord, offsetWord = self._rawTag(tagNum)

        if self._usesLargePageTags():
            valueSize = sizeWord & 0x7fff
            valueOffset = offsetWord & 0x7fff
            # The flags are stored in the top 3 bits of the value's second
            # byte and have to be stripped from the returned data
            tmpData = bytearray(self.data[baseOffset+valueOffset:][:valueSize])
            pageFlags = tmpData[1] >> 5
            tmpData[1] &= 0x1f
            tagData = bytes(tmpData)
        else:
            valueSize = sizeWord & 0x1fff
            pageFlags = (offsetWord & 0xe000) >> 13
            valueOffset = offsetWord & 0x1fff
            tagData = self.data[baseOffset+valueOffset:][:valueSize]

        return pageFlags, tagData
590
591class ESENT_DB:
592    def __init__(self, fileName, pageSize = 8192, isRemote = False):
593        self.__fileName = fileName
594        self.__pageSize = pageSize
595        self.__DB = None
596        self.__DBHeader = None
597        self.__totalPages = None
598        self.__tables = OrderedDict()
599        self.__currentTable = None
600        self.__isRemote = isRemote
601        self.mountDB()
602
603    def mountDB(self):
604        LOG.debug("Mounting DB...")
605        if self.__isRemote is True:
606            self.__DB = self.__fileName
607            self.__DB.open()
608        else:
609            self.__DB = open(self.__fileName,"rb")
610        mainHeader = self.getPage(-1)
611        self.__DBHeader = ESENT_DB_HEADER(mainHeader)
612        self.__pageSize = self.__DBHeader['PageSize']
613        self.__DB.seek(0,2)
614        self.__totalPages = (self.__DB.tell() / self.__pageSize) -2
615        LOG.debug("Database Version:0x%x, Revision:0x%x"% (self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision']))
616        LOG.debug("Page Size: %d" % self.__pageSize)
617        LOG.debug("Total Pages in file: %d" % self.__totalPages)
618        self.parseCatalog(CATALOG_PAGE_NUMBER)
619
620    def printCatalog(self):
621        indent = '    '
622
623        print "Database version: 0x%x, 0x%x" % (self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision'] )
624        print "Page size: %d " % self.__pageSize
625        print "Number of pages: %d" % self.__totalPages
626        print
627        print "Catalog for %s" % self.__fileName
628        for table in self.__tables.keys():
629            print "[%s]" % table
630            print "%sColumns " % indent
631            for column in self.__tables[table]['Columns'].keys():
632                record = self.__tables[table]['Columns'][column]['Record']
633                print "%s%-5d%-30s%s" % (indent*2, record['Identifier'], column,ColumnTypeToName[record['ColumnType']])
634            print "%sIndexes"% indent
635            for index in self.__tables[table]['Indexes'].keys():
636                print "%s%s" % (indent*2, index)
637            print ""
638
639    def __addItem(self, entry):
640        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
641        catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])
642        itemName = self.__parseItemName(entry)
643
644        if catalogEntry['Type'] == CATALOG_TYPE_TABLE:
645            self.__tables[itemName] = OrderedDict()
646            self.__tables[itemName]['TableEntry'] = entry
647            self.__tables[itemName]['Columns']    = OrderedDict()
648            self.__tables[itemName]['Indexes']    = OrderedDict()
649            self.__tables[itemName]['LongValues'] = OrderedDict()
650            self.__currentTable = itemName
651        elif catalogEntry['Type'] == CATALOG_TYPE_COLUMN:
652            self.__tables[self.__currentTable]['Columns'][itemName] = entry
653            self.__tables[self.__currentTable]['Columns'][itemName]['Header'] = dataDefinitionHeader
654            self.__tables[self.__currentTable]['Columns'][itemName]['Record'] = catalogEntry
655        elif catalogEntry['Type'] == CATALOG_TYPE_INDEX:
656            self.__tables[self.__currentTable]['Indexes'][itemName] = entry
657        elif catalogEntry['Type'] == CATALOG_TYPE_LONG_VALUE:
658            self.__addLongValue(entry)
659        else:
660            raise Exception('Unknown type 0x%x' % catalogEntry['Type'])
661
662    def __parseItemName(self,entry):
663        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
664
665        if dataDefinitionHeader['LastVariableDataType'] > 127:
666            numEntries =  dataDefinitionHeader['LastVariableDataType'] - 127
667        else:
668            numEntries =  dataDefinitionHeader['LastVariableDataType']
669
670        itemLen = unpack('<H',entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:][:2])[0]
671        itemName = entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:][2*numEntries:][:itemLen]
672        return itemName
673
674    def __addLongValue(self, entry):
675        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
676        catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])
677        lvLen = unpack('<H',entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:][:2])[0]
678        lvName = entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:][7:][:lvLen]
679        self.__tables[self.__currentTable]['LongValues'][lvName] = entry
680
681    def parsePage(self, page):
682        baseOffset = len(page.record)
683
684        # Print the leaf/branch tags
685        for tagNum in range(1,page.record['FirstAvailablePageTag']):
686            flags, data = page.getTag(tagNum)
687            if page.record['PageFlags'] & FLAGS_LEAF > 0:
688                # Leaf page
689                if page.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
690                    pass
691                elif page.record['PageFlags'] & FLAGS_INDEX > 0:
692                    pass
693                elif page.record['PageFlags'] & FLAGS_LONG_VALUE > 0:
694                    pass
695                else:
696                    # Table Value
697                    leafEntry = ESENT_LEAF_ENTRY(flags, data)
698                    self.__addItem(leafEntry)
699
700    def parseCatalog(self, pageNum):
701        # Parse all the pages starting at pageNum and commit table data
702        page = self.getPage(pageNum)
703        self.parsePage(page)
704
705        for i in range(1, page.record['FirstAvailablePageTag']):
706            flags, data = page.getTag(i)
707            if page.record['PageFlags'] & FLAGS_LEAF == 0:
708                # Branch page
709                branchEntry = ESENT_BRANCH_ENTRY(flags, data)
710                self.parseCatalog(branchEntry['ChildPageNumber'])
711
712
713    def readHeader(self):
714        LOG.debug("Reading Boot Sector for %s" % self.__volumeName)
715
716    def getPage(self, pageNum):
717        LOG.debug("Trying to fetch page %d (0x%x)" % (pageNum, (pageNum+1)*self.__pageSize))
718        self.__DB.seek((pageNum+1)*self.__pageSize, 0)
719        data = self.__DB.read(self.__pageSize)
720        while len(data) < self.__pageSize:
721            remaining = self.__pageSize - len(data)
722            data += self.__DB.read(remaining)
723        # Special case for the first page
724        if pageNum <= 0:
725            return data
726        else:
727            return ESENT_PAGE(self.__DBHeader, data)
728
729    def close(self):
730        self.__DB.close()
731
732    def openTable(self, tableName):
733        # Returns a cursos for later use
734
735        if tableName in self.__tables:
736            entry = self.__tables[tableName]['TableEntry']
737            dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
738            catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])
739
740            # Let's position the cursor at the leaf levels for fast reading
741            pageNum = catalogEntry['FatherDataPageNumber']
742            done = False
743            while done is False:
744                page = self.getPage(pageNum)
745                if page.record['FirstAvailablePageTag'] <= 1:
746                    # There are no records
747                    done = True
748                for i in range(1, page.record['FirstAvailablePageTag']):
749                    flags, data = page.getTag(i)
750                    if page.record['PageFlags'] & FLAGS_LEAF == 0:
751                        # Branch page, move on to the next page
752                        branchEntry = ESENT_BRANCH_ENTRY(flags, data)
753                        pageNum = branchEntry['ChildPageNumber']
754                        break
755                    else:
756                        done = True
757                        break
758
759            cursor = TABLE_CURSOR
760            cursor['TableData'] = self.__tables[tableName]
761            cursor['FatherDataPageNumber'] = catalogEntry['FatherDataPageNumber']
762            cursor['CurrentPageData'] = page
763            cursor['CurrentTag']  = 0
764            return cursor
765        else:
766            return None
767
768    def __getNextTag(self, cursor):
769        page = cursor['CurrentPageData']
770
771        if cursor['CurrentTag'] >= page.record['FirstAvailablePageTag']:
772            # No more data in this page, chau
773            return None
774
775        flags, data = page.getTag(cursor['CurrentTag'])
776        if page.record['PageFlags'] & FLAGS_LEAF > 0:
777            # Leaf page
778            if page.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
779                raise Exception('FLAGS_SPACE_TREE > 0')
780            elif page.record['PageFlags'] & FLAGS_INDEX > 0:
781                raise Exception('FLAGS_INDEX > 0')
782            elif page.record['PageFlags'] & FLAGS_LONG_VALUE > 0:
783                raise Exception('FLAGS_LONG_VALUE > 0')
784            else:
785                # Table Value
786                leafEntry = ESENT_LEAF_ENTRY(flags, data)
787                return leafEntry
788
789        return None
790
791    def getNextRow(self, cursor):
792        cursor['CurrentTag'] += 1
793
794        tag = self.__getNextTag(cursor)
795        #hexdump(tag)
796
797        if tag is None:
798            # No more tags in this page, search for the next one on the right
799            page = cursor['CurrentPageData']
800            if page.record['NextPageNumber'] == 0:
801                # No more pages, chau
802                return None
803            else:
804                cursor['CurrentPageData'] = self.getPage(page.record['NextPageNumber'])
805                cursor['CurrentTag'] = 0
806                return self.getNextRow(cursor)
807        else:
808            return self.__tagToRecord(cursor, tag['EntryData'])
809
810    def __tagToRecord(self, cursor, tag):
811        # So my brain doesn't forget, the data record is composed of:
812        # Header
813        # Fixed Size Data (ID < 127)
814        #     The easiest to parse. Their size is fixed in the record. You can get its size
815        #     from the Column Record, field SpaceUsage
816        # Variable Size Data (127 < ID < 255)
817        #     At VariableSizeOffset you get an array of two bytes per variable entry, pointing
818        #     to the length of the value. Values start at:
819        #                numEntries = LastVariableDataType - 127
820        #                VariableSizeOffset + numEntries * 2 (bytes)
821        # Tagged Data ( > 255 )
822        #     After the Variable Size Value, there's more data for the tagged values.
823        #     Right at the beginning there's another array (taggedItems), pointing to the
824        #     values, size.
825        #
826        # The interesting thing about this DB records is there's no need for all the columns to be there, hence
827        # saving space. That's why I got over all the columns, and if I find data (of any type), i assign it. If
828        # not, the column's empty.
829        #
830        # There are a lot of caveats in the code, so take your time to explore it.
831        #
832        # ToDo: Better complete this description
833        #
834
835        record = OrderedDict()
836        taggedItems = OrderedDict()
837        taggedItemsParsed = False
838
839        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(tag)
840        #dataDefinitionHeader.dump()
841        variableDataBytesProcessed = (dataDefinitionHeader['LastVariableDataType'] - 127) * 2
842        prevItemLen = 0
843        tagLen = len(tag)
844        fixedSizeOffset = len(dataDefinitionHeader)
845        variableSizeOffset = dataDefinitionHeader['VariableSizeOffset']
846
847        columns = cursor['TableData']['Columns']
848
849        for column in columns.keys():
850            columnRecord = columns[column]['Record']
851            #columnRecord.dump()
852            if columnRecord['Identifier'] <= dataDefinitionHeader['LastFixedSize']:
853                # Fixed Size column data type, still available data
854                record[column] = tag[fixedSizeOffset:][:columnRecord['SpaceUsage']]
855                fixedSizeOffset += columnRecord['SpaceUsage']
856
857            elif 127 < columnRecord['Identifier'] <= dataDefinitionHeader['LastVariableDataType']:
858                # Variable data type
859                index = columnRecord['Identifier'] - 127 - 1
860                itemLen = unpack('<H',tag[variableSizeOffset+index*2:][:2])[0]
861
862                if itemLen & 0x8000:
863                    # Empty item
864                    itemLen = prevItemLen
865                    record[column] = None
866                else:
867                    itemValue = tag[variableSizeOffset+variableDataBytesProcessed:][:itemLen-prevItemLen]
868                    record[column] = itemValue
869
870                #if columnRecord['Identifier'] <= dataDefinitionHeader['LastVariableDataType']:
871                variableDataBytesProcessed +=itemLen-prevItemLen
872
873                prevItemLen = itemLen
874
875            elif columnRecord['Identifier'] > 255:
876                # Have we parsed the tagged items already?
877                if taggedItemsParsed is False and (variableDataBytesProcessed+variableSizeOffset) < tagLen:
878                    index = variableDataBytesProcessed+variableSizeOffset
879                    #hexdump(tag[index:])
880                    endOfVS = self.__pageSize
881                    firstOffsetTag = (unpack('<H', tag[index+2:][:2])[0] & 0x3fff) + variableDataBytesProcessed+variableSizeOffset
882                    while True:
883                        taggedIdentifier = unpack('<H', tag[index:][:2])[0]
884                        index += 2
885                        taggedOffset = (unpack('<H', tag[index:][:2])[0] & 0x3fff)
886                        # As of Windows 7 and later ( version 0x620 revision 0x11) the
887                        # tagged data type flags are always present
888                        if self.__DBHeader['Version'] == 0x620 and self.__DBHeader['FileFormatRevision'] >= 17 and self.__DBHeader['PageSize'] > 8192:
889                            flagsPresent = 1
890                        else:
891                            flagsPresent = (unpack('<H', tag[index:][:2])[0] & 0x4000)
892                        index += 2
893                        if taggedOffset < endOfVS:
894                            endOfVS = taggedOffset
895                        taggedItems[taggedIdentifier] = (taggedOffset, tagLen, flagsPresent)
896                        #print "ID: %d, Offset:%d, firstOffset:%d, index:%d, flag: 0x%x" % (taggedIdentifier, taggedOffset,firstOffsetTag,index, flagsPresent)
897                        if index >= firstOffsetTag:
898                            # We reached the end of the variable size array
899                            break
900
901                    # Calculate length of variable items
902                    # Ugly.. should be redone
903                    prevKey = taggedItems.keys()[0]
904                    for i in range(1,len(taggedItems)):
905                        offset0, length, flags = taggedItems[prevKey]
906                        offset, _, _ = taggedItems.items()[i][1]
907                        taggedItems[prevKey] = (offset0, offset-offset0, flags)
908                        #print "ID: %d, Offset: %d, Len: %d, flags: %d" % (prevKey, offset0, offset-offset0, flags)
909                        prevKey = taggedItems.keys()[i]
910                    taggedItemsParsed = True
911
912                # Tagged data type
913                if taggedItems.has_key(columnRecord['Identifier']):
914                    offsetItem = variableDataBytesProcessed + variableSizeOffset + taggedItems[columnRecord['Identifier']][0]
915                    itemSize = taggedItems[columnRecord['Identifier']][1]
916                    # If item have flags, we should skip them
917                    if taggedItems[columnRecord['Identifier']][2] > 0:
918                        itemFlag = ord(tag[offsetItem:offsetItem+1])
919                        offsetItem += 1
920                        itemSize -= 1
921                    else:
922                        itemFlag = 0
923
924                    #print "ID: %d, itemFlag: 0x%x" %( columnRecord['Identifier'], itemFlag)
925                    if itemFlag & (TAGGED_DATA_TYPE_COMPRESSED ):
926                        LOG.error('Unsupported tag column: %s, flag:0x%x' % (column, itemFlag))
927                        record[column] = None
928                    elif itemFlag & TAGGED_DATA_TYPE_MULTI_VALUE:
929                        # ToDo: Parse multi-values properly
930                        LOG.debug('Multivalue detected in column %s, returning raw results' % (column))
931                        record[column] = (hexlify(tag[offsetItem:][:itemSize]),)
932                    else:
933                        record[column] = tag[offsetItem:][:itemSize]
934
935                else:
936                    record[column] = None
937            else:
938                record[column] = None
939
940            # If we understand the data type, we unpack it and cast it accordingly
941            # otherwise, we just encode it in hex
942            if type(record[column]) is tuple:
943                # A multi value data, we won't decode it, just leave it this way
944                record[column] = record[column][0]
945            elif columnRecord['ColumnType'] == JET_coltypText or columnRecord['ColumnType'] == JET_coltypLongText:
946                # Let's handle strings
947                if record[column] is not None:
948                    if columnRecord['CodePage'] not in StringCodePages:
949                        raise Exception('Unknown codepage 0x%x'% columnRecord['CodePage'])
950                    stringDecoder = StringCodePages[columnRecord['CodePage']]
951
952                    record[column] = record[column].decode(stringDecoder)
953
954            else:
955                unpackData = ColumnTypeSize[columnRecord['ColumnType']]
956                if record[column] is not None:
957                    if unpackData is None:
958                        record[column] = hexlify(record[column])
959                    else:
960                        unpackStr = unpackData[1]
961                        unpackSize = unpackData[0]
962                        record[column] = unpack(unpackStr, record[column])[0]
963
964        return record
965
966
967