# Copyright (c) 2003-2016 CORE Security Technologies
#
# This software is provided under a slightly modified version
# of the Apache Software License. See the accompanying LICENSE file
# for more information.
#
# Description:
#   Microsoft Extensible Storage Engine parser, just focused on trying
#   to parse NTDS.dit files (not meant as a full parser, although it might work)
#
# Author:
#   Alberto Solino (@agsolino)
#
# Reference for:
#   Structure.
#
# Excellent reference done by Joachim Metz
# http://forensic-proof.com/wp-content/uploads/2011/07/Extensible-Storage-Engine-ESE-Database-File-EDB-format.pdf
#
# ToDo:
# [ ] Parse multi-values properly
# [ ] Support long values properly

from impacket import LOG
# OrderedDict lives in ``collections`` on newer interpreters; fall back to the
# external ``ordereddict`` backport otherwise. Only ImportError is caught so
# that any other failure raised while importing is not silently masked.
try:
    from collections import OrderedDict
except ImportError:
    try:
        from ordereddict.ordereddict import OrderedDict
    except ImportError:
        from ordereddict import OrderedDict
from impacket.structure import Structure
from struct import unpack
from binascii import hexlify

# Constants

FILE_TYPE_DATABASE = 0
FILE_TYPE_STREAMING_FILE = 1

# Database state
JET_dbstateJustCreated = 1
JET_dbstateDirtyShutdown = 2
JET_dbstateCleanShutdown = 3
JET_dbstateBeingConverted = 4
JET_dbstateForceDetach = 5

# Page Flags
FLAGS_ROOT = 1
FLAGS_LEAF = 2
FLAGS_PARENT = 4
FLAGS_EMPTY = 8
FLAGS_SPACE_TREE = 0x20
FLAGS_INDEX = 0x40
FLAGS_LONG_VALUE = 0x80
# NOTE(review): the next two flags share the value 0x2000, so any code that
# tests them separately will always report both together - confirm against the
# format reference whether that is intended.
FLAGS_NEW_FORMAT = 0x2000
FLAGS_NEW_CHECKSUM = 0x2000

# Tag Flags
TAG_UNKNOWN = 0x1
TAG_DEFUNCT = 0x2
TAG_COMMON = 0x4

# Fixed Page Numbers
DATABASE_PAGE_NUMBER = 1
CATALOG_PAGE_NUMBER = 4
CATALOG_BACKUP_PAGE_NUMBER = 24

# Fixed FatherDataPages
DATABASE_FDP = 1
CATALOG_FDP = 2
CATALOG_BACKUP_FDP = 3

# Catalog Types
CATALOG_TYPE_TABLE = 1
CATALOG_TYPE_COLUMN = 2
CATALOG_TYPE_INDEX = 3
CATALOG_TYPE_LONG_VALUE = 4
CATALOG_TYPE_CALLBACK = 5

# Column Types
JET_coltypNil = 0
JET_coltypBit = 1
JET_coltypUnsignedByte = 2
JET_coltypShort = 3
JET_coltypLong = 4
JET_coltypCurrency = 5
JET_coltypIEEESingle = 6
JET_coltypIEEEDouble = 7
JET_coltypDateTime = 8
JET_coltypBinary = 9
JET_coltypText = 10
JET_coltypLongBinary = 11
JET_coltypLongText = 12
JET_coltypSLV = 13
JET_coltypUnsignedLong = 14
JET_coltypLongLong = 15
JET_coltypGUID = 16
JET_coltypUnsignedShort= 17
JET_coltypMax = 18

# Human readable name for every column type, used when printing the catalog.
ColumnTypeToName = {
    JET_coltypNil          : 'NULL',
    JET_coltypBit          : 'Boolean',
    # Was labelled 'Signed byte'; the type is unsigned and is unpacked with
    # the unsigned 'B' format in ColumnTypeSize below.
    JET_coltypUnsignedByte : 'Unsigned byte',
    JET_coltypShort        : 'Signed short',
    JET_coltypLong         : 'Signed long',
    JET_coltypCurrency     : 'Currency',
    JET_coltypIEEESingle   : 'Single precision FP',
    JET_coltypIEEEDouble   : 'Double precision FP',
    JET_coltypDateTime     : 'DateTime',
    JET_coltypBinary       : 'Binary',
    JET_coltypText         : 'Text',
    JET_coltypLongBinary   : 'Long Binary',
    JET_coltypLongText     : 'Long Text',
    JET_coltypSLV          : 'Obsolete',
    JET_coltypUnsignedLong : 'Unsigned long',
    JET_coltypLongLong     : 'Long long',
    JET_coltypGUID         : 'GUID',
    JET_coltypUnsignedShort: 'Unsigned short',
    JET_coltypMax          : 'Max',
}

# For each column type: (size in bytes, struct format) when the value can be
# unpacked directly, or None when the raw bytes are kept as they are.
# NOTE(review): Currency and LongLong are unpacked as unsigned ('<Q'); the
# ESE documentation describes them as signed 8-byte integers. Left unchanged
# because callers may rely on the current values - confirm before switching
# to '<q'.
ColumnTypeSize = {
    JET_coltypNil          : None,
    JET_coltypBit          : (1,'B'),
    JET_coltypUnsignedByte : (1,'B'),
    JET_coltypShort        : (2,'<h'),
    JET_coltypLong         : (4,'<l'),
    JET_coltypCurrency     : (8,'<Q'),
    JET_coltypIEEESingle   : (4,'<f'),
    JET_coltypIEEEDouble   : (8,'<d'),
    JET_coltypDateTime     : (8,'<Q'),
    JET_coltypBinary       : None,
    JET_coltypText         : None,
    JET_coltypLongBinary   : None,
    JET_coltypLongText     : None,
    JET_coltypSLV          : None,
    JET_coltypUnsignedLong : (4,'<L'),
    JET_coltypLongLong     : (8,'<Q'),
    JET_coltypGUID         : (16,'16s'),
    JET_coltypUnsignedShort: (2,'<H'),
    JET_coltypMax          : None,
}

# Tagged Data Type Flags
TAGGED_DATA_TYPE_VARIABLE_SIZE = 1
TAGGED_DATA_TYPE_COMPRESSED = 2
TAGGED_DATA_TYPE_STORED = 4
TAGGED_DATA_TYPE_MULTI_VALUE = 8
# NOTE(review): this value is decimal 10, whereas the other tagged data type
# flags are single bits (1, 2, 4, 8); 0x10 may have been intended - confirm.
TAGGED_DATA_TYPE_WHO_KNOWS = 10

# Code pages
CODEPAGE_UNICODE = 1200
CODEPAGE_ASCII = 20127
CODEPAGE_WESTERN = 1252

# Maps a column's code page to the codec name used to decode its text.
StringCodePages = {
    CODEPAGE_UNICODE: 'utf-16le',
    CODEPAGE_ASCII: 'ascii',
    CODEPAGE_WESTERN: 'cp1252',
}

# Structures

# Template listing the keys a table cursor carries.
TABLE_CURSOR = {
    'TableData': '',
    'FatherDataPageNumber': 0,
    'CurrentPageData': '',
    'CurrentTag': 0,
}


class ESENT_JET_SIGNATURE(Structure):
    """Signature record embedded twice in the database header."""
    structure = (
        ('Random', '<L=0'),
        ('CreationTime', '<Q=0'),
        ('NetBiosName', '16s=""'),
    )


class ESENT_DB_HEADER(Structure):
    """Layout of the database file header."""
    structure = (
        ('CheckSum', '<L=0'),
        ('Signature', '"\xef\xcd\xab\x89'),
        ('Version', '<L=0'),
        ('FileType', '<L=0'),
        ('DBTime', '<Q=0'),
        ('DBSignature', ':', ESENT_JET_SIGNATURE),
        ('DBState', '<L=0'),
        ('ConsistentPosition', '<Q=0'),
        ('ConsistentTime', '<Q=0'),
        ('AttachTime', '<Q=0'),
        ('AttachPosition', '<Q=0'),
        ('DetachTime', '<Q=0'),
        ('DetachPosition', '<Q=0'),
        ('LogSignature', ':', ESENT_JET_SIGNATURE),
        ('Unknown', '<L=0'),
        ('PreviousBackup', '24s=""'),
        ('PreviousIncBackup', '24s=""'),
        ('CurrentFullBackup', '24s=""'),
        ('ShadowingDisables', '<L=0'),
        ('LastObjectID', '<L=0'),
        ('WindowsMajorVersion', '<L=0'),
        ('WindowsMinorVersion', '<L=0'),
        ('WindowsBuildNumber', '<L=0'),
        ('WindowsServicePackNumber', '<L=0'),
        ('FileFormatRevision', '<L=0'),
        ('PageSize', '<L=0'),
        ('RepairCount', '<L=0'),
        ('RepairTime', '<Q=0'),
        ('Unknown2', '28s=""'),
        ('ScrubTime', '<Q=0'),
        ('RequiredLog', '<Q=0'),
        ('UpgradeExchangeFormat', '<L=0'),
        ('UpgradeFreePages', '<L=0'),
        ('UpgradeSpaceMapPages', '<L=0'),
        ('CurrentShadowBackup', '24s=""'),
        ('CreationFileFormatVersion', '<L=0'),
        ('CreationFileFormatRevision', '<L=0'),
        ('Unknown3', '16s=""'),
        ('OldRepairCount', '<L=0'),
        ('ECCCount', '<L=0'),
        ('LastECCTime', '<Q=0'),
        ('OldECCFixSuccessCount', '<L=0'),
        ('ECCFixErrorCount', '<L=0'),
        ('LastECCFixErrorTime', '<Q=0'),
        ('OldECCFixErrorCount', '<L=0'),
        ('BadCheckSumErrorCount', '<L=0'),
        ('LastBadCheckSumTime', '<Q=0'),
        ('OldCheckSumErrorCount', '<L=0'),
        ('CommittedLog', '<L=0'),
        ('PreviousShadowCopy', '24s=""'),
        ('PreviousDifferentialBackup', '24s=""'),
        ('Unknown4', '40s=""'),
        ('NLSMajorVersion', '<L=0'),
        ('NLSMinorVersion', '<L=0'),
        ('Unknown5', '148s=""'),
        ('UnknownFlags', '<L=0'),
    )


class ESENT_PAGE_HEADER(Structure):
    """Page header whose field layout depends on version, revision and page size."""
    # Leading fields, one variant per format generation
    structure_2003_SP0 = (
        ('CheckSum', '<L=0'),
        ('PageNumber', '<L=0'),
    )
    structure_0x620_0x0b = (
        ('CheckSum', '<L=0'),
        ('ECCCheckSum', '<L=0'),
    )
    structure_win7 = (
        ('CheckSum', '<Q=0'),
    )
    # Fields shared by every variant
    common = (
        ('LastModificationTime', '<Q=0'),
        ('PreviousPageNumber', '<L=0'),
        ('NextPageNumber', '<L=0'),
        ('FatherDataPage', '<L=0'),
        ('AvailableDataSize', '<H=0'),
        ('AvailableUncommittedDataSize', '<H=0'),
        ('FirstAvailableDataOffset', '<H=0'),
        ('FirstAvailablePageTag', '<H=0'),
        ('PageFlags', '<L=0'),
    )
    # Trailing fields appended only for pages larger than 8192 bytes
    extended_win7 = (
        ('ExtendedCheckSum1', '<Q=0'),
        ('ExtendedCheckSum2', '<Q=0'),
        ('ExtendedCheckSum3', '<Q=0'),
        ('PageNumber', '<Q=0'),
        ('Unknown', '<Q=0'),
    )

    def __init__(self, version, revision, pageSize=8192, data=None):
        """Select the field layout for the given format, then parse *data*."""
        if version < 0x620 or (version == 0x620 and revision < 0x0b):
            # For sure the old format
            layout = self.structure_2003_SP0 + self.common
        elif version == 0x620 and revision < 0x11:
            # Exchange 2003 SP1 and Windows Vista and later
            layout = self.structure_0x620_0x0b + self.common
        else:
            # Windows 7 and later
            layout = self.structure_win7 + self.common
            if pageSize > 8192:
                layout += self.extended_win7

        self.structure = layout
        Structure.__init__(self, data)


class ESENT_ROOT_HEADER(Structure):
    """Header parsed from the first tag of a root page."""
    structure = (
        ('InitialNumberOfPages', '<L=0'),
        ('ParentFatherDataPage', '<L=0'),
        ('ExtentSpace', '<L=0'),
        ('SpaceTreePageNumber', '<L=0'),
    )


class ESENT_BRANCH_HEADER(Structure):
    """Header parsed from the first tag of a branch page."""
    structure = (
        ('CommonPageKey', ':'),
    )


class ESENT_BRANCH_ENTRY(Structure):
    """Branch page entry: a local key followed by the child page number."""
    common = (
        ('CommonPageKeySize', '<H=0'),
    )
    structure = (
        ('LocalPageKeySize', '<H=0'),
        ('_LocalPageKey', '_-LocalPageKey', 'self["LocalPageKeySize"]'),
        ('LocalPageKey', ':'),
        ('ChildPageNumber', '<L=0'),
    )

    def __init__(self, flags, data=None):
        """Parse *data*; the common key size field is read first when *flags* has TAG_COMMON."""
        hasCommonKey = (flags & TAG_COMMON) > 0
        if hasCommonKey:
            # Include the common header
            self.structure = self.common + self.structure
        Structure.__init__(self, data)


class ESENT_LEAF_HEADER(Structure):
    """Header parsed from the first tag of a leaf page."""
    structure = (
        ('CommonPageKey', ':'),
    )


class ESENT_LEAF_ENTRY(Structure):
    """Leaf page entry: a local key followed by the entry data."""
    common = (
        ('CommonPageKeySize', '<H=0'),
    )
    structure = (
        ('LocalPageKeySize', '<H=0'),
        ('_LocalPageKey', '_-LocalPageKey', 'self["LocalPageKeySize"]'),
        ('LocalPageKey', ':'),
        ('EntryData', ':'),
    )

    def __init__(self, flags, data=None):
        """Parse *data*; the common key size field is read first when *flags* has TAG_COMMON."""
        hasCommonKey = (flags & TAG_COMMON) > 0
        if hasCommonKey:
            # Include the common header
            self.structure = self.common + self.structure
        Structure.__init__(self, data)


class ESENT_SPACE_TREE_HEADER(Structure):
    """Header parsed from the first tag of a space tree page."""
    structure = (
        ('Unknown', '<Q=0'),
    )


class ESENT_SPACE_TREE_ENTRY(Structure):
    """Entry of a space tree page."""
    structure = (
        ('PageKeySize', '<H=0'),
        ('LastPageNumber', '<L=0'),
        ('NumberOfPages', '<L=0'),
    )


class ESENT_INDEX_ENTRY(Structure):
    """Entry of an index page."""
    structure = (
        ('RecordPageKey', ':'),
    )


class ESENT_DATA_DEFINITION_HEADER(Structure):
    """Header found at the start of every data record."""
    structure = (
        ('LastFixedSize', '<B=0'),
        ('LastVariableDataType', '<B=0'),
        ('VariableSizeOffset', '<H=0'),
    )


class ESENT_CATALOG_DATA_DEFINITION_ENTRY(Structure):
    """Catalog record; the fields after the fixed part depend on its Type."""
    fixed = (
        ('FatherDataPageID', '<L=0'),
        ('Type', '<H=0'),
        ('Identifier', '<L=0'),
    )

    column_stuff = (
        ('ColumnType', '<L=0'),
        ('SpaceUsage', '<L=0'),
        ('ColumnFlags', '<L=0'),
        ('CodePage', '<L=0'),
    )

    other = (
        ('FatherDataPageNumber', '<L=0'),
    )

    # The commented-out fields below are not parsed as individual fields;
    # their bytes end up in 'Trailing'.
    table_stuff = (
        ('SpaceUsage', '<L=0'),
#        ('TableFlags','<L=0'),
#        ('InitialNumberOfPages','<L=0'),
    )

    index_stuff = (
        ('SpaceUsage', '<L=0'),
        ('IndexFlags', '<L=0'),
        ('Locale', '<L=0'),
    )

    lv_stuff = (
        ('SpaceUsage', '<L=0'),
#        ('LVFlags','<L=0'),
#        ('InitialNumberOfPages','<L=0'),
    )

    common = (
#        ('RootFlag','<B=0'),
#        ('RecordOffset','<H=0'),
#        ('LCMapFlags','<L=0'),
#        ('KeyMost','<H=0'),
        ('Trailing', ':'),
    )

    def __init__(self, data):
        """Parse a catalog record.

        The 16-bit Type field at offset 4 selects the layout. Callback
        records raise an Exception; unknown types are logged and parsed as
        opaque trailing data.
        """
        # Depending on the type of data we'll end up building a different struct
        dataType = unpack('<H', data[4:][:2])[0]

        if dataType == CATALOG_TYPE_CALLBACK:
            raise Exception('CallBack types not supported!')

        typeSpecific = {
            CATALOG_TYPE_TABLE: self.other + self.table_stuff,
            CATALOG_TYPE_COLUMN: self.column_stuff,
            CATALOG_TYPE_INDEX: self.other + self.index_stuff,
            CATALOG_TYPE_LONG_VALUE: self.other + self.lv_stuff,
        }

        if dataType in typeSpecific:
            layout = self.fixed + typeSpecific[dataType]
        else:
            LOG.error('Unknown catalog type 0x%x' % dataType)
            layout = ()
            self.structure = layout
            Structure.__init__(self, data)

        self.structure = layout + self.common

        Structure.__init__(self, data)


def pretty_print(x):
    """Return *x* if it is found in the printable set below, otherwise '.'."""
    printable = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ '
    return x if x in printable else '.'

def hexdump(data):
    """Print an offset / hex / ASCII dump of *data* to stdout, 16 bytes per row.

    *data* is first converted with ``str()``.
    """
    x = str(data)
    strLen = len(x)
    i = 0
    while i < strLen:
        # The row is assembled first and printed once. Joining the pieces with
        # single spaces reproduces what a chain of ``print item,`` statements
        # emits, while staying valid syntax on Python 3 as well.
        pieces = ["%04x " % i]
        for j in range(16):
            if i+j < strLen:
                pieces.append("%02X" % ord(x[i+j]))
            else:
                pieces.append(" ")
            if j % 16 == 7:
                # Extra gap between the two groups of eight bytes
                pieces.append("")
        pieces.append(" ")
        pieces.append(''.join(pretty_print(c) for c in x[i:i+16]))
        print(" ".join(pieces))
        i += 16


def getUnixTime(t):
    """Convert *t* into seconds relative to the Unix epoch.

    Subtracts 116444736000000000 and divides by 10000000, i.e. the usual
    conversion for timestamps counted in 100 ns ticks since 1601-01-01.
    Floor division keeps the result an integer on every Python version.
    """
    t -= 116444736000000000
    t //= 10000000
    return t


def _usesLargePageFormat(dbHeader):
    # Single definition of the "large page" test (version 0x620, revision
    # >= 0x11, pages bigger than 8192 bytes) shared by the tag parser, the
    # page dumper and the record parser so they cannot drift apart.
    return (dbHeader['Version'] == 0x620 and
            dbHeader['FileFormatRevision'] >= 17 and
            dbHeader['PageSize'] > 8192)


class ESENT_PAGE:
    """One database page: its parsed header (``record``) plus the raw bytes."""

    def __init__(self, db, data=None):
        """Wrap *data*; *db* is the database header used to pick the page layout."""
        self.__DBHeader = db
        self.data = data
        self.record = None
        if data is not None:
            self.record = ESENT_PAGE_HEADER(self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision'], self.__DBHeader['PageSize'], data)

    def printFlags(self):
        """Print one line for every page flag set in the header."""
        flags = self.record['PageFlags']
        if flags & FLAGS_EMPTY:
            print("\tEmpty")
        if flags & FLAGS_INDEX:
            print("\tIndex")
        if flags & FLAGS_LEAF:
            print("\tLeaf")
        else:
            print("\tBranch")
        if flags & FLAGS_LONG_VALUE:
            print("\tLong Value")
        if flags & FLAGS_NEW_CHECKSUM:
            print("\tNew Checksum")
        if flags & FLAGS_NEW_FORMAT:
            print("\tNew Format")
        if flags & FLAGS_PARENT:
            print("\tParent")
        if flags & FLAGS_ROOT:
            print("\tRoot")
        if flags & FLAGS_SPACE_TREE:
            print("\tSpace Tree")

    def dump(self):
        """Print the page header, its flags, the tag table and the parsed tags.

        Raises Exception when a long value entry is found (not supported).
        """
        baseOffset = len(self.record)
        numTags = self.record['FirstAvailablePageTag']
        # Same test as getTag(); this method used to compare the revision
        # against decimal 11 while the rest of the module compares against 17.
        largePages = _usesLargePageFormat(self.__DBHeader)
        self.record.dump()

        print("FLAGS: ")
        self.printFlags()

        print("")

        if numTags == 0:
            # No tag table at all, nothing else to show
            return

        # The tag table sits at the very end of the page, tag 0 being last
        dataLen = len(self.data)
        for i in range(numTags):
            end = dataLen - 4*i
            tag = self.data[end-4:end]
            if largePages:
                valueSize = unpack('<H', tag[:2])[0] & 0x7fff
                valueOffset = unpack('<H',tag[2:])[0] & 0x7fff
                hexdump((self.data[baseOffset+valueOffset:][:6]))
                # Flags live in the top three bits of the value's second byte
                pageFlags = bytearray(self.data[baseOffset+valueOffset:][:2])[1] >> 5
            else:
                valueSize = unpack('<H', tag[:2])[0] & 0x1fff
                pageFlags = (unpack('<H', tag[2:])[0] & 0xe000) >> 13
                valueOffset = unpack('<H',tag[2:])[0] & 0x1fff

            print("TAG %-8d offset:0x%-6x flags:0x%-4x valueSize:0x%x" % (i,valueOffset,pageFlags,valueSize))

        pageType = self.record['PageFlags']
        isLeaf = (pageType & FLAGS_LEAF) > 0

        # Tag 0 holds the page level header
        flags, data = self.getTag(0)
        if pageType & FLAGS_ROOT > 0:
            ESENT_ROOT_HEADER(data).dump()
        elif not isLeaf:
            # Branch Header
            ESENT_BRANCH_HEADER(data).dump()
        elif pageType & FLAGS_SPACE_TREE > 0:
            # Space Tree
            ESENT_SPACE_TREE_HEADER(data).dump()
        else:
            # Leaf Header
            ESENT_LEAF_HEADER(data).dump()

        # Print the leaf/branch tags
        for tagNum in range(1, numTags):
            flags, data = self.getTag(tagNum)
            if not isLeaf:
                # Branch page
                ESENT_BRANCH_ENTRY(flags, data).dump()
            elif pageType & FLAGS_SPACE_TREE > 0:
                # Space Tree, parsed but not printed
                ESENT_SPACE_TREE_ENTRY(data)
            elif pageType & FLAGS_INDEX > 0:
                # Index Entry, parsed but not printed
                ESENT_INDEX_ENTRY(data)
            elif pageType & FLAGS_LONG_VALUE > 0:
                # Long Page Value
                raise Exception('Long value still not supported')
            else:
                # Table Value
                leafEntry = ESENT_LEAF_ENTRY(flags, data)
                dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(leafEntry['EntryData'])
                dataDefinitionHeader.dump()
                catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(leafEntry['EntryData'][len(dataDefinitionHeader):])
                catalogEntry.dump()
                hexdump(leafEntry['EntryData'])

    def getTag(self, tagNum):
        """Return ``(flags, data)`` for tag number *tagNum* of this page.

        Valid tag numbers are ``0 .. FirstAvailablePageTag - 1``; anything else
        raises Exception. (The bound check used to let
        ``tagNum == FirstAvailablePageTag`` through, which then failed while
        unpacking an empty tag.)
        """
        numTags = self.record['FirstAvailablePageTag']
        if tagNum < 0 or tagNum >= numTags:
            raise Exception('Trying to grab an unknown tag 0x%x' % tagNum)

        baseOffset = len(self.record)
        # Tags are 4 bytes each and stored backwards from the end of the page,
        # so the wanted one can be sliced out directly.
        end = len(self.data) - 4*tagNum
        tag = self.data[end-4:end]

        if _usesLargePageFormat(self.__DBHeader):
            valueSize = unpack('<H', tag[:2])[0] & 0x7fff
            valueOffset = unpack('<H',tag[2:])[0] & 0x7fff
            # The flags are stored in the top three bits of the second data
            # byte; extract them and hand the data back with those bits cleared.
            tmpData = bytearray(self.data[baseOffset+valueOffset:][:valueSize])
            pageFlags = tmpData[1] >> 5
            tmpData[1] &= 0x1f
            tagData = bytes(tmpData)
        else:
            valueSize = unpack('<H', tag[:2])[0] & 0x1fff
            pageFlags = (unpack('<H', tag[2:])[0] & 0xe000) >> 13
            valueOffset = unpack('<H',tag[2:])[0] & 0x1fff
            tagData = self.data[baseOffset+valueOffset:][:valueSize]

        return pageFlags, tagData


class ESENT_DB:
    """Reader for an ESE database: mounts the file, parses its catalog and
    lets callers walk the rows of a table through a cursor."""

    def __init__(self, fileName, pageSize = 8192, isRemote = False):
        """Open and mount the database.

        fileName -- path of the file, or, when *isRemote* is True, an object
                    providing open/seek/tell/read/close that is used as is.
        pageSize -- page size used to read the header; replaced by the value
                    found in the header once it is parsed.
        isRemote -- see *fileName*.
        """
        self.__fileName = fileName
        self.__pageSize = pageSize
        self.__DB = None
        self.__DBHeader = None
        self.__totalPages = None
        self.__tables = OrderedDict()
        self.__currentTable = None
        self.__isRemote = isRemote
        self.mountDB()

    def mountDB(self):
        """Open the file, read the database header and parse the catalog."""
        LOG.debug("Mounting DB...")
        if self.__isRemote is True:
            self.__DB = self.__fileName
            self.__DB.open()
        else:
            self.__DB = open(self.__fileName,"rb")
        mainHeader = self.getPage(-1)
        self.__DBHeader = ESENT_DB_HEADER(mainHeader)
        self.__pageSize = self.__DBHeader['PageSize']
        self.__DB.seek(0,2)
        # Floor division: a page count has to stay an integer
        self.__totalPages = (self.__DB.tell() // self.__pageSize) -2
        LOG.debug("Database Version:0x%x, Revision:0x%x"% (self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision']))
        LOG.debug("Page Size: %d" % self.__pageSize)
        LOG.debug("Total Pages in file: %d" % self.__totalPages)
        self.parseCatalog(CATALOG_PAGE_NUMBER)

    def printCatalog(self):
        """Print every table found in the catalog with its columns and indexes."""
        indent = ' '

        print("Database version: 0x%x, 0x%x" % (self.__DBHeader['Version'], self.__DBHeader['FileFormatRevision'] ))
        print("Page size: %d " % self.__pageSize)
        print("Number of pages: %d" % self.__totalPages)
        print("")
        print("Catalog for %s" % self.__fileName)
        for table in self.__tables:
            tableData = self.__tables[table]
            print("[%s]" % table)
            print("%sColumns " % indent)
            for column in tableData['Columns']:
                record = tableData['Columns'][column]['Record']
                print("%s%-5d%-30s%s" % (indent*2, record['Identifier'], column,ColumnTypeToName[record['ColumnType']]))
            print("%sIndexes"% indent)
            for index in tableData['Indexes']:
                print("%s%s" % (indent*2, index))
            print("")

    def __addItem(self, entry):
        """File a catalog leaf entry under the table it belongs to.

        A table entry starts a new table; column, index and long value entries
        are attached to the most recently seen table. Raises Exception for any
        other catalog type.
        """
        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
        catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])
        itemName = self.__parseItemName(entry)

        if catalogEntry['Type'] == CATALOG_TYPE_TABLE:
            self.__tables[itemName] = OrderedDict()
            self.__tables[itemName]['TableEntry'] = entry
            self.__tables[itemName]['Columns'] = OrderedDict()
            self.__tables[itemName]['Indexes'] = OrderedDict()
            self.__tables[itemName]['LongValues'] = OrderedDict()
            self.__currentTable = itemName
        elif catalogEntry['Type'] == CATALOG_TYPE_COLUMN:
            # The leaf entry itself is stored, enriched with the parsed
            # header and catalog record
            self.__tables[self.__currentTable]['Columns'][itemName] = entry
            self.__tables[self.__currentTable]['Columns'][itemName]['Header'] = dataDefinitionHeader
            self.__tables[self.__currentTable]['Columns'][itemName]['Record'] = catalogEntry
        elif catalogEntry['Type'] == CATALOG_TYPE_INDEX:
            self.__tables[self.__currentTable]['Indexes'][itemName] = entry
        elif catalogEntry['Type'] == CATALOG_TYPE_LONG_VALUE:
            self.__addLongValue(entry)
        else:
            raise Exception('Unknown type 0x%x' % catalogEntry['Type'])

    def __parseItemName(self,entry):
        """Return the item name stored in the variable size area of *entry*."""
        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])

        if dataDefinitionHeader['LastVariableDataType'] > 127:
            numEntries =  dataDefinitionHeader['LastVariableDataType'] - 127
        else:
            numEntries =  dataDefinitionHeader['LastVariableDataType']

        # The size array comes first (two bytes per entry), the name follows it
        variableData = entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:]
        itemLen = unpack('<H', variableData[:2])[0]
        itemName = variableData[2*numEntries:][:itemLen]
        return itemName

    def __addLongValue(self, entry):
        """Register a long value catalog entry under the current table."""
        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
        variableData = entry['EntryData'][dataDefinitionHeader['VariableSizeOffset']:]
        lvLen = unpack('<H', variableData[:2])[0]
        lvName = variableData[7:][:lvLen]
        self.__tables[self.__currentTable]['LongValues'][lvName] = entry

    def parsePage(self, page):
        """Add every table value found in a catalog leaf *page* to the catalog.

        Space tree, index and long value pages are skipped, and so are branch
        pages.
        """
        skipped = FLAGS_SPACE_TREE | FLAGS_INDEX | FLAGS_LONG_VALUE
        for tagNum in range(1,page.record['FirstAvailablePageTag']):
            flags, data = page.getTag(tagNum)
            if page.record['PageFlags'] & FLAGS_LEAF > 0:
                # Leaf page
                if page.record['PageFlags'] & skipped == 0:
                    # Table Value
                    leafEntry = ESENT_LEAF_ENTRY(flags, data)
                    self.__addItem(leafEntry)

    def parseCatalog(self, pageNum):
        """Parse the page *pageNum* and, recursively, every page below it."""
        # Parse all the pages starting at pageNum and commit table data
        page = self.getPage(pageNum)
        self.parsePage(page)

        for i in range(1, page.record['FirstAvailablePageTag']):
            flags, data = page.getTag(i)
            if page.record['PageFlags'] & FLAGS_LEAF == 0:
                # Branch page
                branchEntry = ESENT_BRANCH_ENTRY(flags, data)
                self.parseCatalog(branchEntry['ChildPageNumber'])


    def readHeader(self):
        """Log that the header is being read; nothing else is done here."""
        # Used to reference self.__volumeName, an attribute this class never
        # sets, so calling it always failed with AttributeError.
        LOG.debug("Reading Boot Sector for %s" % self.__fileName)

    def getPage(self, pageNum):
        """Read page *pageNum* from the file.

        Page N starts at byte offset ``(N + 1) * pageSize``. Returns the raw
        bytes when *pageNum* <= 0 and an ESENT_PAGE otherwise. Raises
        Exception if the file ends before a whole page could be read.
        """
        LOG.debug("Trying to fetch page %d (0x%x)" % (pageNum, (pageNum+1)*self.__pageSize))
        self.__DB.seek((pageNum+1)*self.__pageSize, 0)
        data = self.__DB.read(self.__pageSize)
        while len(data) < self.__pageSize:
            remaining = self.__pageSize - len(data)
            chunk = self.__DB.read(remaining)
            if not chunk:
                # An empty read means end of file; without this check a
                # truncated file kept this loop spinning forever.
                raise Exception('Unexpected end of file while reading page %d' % pageNum)
            data += chunk
        # Special case for the first page
        if pageNum <= 0:
            return data
        else:
            return ESENT_PAGE(self.__DBHeader, data)

    def close(self):
        """Close the underlying file object."""
        self.__DB.close()

    def openTable(self, tableName):
        """Return a cursor positioned before the first row of *tableName*.

        The cursor is a dict with the keys of TABLE_CURSOR. Returns None when
        the table is not in the catalog.
        """
        if tableName not in self.__tables:
            return None

        entry = self.__tables[tableName]['TableEntry']
        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(entry['EntryData'])
        catalogEntry = ESENT_CATALOG_DATA_DEFINITION_ENTRY(entry['EntryData'][len(dataDefinitionHeader):])

        # Let's position the cursor at the leaf levels for fast reading:
        # follow the first child of every branch page until a leaf page (or a
        # page without records) is reached.
        pageNum = catalogEntry['FatherDataPageNumber']
        while True:
            page = self.getPage(pageNum)
            if page.record['FirstAvailablePageTag'] <= 1:
                # There are no records
                break
            if page.record['PageFlags'] & FLAGS_LEAF > 0:
                break
            # Branch page, move on to the next page
            flags, data = page.getTag(1)
            branchEntry = ESENT_BRANCH_ENTRY(flags, data)
            pageNum = branchEntry['ChildPageNumber']

        # Work on a copy: handing out the module level TABLE_CURSOR itself
        # made every cursor share, and overwrite, the same state.
        cursor = TABLE_CURSOR.copy()
        cursor['TableData'] = self.__tables[tableName]
        cursor['FatherDataPageNumber'] = catalogEntry['FatherDataPageNumber']
        cursor['CurrentPageData'] = page
        cursor['CurrentTag'] = 0
        return cursor

    def __getNextTag(self, cursor):
        """Return the leaf entry at the cursor position.

        Returns None when the page has no tag at that position or is not a
        leaf page. Raises Exception for space tree, index and long value
        pages.
        """
        page = cursor['CurrentPageData']

        if cursor['CurrentTag'] >= page.record['FirstAvailablePageTag']:
            # No more data in this page, chau
            return None

        flags, data = page.getTag(cursor['CurrentTag'])
        if page.record['PageFlags'] & FLAGS_LEAF > 0:
            # Leaf page
            if page.record['PageFlags'] & FLAGS_SPACE_TREE > 0:
                raise Exception('FLAGS_SPACE_TREE > 0')
            elif page.record['PageFlags'] & FLAGS_INDEX > 0:
                raise Exception('FLAGS_INDEX > 0')
            elif page.record['PageFlags'] & FLAGS_LONG_VALUE > 0:
                raise Exception('FLAGS_LONG_VALUE > 0')
            else:
                # Table Value
                leafEntry = ESENT_LEAF_ENTRY(flags, data)
                return leafEntry

        return None

    def getNextRow(self, cursor):
        """Advance *cursor* and return the next row, or None at the end.

        The row is an OrderedDict mapping column name to decoded value. Pages
        are followed through NextPageNumber with a loop rather than
        recursion, so a long run of pages without rows cannot exhaust the
        call stack.
        """
        while True:
            cursor['CurrentTag'] += 1

            tag = self.__getNextTag(cursor)

            if tag is not None:
                return self.__tagToRecord(cursor, tag['EntryData'])

            # No more tags in this page, search for the next one on the right
            page = cursor['CurrentPageData']
            if page.record['NextPageNumber'] == 0:
                # No more pages, chau
                return None

            cursor['CurrentPageData'] = self.getPage(page.record['NextPageNumber'])
            cursor['CurrentTag'] = 0

    def __tagToRecord(self, cursor, tag):
        """Decode the raw record *tag* into an OrderedDict of column values.

        Columns without data in the record are set to None. Raises Exception
        for a text column whose code page is not in StringCodePages.
        """
        # So my brain doesn't forget, the data record is composed of:
        # Header
        # Fixed Size Data (ID < 127)
        #     The easiest to parse. Their size is fixed in the record. You can get its size
        #     from the Column Record, field SpaceUsage
        # Variable Size Data (127 < ID < 255)
        #     At VariableSizeOffset you get an array of two bytes per variable entry, pointing
        #     to the length of the value. Values start at:
        #                numEntries = LastVariableDataType - 127
        #                VariableSizeOffset + numEntries * 2 (bytes)
        # Tagged Data ( > 255 )
        #     After the Variable Size Value, there's more data for the tagged values.
        #     Right at the beginning there's another array (taggedItems), pointing to the
        #     values, size.
        #
        # The interesting thing about this DB records is there's no need for all the columns to be there, hence
        # saving space. That's why I go over all the columns, and if I find data (of any type), I assign it. If
        # not, the column's empty.
        #
        # There are a lot of caveats in the code, so take your time to explore it.
        #
        # ToDo: Better complete this description
        #

        record = OrderedDict()
        taggedItems = OrderedDict()
        taggedItemsParsed = False

        dataDefinitionHeader = ESENT_DATA_DEFINITION_HEADER(tag)
        variableDataBytesProcessed = (dataDefinitionHeader['LastVariableDataType'] - 127) * 2
        prevItemLen = 0
        tagLen = len(tag)
        fixedSizeOffset = len(dataDefinitionHeader)
        variableSizeOffset = dataDefinitionHeader['VariableSizeOffset']
        # Does not change per column, so evaluate it once
        largePages = _usesLargePageFormat(self.__DBHeader)

        columns = cursor['TableData']['Columns']

        for column in columns:
            columnRecord = columns[column]['Record']
            identifier = columnRecord['Identifier']
            if identifier <= dataDefinitionHeader['LastFixedSize']:
                # Fixed Size column data type, still available data
                record[column] = tag[fixedSizeOffset:][:columnRecord['SpaceUsage']]
                fixedSizeOffset += columnRecord['SpaceUsage']

            elif 127 < identifier <= dataDefinitionHeader['LastVariableDataType']:
                # Variable data type
                index = identifier - 127 - 1
                itemLen = unpack('<H',tag[variableSizeOffset+index*2:][:2])[0]

                if itemLen & 0x8000:
                    # Empty item
                    itemLen = prevItemLen
                    record[column] = None
                else:
                    itemValue = tag[variableSizeOffset+variableDataBytesProcessed:][:itemLen-prevItemLen]
                    record[column] = itemValue

                variableDataBytesProcessed +=itemLen-prevItemLen

                prevItemLen = itemLen

            elif identifier > 255:
                # Have we parsed the tagged items already?
                if taggedItemsParsed is False and (variableDataBytesProcessed+variableSizeOffset) < tagLen:
                    index = variableDataBytesProcessed+variableSizeOffset
                    firstOffsetTag = (unpack('<H', tag[index+2:][:2])[0] & 0x3fff) + variableDataBytesProcessed+variableSizeOffset
                    while True:
                        taggedIdentifier = unpack('<H', tag[index:][:2])[0]
                        index += 2
                        offsetAndFlags = unpack('<H', tag[index:][:2])[0]
                        taggedOffset = offsetAndFlags & 0x3fff
                        # As of Windows 7 and later ( version 0x620 revision 0x11) the
                        # tagged data type flags are always present
                        if largePages:
                            flagsPresent = 1
                        else:
                            flagsPresent = offsetAndFlags & 0x4000
                        index += 2
                        # The length is a placeholder (whole record) until the
                        # pass below narrows it down
                        taggedItems[taggedIdentifier] = (taggedOffset, tagLen, flagsPresent)
                        if index >= firstOffsetTag:
                            # We reached the end of the variable size array
                            break

                    # Calculate length of variable items: every item ends
                    # where the next one starts; the last one keeps the
                    # placeholder and so runs to the end of the record.
                    taggedKeys = list(taggedItems)
                    for prevKey, nextKey in zip(taggedKeys, taggedKeys[1:]):
                        offset0, _, flags = taggedItems[prevKey]
                        offset = taggedItems[nextKey][0]
                        taggedItems[prevKey] = (offset0, offset-offset0, flags)
                    taggedItemsParsed = True

                # Tagged data type
                if identifier in taggedItems:
                    taggedOffset, itemSize, flagsPresent = taggedItems[identifier]
                    offsetItem = variableDataBytesProcessed + variableSizeOffset + taggedOffset
                    # If item have flags, we should skip them
                    if flagsPresent > 0:
                        itemFlag = ord(tag[offsetItem:offsetItem+1])
                        offsetItem += 1
                        itemSize -= 1
                    else:
                        itemFlag = 0

                    if itemFlag & (TAGGED_DATA_TYPE_COMPRESSED ):
                        LOG.error('Unsupported tag column: %s, flag:0x%x' % (column, itemFlag))
                        record[column] = None
                    elif itemFlag & TAGGED_DATA_TYPE_MULTI_VALUE:
                        # ToDo: Parse multi-values properly
                        LOG.debug('Multivalue detected in column %s, returning raw results' % (column))
                        # Wrapped in a tuple so the decoding step below can
                        # tell it apart and leave it alone
                        record[column] = (hexlify(tag[offsetItem:][:itemSize]),)
                    else:
                        record[column] = tag[offsetItem:][:itemSize]

                else:
                    record[column] = None
            else:
                record[column] = None

            # If we understand the data type, we unpack it and cast it accordingly
            # otherwise, we just encode it in hex
            if isinstance(record[column], tuple):
                # A multi value data, we won't decode it, just leave it this way
                record[column] = record[column][0]
            elif columnRecord['ColumnType'] == JET_coltypText or columnRecord['ColumnType'] == JET_coltypLongText:
                # Let's handle strings
                if record[column] is not None:
                    if columnRecord['CodePage'] not in StringCodePages:
                        raise Exception('Unknown codepage 0x%x'% columnRecord['CodePage'])
                    stringDecoder = StringCodePages[columnRecord['CodePage']]

                    record[column] = record[column].decode(stringDecoder)

            else:
                unpackData = ColumnTypeSize[columnRecord['ColumnType']]
                if record[column] is not None:
                    if unpackData is None:
                        record[column] = hexlify(record[column])
                    else:
                        unpackStr = unpackData[1]
                        record[column] = unpack(unpackStr, record[column])[0]

        return record

