1 /* 2 * 3 * The Sleuth Kit 4 * 5 * Contact: Brian Carrier [carrier <at> sleuthkit [dot] org] 6 * Copyright (c) 2010-2012 Basis Technology Corporation. All Rights 7 * reserved. 8 * 9 * This software is distributed under the Common Public License 1.0 10 */ 11 12 13 #ifndef _TSK_IMGDB_H 14 #define _TSK_IMGDB_H 15 16 #define IMGDB_SCHEMA_VERSION "1.5" 17 18 #include <string> // to get std::wstring 19 #include <list> 20 #include <vector> 21 #include "tsk/libtsk.h" 22 #include "tsk/framework/framework_i.h" 23 #include "tsk/framework/utilities/SectorRuns.h" 24 #include "tsk/framework/utilities/UnallocRun.h" 25 #include "TskBlackboardAttribute.h" 26 #include "TskBlackboard.h" 27 #include "TskBlackboardArtifact.h" 28 29 using namespace std; 30 31 class TskArtifactNames; 32 class TskAttributeNames; 33 34 typedef uint64_t artifact_t; 35 36 37 /** 38 * Contains data from a volume/partition record in the database. 39 */ 40 struct TskVolumeInfoRecord 41 { 42 uint64_t vol_id; 43 TSK_DADDR_T sect_start; 44 TSK_DADDR_T sect_len; 45 std::string description; 46 TSK_VS_PART_FLAG_ENUM flags; 47 }; 48 49 /** 50 * Contains data from a file system record in the database. 51 */ 52 struct TskFsInfoRecord 53 { 54 uint64_t fs_id; 55 TSK_OFF_T img_byte_offset; 56 uint64_t vol_id; 57 TSK_FS_TYPE_ENUM fs_type; 58 unsigned int block_size; 59 TSK_DADDR_T block_count; 60 TSK_INUM_T root_inum; 61 TSK_INUM_T first_inum; 62 TSK_INUM_T last_inum; 63 }; 64 65 /** 66 * Contains data derived from joining carved file records from multiple tables in the image database. 67 */ 68 struct TskCarvedFileInfo 69 { 70 /** 71 * The unique ID of the carved file. 72 */ 73 uint64_t fileID; 74 75 /** 76 * A hash of the carved file. The type of the hash is a parameter to the function 77 * that returns objects of this type and is not included in the struct to reduce object size, 78 * since this struct is used to satisfy potentially high-volume data requests. 79 * The hash member may be an empty string if the requested hash is unavailable. 80 */ 81 std::string hash; 82 83 /** 84 * A "cfile" name for the carved file of the form: cfile_[vol_id]_[start_sector]_[file_id].[ext]. 85 */ 86 std::string cFileName; 87 }; 88 89 struct TskFileTypeRecord 90 { 91 std::string suffix; // file extension, normalized to lowercase. If no extension, it is an empty string. 92 std::string description; // descript of the file type. 93 uint64_t count; // count of files with this extension. 94 }; 95 96 struct TskModuleStatus; 97 struct TskModuleInfo; 98 struct TskBlackboardRecord; 99 struct TskUnallocImgStatusRecord; 100 101 /** 102 * Contains data about the mapping of data in the unallocated chunks back 103 * to their original location in the disk image. 104 */ 105 struct TskAllocUnallocMapRecord 106 { 107 int vol_id; 108 int unalloc_img_id; 109 TSK_DADDR_T unalloc_img_sect_start; 110 TSK_DADDR_T sect_len; 111 TSK_DADDR_T orig_img_sect_start; 112 }; 113 114 /** 115 * contains data about the 'unused sectors', which did not have carvable data. 116 */ 117 struct TskUnusedSectorsRecord 118 { 119 uint64_t fileId; 120 TSK_DADDR_T sectStart; 121 TSK_DADDR_T sectLen; 122 }; 123 124 struct TskFileRecord; 125 126 /** 127 * Interface for class that implments database storage for an image. 128 * The database will be used to store information about the data 129 * being analyzed. 130 * Can be registered with and retrieved from TskServices. 131 */ 132 class TSK_FRAMEWORK_API TskImgDB 133 { 134 public: 135 /// File type classifications used by the framework 136 enum FILE_TYPES 137 { 138 IMGDB_FILES_TYPE_FS = 0, 139 IMGDB_FILES_TYPE_CARVED, 140 IMGDB_FILES_TYPE_DERIVED, 141 IMGDB_FILES_TYPE_UNUSED 142 }; 143 144 /// File analysis statuses used by the framework 145 enum FILE_STATUS 146 { 147 IMGDB_FILES_STATUS_CREATED = 0, 148 IMGDB_FILES_STATUS_READY_FOR_ANALYSIS, 149 IMGDB_FILES_STATUS_ANALYSIS_IN_PROGRESS, 150 IMGDB_FILES_STATUS_ANALYSIS_COMPLETE, 151 IMGDB_FILES_STATUS_ANALYSIS_FAILED, 152 IMGDB_FILES_STATUS_ANALYSIS_SKIPPED 153 }; 154 155 /** 156 * Files have a 'known' status that is updated 157 * with the use of hash databases. */ 158 enum KNOWN_STATUS 159 { 160 IMGDB_FILES_KNOWN = 0, ///< 'Known', but cannot differentiate between good or bad. NSRL, for example, identifies known, but does not assign a good or bad status. 161 IMGDB_FILES_KNOWN_GOOD, ///< Known to be good / safely ignorable. 162 IMGDB_FILES_KNOWN_BAD, ///< Known to be bad or notable 163 IMGDB_FILES_UNKNOWN ///< Unknown files. Perhaps because they haven't been analyzed yet or perhaps because they are user files that are not in a database. All files start off in this state. 164 }; 165 166 /// Hash types supported by framework 167 enum HASH_TYPE 168 { 169 MD5 = 0, ///< 128-bit MD5 170 SHA1, ///< 160-bit SHA1 171 SHA2_256, ///< 256-bit SHA2 172 SHA2_512 ///< 512-bit SHA2 173 }; 174 175 /// Data types that can be stored in blackboard 176 enum VALUE_TYPE 177 { 178 BB_VALUE_TYPE_BYTE = 0, ///< Single byte 179 BB_VALUE_TYPE_STRING, ///< String 180 BB_VALUE_TYPE_INT32, ///< 32-bit integer 181 BB_VALUE_TYPE_INT64, ///< 64-bit integer 182 BB_VALUE_TYPE_DOUBLE ///< double floating point 183 }; 184 185 /// Unallocated sectors file statuses used by the framework 186 enum UNALLOC_IMG_STATUS 187 { 188 IMGDB_UNALLOC_IMG_STATUS_CREATED = 0, 189 IMGDB_UNALLOC_IMG_STATUS_SCHEDULE_OK, 190 IMGDB_UNALLOC_IMG_STATUS_SCHEDULE_ERR, 191 IMGDB_UNALLOC_IMG_STATUS_CARVED_OK, 192 IMGDB_UNALLOC_IMG_STATUS_CARVED_ERR, 193 IMGDB_UNALLOC_IMG_STATUS_CARVED_NOT_NEEDED, 194 }; 195 196 TskImgDB(); 197 virtual ~ TskImgDB(); 198 199 /** 200 * Opens the database and creates the needed tables. 201 * @returns 1 on error and 0 on success. 202 */ 203 virtual int initialize() = 0; 204 205 /** 206 * Opens an existing database. Use initialize() to create 207 * a new one. 208 * @returns 1 on error and 0 on success. 209 */ 210 virtual int open() = 0; 211 212 /** 213 * Close the database. 214 * @returns 0 on success and 1 on failure. 215 */ 216 virtual int close() = 0; 217 218 virtual int begin() = 0; 219 virtual int commit() = 0; 220 221 virtual int addToolInfo(const char* name, const char* version) = 0; 222 virtual int addImageInfo(int type, int sectorSize) = 0; 223 224 /** 225 * Add the path to the image to the image database 226 * 227 * @param imgPath The image path. 228 */ 229 virtual int addImageName(char const * imgPath) = 0; 230 231 virtual int addVolumeInfo(const TSK_VS_PART_INFO * vs_part) = 0; 232 virtual int addFsInfo(int volId, int fsId, const TSK_FS_INFO * fs_info) = 0; 233 234 /** 235 * Add data for a file system file to the image database. 236 * @param fileSystemID File system ID of the file system the file belongs to 237 * @param fileSystemFile TSK_FS_FILE object for the file 238 * @param fileName File name 239 * @param fileSystemAttrType File system attribute type (see #TSK_FS_ATTR_TYPE_ENUM) 240 * @param fileSystemAttrID File system attribute ID, used to index attributes for files with multiple attributes 241 * @param [out] fileID File ID assigned to the file by the image database 242 * @param filePath Path to the file in the image, file name omitted 243 * @returns 0 on success or -1 on error. 244 */ 245 virtual int addFsFileInfo(int fileSystemID, const TSK_FS_FILE *fileSystemFile, const char *fileName, int fileSystemAttrType, int fileSystemAttrID, uint64_t &fileID, const char *filePath) = 0; 246 247 virtual int addCarvedFileInfo(int vol_id, const char *name, uint64_t size, uint64_t *runStarts, uint64_t *runLengths, int numRuns, uint64_t & fileId) = 0; 248 virtual int addDerivedFileInfo(const std::string& name, const uint64_t parentId, 249 const bool isDirectory, const uint64_t size, const std::string& details, 250 const int ctime, const int crtime, const int atime, const int mtime, uint64_t & fileId, std::string path) = 0; 251 virtual int addFsBlockInfo(int fsID, uint64_t a_mFileId, int count, uint64_t blk_addr, uint64_t len) = 0; 252 253 /** 254 * Add information about how the unallocated images were created so that we can 255 later 256 * map where data was recovered from. This is typically used by CarvePrep and the results are 257 * used by CarveExtract via getUnallocRun(). 258 * @param a_volID Volume ID that the data was extracted from. 259 * @param unallocImgID ID of the unallocated image that the sectors were copied into. 260 * @param unallocImgStart Sector offset of where in the unallocated image that t 261 he run starts. 262 * @param length Number of sectors that are in the run. 263 * @param origImgStart Sector offset in the original image (relative to start of 264 image) where the run starts 265 * @returns 1 on errror 266 */ 267 virtual int addAllocUnallocMapInfo(int a_volID, int unallocImgID, uint64_t unallocImgStart, uint64_t length, uint64_t origImgStart) = 0; 268 269 virtual int getSessionID() const = 0; 270 virtual int getFileIds(char *a_fileName, uint64_t *a_outBuffer, int a_buffSize) const = 0; 271 virtual int getNumFiles() const = 0; 272 virtual int getMaxFileIdReadyForAnalysis(uint64_t a_lastFileId, uint64_t & maxFileId) const = 0; 273 virtual int getMinFileIdReadyForAnalysis(uint64_t & minFileId) const = 0; 274 virtual uint64_t getFileId(int fsId, uint64_t fs_file_id) const = 0; 275 276 /** 277 * Queries the blackboard for raw information about a specific file. 278 * @param fileId ID of file to lookup 279 * @param fileRecord Location where data should be stored 280 * @returns -1 on error and 0 on success. 281 */ 282 virtual int getFileRecord(const uint64_t fileId, TskFileRecord& fileRecord) const = 0; 283 virtual SectorRuns * getFileSectors(uint64_t fileId) const = 0; 284 285 /** 286 * Gets the base name of the image, i.e., the file name of the first image path stored in the database. 287 * 288 * @return The name of the image, possibly the empty string if no image paths have been stored. 289 */ 290 virtual std::string getImageBaseName() const = 0; 291 292 /** 293 * Gets a list of image paths. 294 * 295 * @returns A vector of image paths as std::strings. There may be multiple paths for a split image or the list may be empty if no image paths have been stored. 296 */ 297 virtual std::vector<std::wstring> getImageNamesW() const = 0; 298 virtual std::vector<std::string> getImageNames() const = 0; 299 300 virtual int getFileUniqueIdentifiers(uint64_t a_fileId, uint64_t &a_fsOffset, uint64_t &a_fsFileId, int &a_attrType, int &a_attrId) const = 0; 301 virtual int getNumVolumes() const = 0; 302 virtual int getImageInfo(int & type, int & sectorSize) const = 0; 303 virtual int getVolumeInfo(std::list<TskVolumeInfoRecord> & volumeInfoList) const = 0; 304 virtual int getFsInfo(std::list<TskFsInfoRecord> & fsInfoList) const = 0; 305 virtual int getFileInfoSummary(std::list<TskFileTypeRecord>& fileTypeInfoList) const = 0; 306 virtual int getFileInfoSummary(FILE_TYPES fileType, std::list<TskFileTypeRecord> & fileTypeInfoList) const = 0; 307 /** 308 * Return the known status of the file with the given id 309 * @param fileId id of the file to get the status of 310 * @returns KNOWN_STATUS or -1 on error 311 */ 312 virtual KNOWN_STATUS getKnownStatus(const uint64_t fileId) const = 0; 313 314 315 /** 316 * Given an offset in an unallocated image that was created for carving, 317 * return information about where that data came from in the original image. 318 * This is used to map where a carved file is located in the original image. 319 * 320 * @param a_unalloc_img_id ID of the unallocated image that you want data about 321 * @param a_file_offset Sector offset where file was found in the unallocated image 322 * @return NULL on error or a run descriptor. 323 */ 324 virtual UnallocRun * getUnallocRun(int a_unalloc_img_id, int a_file_offset) const = 0; 325 326 /** 327 * Returns a list of the sectors that are not used by files and that 328 * are in unpartitioned space. Typically this is used by CarvePrep. 329 */ 330 virtual SectorRuns * getFreeSectors() const = 0; 331 332 /** 333 * update the status field in the database for a given file. 334 * @param a_file_id File to update. 335 * @param a_status Status flag to update to. 336 * @returns 1 on error. 337 */ 338 virtual int updateFileStatus(uint64_t a_file_id, FILE_STATUS a_status) = 0; 339 340 /** 341 * update the known status field in the database for a given file. 342 * @param a_file_id File to update. 343 * @param a_status Status flag to update to. 344 * @returns 1 on error. 345 */ 346 virtual int updateKnownStatus(uint64_t a_file_id, KNOWN_STATUS a_status) = 0; 347 virtual bool dbExist() const = 0; 348 349 // Get set of file ids that match the given condition (i.e. SQL where clause) 350 virtual std::vector<uint64_t> getFileIds(const std::string& condition) const = 0; 351 virtual const std::vector<TskFileRecord> getFileRecords(const std::string& condition) const = 0; 352 353 // Get the number of files that match the given condition 354 virtual int getFileCount(const std::string& condition) const = 0; 355 356 /** 357 * Returns the file ids and carved file names for a unique set of carved files. 358 * Uniqueness is based on the value of a particular hash type. Where duplicate 359 * hash values exist, the lowest file_id is chosen. 360 * NOTE: This function is deprecated and will be removed in the next major release, 361 * use the getUniqueCarvedFilesInfo() member function instead. 362 * 363 * @param hashType The type of hash value to use when determining uniqueness. 364 * @return A map of file ids to the corresponding carved file name. 365 */ 366 virtual std::map<uint64_t, std::string> getUniqueCarvedFiles(HASH_TYPE hashType) const = 0; 367 368 /** 369 * Returns the file ids, content hashes and, carved file names for a unique set of carved files. 370 * Uniqueness is based on the value of a particular hash type. Where duplicate 371 * hash values exist, the lowest file_id is chosen. 372 * 373 * @param hashType The type of hash value to use when determining uniqueness. 374 * @return A map of file ids to the corresponding carved file name. Throws TskException. 375 */ 376 virtual std::vector<TskCarvedFileInfo> getUniqueCarvedFilesInfo(HASH_TYPE hashType) const = 0; 377 378 virtual std::vector<uint64_t> getCarvedFileIds() const = 0; 379 380 virtual std::vector<uint64_t> getUniqueFileIds(HASH_TYPE hashType) const = 0; 381 virtual std::vector<uint64_t> getFileIds() const = 0; 382 383 virtual int setHash(const uint64_t a_file_id, const TskImgDB::HASH_TYPE hashType, const std::string& hash) const = 0; 384 virtual std::string getCfileName(const uint64_t a_file_id) const = 0; 385 386 virtual int addModule(const std::string& name, const std::string& description, int & moduleId) = 0; 387 virtual int setModuleStatus(uint64_t file_id, int module_id, int status) = 0; 388 virtual int getModuleInfo(std::vector<TskModuleInfo> & moduleInfoList) const = 0; 389 virtual int getModuleErrors(std::vector<TskModuleStatus> & moduleStatusList) const = 0; 390 virtual std::string getFileName(uint64_t file_id) const = 0; 391 392 /** 393 * Used when a new unallocated image file is created for carving. 394 * @param unallocImgId [out] Stores the unique ID assigned to the image. 395 * @returns -1 on error, 0 on success. 396 */ 397 virtual int addUnallocImg(int & unallocImgId) = 0; 398 399 virtual int setUnallocImgStatus(int unallocImgId, TskImgDB::UNALLOC_IMG_STATUS status) = 0; 400 virtual TskImgDB::UNALLOC_IMG_STATUS getUnallocImgStatus(int unallocImgId) const = 0; 401 virtual int getAllUnallocImgStatus(std::vector<TskUnallocImgStatusRecord> & unallocImgStatusList) const = 0; 402 403 virtual int addUnusedSectors(int unallocImgId, std::vector<TskUnusedSectorsRecord> & unusedSectorsList) = 0; 404 virtual int getUnusedSector(uint64_t fileId, TskUnusedSectorsRecord & unusedSectorsRecord) const = 0; 405 406 // Quote and escape a string, the returned quoted string can be used as string literal in SQL statement. 407 virtual std::string quote(const std::string str) const = 0; 408 409 friend class TskDBBlackboard; 410 411 protected: 412 map<int64_t, map<TSK_INUM_T, map<uint32_t, int64_t> > > m_parentDirIdCache; //maps a file system ID to a map, which maps a directory file system meta address to a map, which maps a sequence ID to its object ID in the database 413 414 /** 415 * Store meta_addr to object id mapping of the directory in a local cache map 416 * @param fsObjId fs id of the directory 417 * @param fs_file file object for the directory 418 * @param objId object id of the directory from the objects table 419 */ 420 void storeParObjId(const int64_t & fsObjId, const TSK_FS_FILE * fs_file, const int64_t & objId); 421 422 /** 423 * Find parent object id of TSK_FS_FILE. Use local cache map, if not found, fall back to SQL 424 * @param fs_file file to find parent obj id for 425 * @param fsObjId fs id of this file 426 * @returns parent obj id ( > 0), -1 on error 427 */ 428 int64_t findParObjId(const TSK_FS_FILE * fs_file, const int64_t & fsObjId); 429 430 // Blackboard methods. 431 virtual TskBlackboardArtifact createBlackboardArtifact(uint64_t file_id, int artifactTypeID) = 0; 432 virtual void addBlackboardAttribute(TskBlackboardAttribute attr) = 0; 433 434 virtual string getArtifactTypeDisplayName(int artifactTypeID) = 0; 435 virtual int getArtifactTypeID(string artifactTypeString) = 0; 436 virtual string getArtifactTypeName(int artifactTypeID) = 0; 437 virtual vector<TskBlackboardArtifact> getMatchingArtifacts(string whereClause) = 0; 438 439 virtual void addArtifactType(int typeID, string artifactTypeName, string displayName) = 0; 440 virtual void addAttributeType(int typeID, string attributeTypeName, string displayName)= 0; 441 442 virtual string getAttributeTypeDisplayName(int attributeTypeID) = 0; 443 virtual int getAttributeTypeID(string attributeTypeString) = 0; 444 virtual string getAttributeTypeName(int attributeTypeID) = 0; 445 virtual vector<TskBlackboardAttribute> getMatchingAttributes(string whereClause) = 0; 446 TskBlackboardAttribute createAttribute(uint64_t artifactID, int attributeTypeID, uint64_t objectID, string moduleName, string context, 447 TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE valueType, int valueInt, uint64_t valueLong, double valueDouble, 448 string valueString, vector<unsigned char> valueBytes); 449 TskBlackboardArtifact createArtifact(uint64_t artifactID, uint64_t objID, int artifactTypeID); 450 virtual map<int, TskArtifactNames> getAllArtifactTypes(); 451 virtual map<int, TskAttributeNames> getAllAttributeTypes(); 452 virtual vector<int> findAttributeTypes(int artifactTypeId) = 0; 453 454 private: 455 456 }; 457 458 /** 459 * Contains data from a file record in the database. 460 */ 461 struct TskFileRecord 462 { 463 uint64_t fileId; 464 TskImgDB::FILE_TYPES typeId; 465 std::string name; 466 uint64_t parentFileId; 467 TSK_FS_NAME_TYPE_ENUM dirType; 468 TSK_FS_META_TYPE_ENUM metaType; 469 TSK_FS_NAME_FLAG_ENUM dirFlags; 470 TSK_FS_META_FLAG_ENUM metaFlags; 471 TSK_OFF_T size; 472 time_t ctime; 473 time_t crtime; 474 time_t atime; 475 time_t mtime; 476 TSK_FS_META_MODE_ENUM mode; 477 TSK_UID_T uid; 478 TSK_GID_T gid; 479 TskImgDB::FILE_STATUS status; 480 std::string md5; 481 std::string sha1; 482 std::string sha2_256; 483 std::string sha2_512; 484 std::string fullPath; 485 }; 486 487 /** 488 * Contains data about the module return status for a given file (as recorded in the database) 489 */ 490 struct TskModuleStatus 491 { 492 uint64_t file_id; 493 std::string module_name; 494 int status; 495 }; 496 497 /** 498 * Contains data about a module 499 */ 500 struct TskModuleInfo 501 { 502 int module_id; 503 std::string module_name; 504 std::string module_description; 505 }; 506 507 /** 508 * Contains data for a blackboard entry for a given file and artifact ID 509 */ 510 struct TskBlackboardRecord 511 { 512 artifact_t artifactId; 513 uint64_t fileId; ///< File that this information pertains to. 514 string attribute; ///< Name / type of the data being stored. Standard attribute names are defined in TskBlackboard 515 string source; ///< Name of the module that added this data 516 string context; ///< Optional string that provides more context about the data. For example, it may have "Last Printed" if the entry is a DATETIME entry about when a document was last printed. 517 TskImgDB::VALUE_TYPE valueType; ///< Type of data being stored 518 int32_t valueInt32; 519 int64_t valueInt64; 520 string valueString; 521 double valueDouble; 522 vector<unsigned char> valueByte; 523 TskBlackboardRecordTskBlackboardRecord524 TskBlackboardRecord(artifact_t a_artifactId, uint64_t a_fileId, string a_attribute, string a_source, string a_context) 525 : artifactId(a_artifactId), fileId(a_fileId), attribute(a_attribute), source(a_source), context(a_context) 526 { 527 } TskBlackboardRecordTskBlackboardRecord528 TskBlackboardRecord() {} 529 }; 530 531 /** 532 * Contains data about the current status for an unallocated chunk of data. 533 */ 534 struct TskUnallocImgStatusRecord 535 { 536 int unallocImgId; 537 TskImgDB::UNALLOC_IMG_STATUS status; 538 }; 539 540 541 #endif 542