1 /*
2  *
3  *  The Sleuth Kit
4  *
5  *  Contact: Brian Carrier [carrier <at> sleuthkit [dot] org]
6  *  Copyright (c) 2010-2012 Basis Technology Corporation. All Rights
7  *  reserved.
8  *
9  *  This software is distributed under the Common Public License 1.0
10  */
11 
12 
13 #ifndef _TSK_IMGDB_H
14 #define _TSK_IMGDB_H
15 
/// Version string of the image database schema that this interface describes.
/// NOTE(review): where this value is written to / checked against the database
/// is not visible in this header -- see the implementing classes.
#define IMGDB_SCHEMA_VERSION "1.5"
17 
#include <list>
#include <map>
#include <string> // to get std::wstring
#include <vector>
#include "tsk/libtsk.h"
#include "tsk/framework/framework_i.h"
#include "tsk/framework/utilities/SectorRuns.h"
#include "tsk/framework/utilities/UnallocRun.h"
#include "TskBlackboardAttribute.h"
#include "TskBlackboard.h"
#include "TskBlackboardArtifact.h"
28 
// NOTE: a using-directive at header scope is visible in every translation unit
// that includes this file. It is retained because code including this header
// may rely on the unqualified standard-library names it brings in.
using namespace std;

// Forward declarations: these classes are only named in this header, as the
// mapped types returned by TskImgDB::getAllArtifactTypes() and
// TskImgDB::getAllAttributeTypes().
class TskArtifactNames;
class TskAttributeNames;

/// Identifier type for blackboard artifacts (see TskBlackboardRecord::artifactId).
typedef uint64_t artifact_t;
35 
36 
37 /**
38  * Contains data from a volume/partition record in the database.
39  */
40 struct TskVolumeInfoRecord
41 {
42     uint64_t vol_id;
43     TSK_DADDR_T sect_start;
44     TSK_DADDR_T sect_len;
45     std::string description;
46     TSK_VS_PART_FLAG_ENUM flags;
47 };
48 
49 /**
50  * Contains data from a file system record in the database.
51  */
52 struct TskFsInfoRecord
53 {
54     uint64_t fs_id;
55     TSK_OFF_T img_byte_offset;
56     uint64_t vol_id;
57     TSK_FS_TYPE_ENUM  fs_type;
58     unsigned int block_size;
59     TSK_DADDR_T block_count;
60     TSK_INUM_T root_inum;
61     TSK_INUM_T first_inum;
62     TSK_INUM_T last_inum;
63 };
64 
/**
 * Carved-file data assembled by joining carved file records from several
 * tables of the image database.
 */
struct TskCarvedFileInfo
{
    /// Unique ID of the carved file.
    uint64_t fileID;

    /**
     * Hash of the carved file. Which hash type this is depends on the argument
     * passed to the function that produced the object; the type is deliberately
     * not stored here to keep the struct small, because it is used for
     * potentially high-volume data requests.
     * Empty if the requested hash is unavailable.
     */
    std::string hash;

    /// "cfile" name of the carved file, of the form:
    /// cfile_[vol_id]_[start_sector]_[file_id].[ext].
    std::string cFileName;
};
88 
/**
 * Per-extension summary entry, as filled in by TskImgDB::getFileInfoSummary().
 */
struct TskFileTypeRecord
{
    /// File extension, normalized to lowercase. Empty string if the file has no extension.
    std::string suffix;

    /// Description of the file type.
    std::string description;

    /// Number of files with this extension.
    uint64_t count;
};
95 
// Forward declarations of record types that are defined further down in this
// header, after TskImgDB. Several of them use enums nested in TskImgDB, so the
// class has to be declared first while still naming these types in its interface.
struct TskModuleStatus;
struct TskModuleInfo;
struct TskBlackboardRecord;
struct TskUnallocImgStatusRecord;
100 
101 /**
102  * Contains data about the mapping of data in the unallocated chunks back
103  * to their original location in the disk image.
104  */
105 struct TskAllocUnallocMapRecord
106 {
107     int vol_id;
108     int unalloc_img_id;
109     TSK_DADDR_T unalloc_img_sect_start;
110     TSK_DADDR_T sect_len;
111     TSK_DADDR_T orig_img_sect_start;
112 };
113 
114 /**
115  * contains data about the 'unused sectors', which did not have carvable data.
116  */
117 struct TskUnusedSectorsRecord
118 {
119     uint64_t fileId;
120     TSK_DADDR_T sectStart;
121     TSK_DADDR_T sectLen;
122 };
123 
124 struct TskFileRecord;
125 
126 /**
127  * Interface for class that implments database storage for an image.
128  * The database will be used to store information about the data
129  * being analyzed.
130  * Can be registered with and retrieved from TskServices.
131  */
132 class TSK_FRAMEWORK_API TskImgDB
133 {
134 public:
135     /// File type classifications used by the framework
136     enum FILE_TYPES
137     {
138         IMGDB_FILES_TYPE_FS = 0,
139         IMGDB_FILES_TYPE_CARVED,
140         IMGDB_FILES_TYPE_DERIVED,
141         IMGDB_FILES_TYPE_UNUSED
142     };
143 
144     /// File analysis statuses used by the framework
145     enum FILE_STATUS
146     {
147         IMGDB_FILES_STATUS_CREATED = 0,
148         IMGDB_FILES_STATUS_READY_FOR_ANALYSIS,
149         IMGDB_FILES_STATUS_ANALYSIS_IN_PROGRESS,
150         IMGDB_FILES_STATUS_ANALYSIS_COMPLETE,
151         IMGDB_FILES_STATUS_ANALYSIS_FAILED,
152         IMGDB_FILES_STATUS_ANALYSIS_SKIPPED
153     };
154 
155     /**
156      * Files have a 'known' status that is updated
157      * with the use of hash databases. */
158     enum KNOWN_STATUS
159     {
160         IMGDB_FILES_KNOWN = 0,  ///< 'Known', but cannot differentiate between good or bad.  NSRL, for example, identifies known, but does not assign a good or bad status.
161         IMGDB_FILES_KNOWN_GOOD,  ///< Known to be good / safely ignorable.
162         IMGDB_FILES_KNOWN_BAD,  ///< Known to be bad or notable
163         IMGDB_FILES_UNKNOWN     ///< Unknown files.  Perhaps because they haven't been analyzed yet or perhaps because they are user files that are not in a database.  All files start off in this state.
164     };
165 
166     /// Hash types supported by framework
167     enum HASH_TYPE
168     {
169         MD5 = 0,    ///< 128-bit MD5
170         SHA1,       ///< 160-bit SHA1
171         SHA2_256,   ///< 256-bit SHA2
172         SHA2_512    ///< 512-bit SHA2
173     };
174 
175     /// Data types that can be stored in blackboard
176     enum VALUE_TYPE
177     {
178         BB_VALUE_TYPE_BYTE = 0, ///< Single byte
179         BB_VALUE_TYPE_STRING,   ///< String
180         BB_VALUE_TYPE_INT32,    ///< 32-bit integer
181         BB_VALUE_TYPE_INT64,    ///< 64-bit integer
182         BB_VALUE_TYPE_DOUBLE    ///< double floating point
183     };
184 
185     /// Unallocated sectors file statuses used by the framework
186     enum UNALLOC_IMG_STATUS
187     {
188         IMGDB_UNALLOC_IMG_STATUS_CREATED = 0,
189         IMGDB_UNALLOC_IMG_STATUS_SCHEDULE_OK,
190         IMGDB_UNALLOC_IMG_STATUS_SCHEDULE_ERR,
191         IMGDB_UNALLOC_IMG_STATUS_CARVED_OK,
192         IMGDB_UNALLOC_IMG_STATUS_CARVED_ERR,
193         IMGDB_UNALLOC_IMG_STATUS_CARVED_NOT_NEEDED,
194     };
195 
196     TskImgDB();
197     virtual ~ TskImgDB();
198 
199     /**
200      * Opens the database and creates the needed tables.
201      * @returns 1 on error and 0 on success.
202      */
203     virtual int initialize() = 0;
204 
205     /**
206      * Opens an existing database. Use initialize() to create
207      * a new one.
208      * @returns 1 on error and 0 on success.
209      */
210     virtual int open() = 0;
211 
212     /**
213      * Close the database.
214      * @returns 0 on success and 1 on failure.
215      */
216     virtual int close() = 0;
217 
218     virtual int begin() = 0;
219     virtual int commit() = 0;
220 
221     virtual int addToolInfo(const char* name, const char* version) = 0;
222     virtual int addImageInfo(int type, int sectorSize) = 0;
223 
224     /**
225      * Add the path to the image to the image database
226      *
227      * @param imgPath The image path.
228      */
229     virtual int addImageName(char const * imgPath) = 0;
230 
231     virtual int addVolumeInfo(const TSK_VS_PART_INFO * vs_part) = 0;
232     virtual int addFsInfo(int volId, int fsId, const TSK_FS_INFO * fs_info) = 0;
233 
234     /**
235      * Add data for a file system file to the image database.
236      * @param fileSystemID File system ID of the file system the file belongs to
237      * @param fileSystemFile TSK_FS_FILE object for the file
238      * @param fileName File name
239      * @param fileSystemAttrType File system attribute type (see #TSK_FS_ATTR_TYPE_ENUM)
240      * @param fileSystemAttrID File system attribute ID, used to index attributes for files with multiple attributes
241      * @param [out] fileID File ID assigned to the file by the image database
242      * @param filePath Path to the file in the image, file name omitted
243      * @returns 0 on success or -1 on error.
244      */
245     virtual int addFsFileInfo(int fileSystemID, const TSK_FS_FILE *fileSystemFile, const char *fileName, int fileSystemAttrType, int fileSystemAttrID, uint64_t &fileID, const char *filePath) = 0;
246 
247     virtual int addCarvedFileInfo(int vol_id, const char *name, uint64_t size, uint64_t *runStarts, uint64_t *runLengths, int numRuns, uint64_t & fileId) = 0;
248     virtual int addDerivedFileInfo(const std::string& name, const uint64_t parentId,
249                                         const bool isDirectory, const uint64_t size, const std::string& details,
250                                         const int ctime, const int crtime, const int atime, const int mtime, uint64_t & fileId, std::string path) = 0;
251     virtual int addFsBlockInfo(int fsID, uint64_t a_mFileId, int count, uint64_t blk_addr, uint64_t len) = 0;
252 
253     /**
254      * Add information about how the unallocated images were created so that we can
255      later
256      * map where data was recovered from. This is typically used by CarvePrep and the results are
257      * used by CarveExtract via getUnallocRun().
258      * @param a_volID Volume ID that the data was extracted from.
259      * @param unallocImgID ID of the unallocated image that the sectors were copied into.
260      * @param unallocImgStart Sector offset of where in the unallocated image that t
261      he run starts.
262      * @param length Number of sectors that are in the run.
263      * @param origImgStart Sector offset in the original image (relative to start of
264         image) where the run starts
265      * @returns 1 on errror
266      */
267     virtual int addAllocUnallocMapInfo(int a_volID, int unallocImgID, uint64_t unallocImgStart, uint64_t length, uint64_t origImgStart) = 0;
268 
269     virtual int getSessionID() const = 0;
270     virtual int getFileIds(char *a_fileName, uint64_t *a_outBuffer, int a_buffSize) const = 0;
271     virtual int getNumFiles() const = 0;
272     virtual int getMaxFileIdReadyForAnalysis(uint64_t a_lastFileId, uint64_t & maxFileId) const = 0;
273     virtual int getMinFileIdReadyForAnalysis(uint64_t & minFileId) const = 0;
274     virtual uint64_t getFileId(int fsId, uint64_t fs_file_id) const = 0;
275 
276     /**
277      * Queries the blackboard for raw information about a specific file.
278      * @param fileId ID of file to lookup
279      * @param fileRecord Location where data should be stored
280      * @returns -1 on error and 0 on success.
281      */
282     virtual int getFileRecord(const uint64_t fileId, TskFileRecord& fileRecord) const = 0;
283     virtual SectorRuns * getFileSectors(uint64_t fileId) const = 0;
284 
285     /**
286      * Gets the base name of the image, i.e., the file name of the first image path stored in the database.
287      *
288      * @return The name of the image, possibly the empty string if no image paths have been stored.
289      */
290     virtual std::string getImageBaseName() const = 0;
291 
292     /**
293      * Gets a list of image paths.
294      *
295      * @returns A vector of image paths as std::strings. There may be multiple paths for a split image or the list may be empty if no image paths have been stored.
296      */
297     virtual std::vector<std::wstring> getImageNamesW() const = 0;
298     virtual std::vector<std::string>  getImageNames() const = 0;
299 
300     virtual int getFileUniqueIdentifiers(uint64_t a_fileId, uint64_t &a_fsOffset, uint64_t &a_fsFileId, int &a_attrType, int &a_attrId) const = 0;
301     virtual int getNumVolumes() const = 0;
302     virtual int getImageInfo(int & type, int & sectorSize) const = 0;
303     virtual int getVolumeInfo(std::list<TskVolumeInfoRecord> & volumeInfoList) const = 0;
304     virtual int getFsInfo(std::list<TskFsInfoRecord> & fsInfoList) const = 0;
305     virtual int getFileInfoSummary(std::list<TskFileTypeRecord>& fileTypeInfoList) const = 0;
306     virtual int getFileInfoSummary(FILE_TYPES fileType, std::list<TskFileTypeRecord> & fileTypeInfoList) const = 0;
307     /**
308      * Return the known status of the file with the given id
309      * @param fileId id of the file to get the status of
310      * @returns KNOWN_STATUS or -1 on error
311      */
312     virtual KNOWN_STATUS getKnownStatus(const uint64_t fileId) const = 0;
313 
314 
315     /**
316      * Given an offset in an unallocated image that was created for carving,
317      * return information about where that data came from in the original image.
318      * This is used to map where a carved file is located in the original image.
319      *
320      * @param a_unalloc_img_id ID of the unallocated image that you want data about
321      * @param a_file_offset Sector offset where file was found in the unallocated image
322      * @return NULL on error or a run descriptor.
323      */
324     virtual UnallocRun * getUnallocRun(int a_unalloc_img_id, int a_file_offset) const = 0;
325 
326     /**
327      * Returns a list of the sectors that are not used by files and that
328      * are in unpartitioned space.  Typically this is used by CarvePrep.
329      */
330     virtual SectorRuns * getFreeSectors() const = 0;
331 
332     /**
333      * update the status field in the database for a given file.
334      * @param a_file_id File to update.
335      * @param a_status Status flag to update to.
336      * @returns 1 on error.
337      */
338     virtual int updateFileStatus(uint64_t a_file_id, FILE_STATUS a_status) = 0;
339 
340     /**
341      * update the known status field in the database for a given file.
342      * @param a_file_id File to update.
343      * @param a_status Status flag to update to.
344      * @returns 1 on error.
345      */
346     virtual int updateKnownStatus(uint64_t a_file_id, KNOWN_STATUS a_status) = 0;
347 	virtual bool dbExist() const = 0;
348 
349     // Get set of file ids that match the given condition (i.e. SQL where clause)
350     virtual std::vector<uint64_t> getFileIds(const std::string& condition) const = 0;
351     virtual const std::vector<TskFileRecord> getFileRecords(const std::string& condition) const = 0;
352 
353     // Get the number of files that match the given condition
354     virtual int getFileCount(const std::string& condition) const = 0;
355 
356     /**
357      * Returns the file ids and carved file names for a unique set of carved files.
358      * Uniqueness is based on the value of a particular hash type. Where duplicate
359      * hash values exist, the lowest file_id is chosen.
360      * NOTE: This function is deprecated and will be removed in the next major release,
361      * use the getUniqueCarvedFilesInfo() member function instead.
362      *
363      * @param hashType The type of hash value to use when determining uniqueness.
364      * @return A map of file ids to the corresponding carved file name.
365      */
366     virtual std::map<uint64_t, std::string> getUniqueCarvedFiles(HASH_TYPE hashType) const = 0;
367 
368     /**
369      * Returns the file ids, content hashes and, carved file names for a unique set of carved files.
370      * Uniqueness is based on the value of a particular hash type. Where duplicate
371      * hash values exist, the lowest file_id is chosen.
372      *
373      * @param hashType The type of hash value to use when determining uniqueness.
374      * @return A map of file ids to the corresponding carved file name. Throws TskException.
375      */
376     virtual std::vector<TskCarvedFileInfo> getUniqueCarvedFilesInfo(HASH_TYPE hashType) const = 0;
377 
378     virtual std::vector<uint64_t> getCarvedFileIds() const = 0;
379 
380     virtual std::vector<uint64_t> getUniqueFileIds(HASH_TYPE hashType) const = 0;
381     virtual std::vector<uint64_t> getFileIds() const = 0;
382 
383     virtual int setHash(const uint64_t a_file_id, const TskImgDB::HASH_TYPE hashType, const std::string& hash) const = 0;
384     virtual std::string getCfileName(const uint64_t a_file_id) const = 0;
385 
386     virtual int addModule(const std::string& name, const std::string& description, int & moduleId) = 0;
387     virtual int setModuleStatus(uint64_t file_id, int module_id, int status) = 0;
388 	virtual int getModuleInfo(std::vector<TskModuleInfo> & moduleInfoList) const = 0;
389     virtual int getModuleErrors(std::vector<TskModuleStatus> & moduleStatusList) const = 0;
390     virtual std::string getFileName(uint64_t file_id) const = 0;
391 
392     /**
393      * Used when a new unallocated image file is created for carving.
394      * @param unallocImgId [out] Stores the unique ID assigned to the image.
395      * @returns -1 on error, 0 on success.
396      */
397     virtual int addUnallocImg(int & unallocImgId) = 0;
398 
399     virtual int setUnallocImgStatus(int unallocImgId, TskImgDB::UNALLOC_IMG_STATUS status) = 0;
400     virtual TskImgDB::UNALLOC_IMG_STATUS getUnallocImgStatus(int unallocImgId) const = 0;
401     virtual int getAllUnallocImgStatus(std::vector<TskUnallocImgStatusRecord> & unallocImgStatusList) const = 0;
402 
403     virtual int addUnusedSectors(int unallocImgId, std::vector<TskUnusedSectorsRecord> & unusedSectorsList) = 0;
404     virtual int getUnusedSector(uint64_t fileId, TskUnusedSectorsRecord & unusedSectorsRecord) const = 0;
405 
406 	// Quote and escape a string, the returned quoted string can be used as string literal in SQL statement.
407 	virtual std::string quote(const std::string str) const = 0;
408 
409     friend class TskDBBlackboard;
410 
411 protected:
412     map<int64_t, map<TSK_INUM_T, map<uint32_t, int64_t> > > m_parentDirIdCache; //maps a file system ID to a map, which maps a directory file system meta address to a map, which maps a sequence ID to its object ID in the database
413 
414     /**
415 	 * Store meta_addr to object id mapping of the directory in a local cache map
416 	 * @param fsObjId fs id of the directory
417 	 * @param fs_file file object for the directory
418 	 * @param objId object id of the directory from the objects table
419 	 */
420     void storeParObjId(const int64_t & fsObjId, const TSK_FS_FILE * fs_file, const int64_t & objId);
421 
422 	/**
423 	 * Find parent object id of TSK_FS_FILE. Use local cache map, if not found, fall back to SQL
424      * @param fs_file file to find parent obj id for
425      * @param fsObjId fs id of this file
426 	 * @returns parent obj id ( > 0), -1 on error
427 	 */
428 	int64_t findParObjId(const TSK_FS_FILE * fs_file, const int64_t & fsObjId);
429 
430 	// Blackboard methods.
431     virtual TskBlackboardArtifact createBlackboardArtifact(uint64_t file_id, int artifactTypeID) = 0;
432     virtual void addBlackboardAttribute(TskBlackboardAttribute attr) = 0;
433 
434     virtual string getArtifactTypeDisplayName(int artifactTypeID) = 0;
435     virtual int getArtifactTypeID(string artifactTypeString) = 0;
436     virtual string getArtifactTypeName(int artifactTypeID) = 0;
437     virtual vector<TskBlackboardArtifact> getMatchingArtifacts(string whereClause) = 0;
438 
439     virtual void addArtifactType(int typeID, string artifactTypeName, string displayName) = 0;
440     virtual void addAttributeType(int typeID, string attributeTypeName, string displayName)= 0;
441 
442     virtual string getAttributeTypeDisplayName(int attributeTypeID) = 0;
443     virtual int getAttributeTypeID(string attributeTypeString) = 0;
444     virtual string getAttributeTypeName(int attributeTypeID) = 0;
445     virtual vector<TskBlackboardAttribute> getMatchingAttributes(string whereClause) = 0;
446     TskBlackboardAttribute createAttribute(uint64_t artifactID, int attributeTypeID, uint64_t objectID, string moduleName, string context,
447 		TSK_BLACKBOARD_ATTRIBUTE_VALUE_TYPE valueType, int valueInt, uint64_t valueLong, double valueDouble,
448 		string valueString, vector<unsigned char> valueBytes);
449     TskBlackboardArtifact createArtifact(uint64_t artifactID, uint64_t objID, int artifactTypeID);
450     virtual map<int, TskArtifactNames> getAllArtifactTypes();
451     virtual map<int, TskAttributeNames> getAllAttributeTypes();
452     virtual vector<int> findAttributeTypes(int artifactTypeId) = 0;
453 
454 private:
455 
456 };
457 
458 /**
459  * Contains data from a file record in the database.
460  */
461 struct TskFileRecord
462 {
463     uint64_t fileId;
464     TskImgDB::FILE_TYPES typeId;
465     std::string name;
466     uint64_t parentFileId;
467     TSK_FS_NAME_TYPE_ENUM dirType;
468     TSK_FS_META_TYPE_ENUM metaType;
469     TSK_FS_NAME_FLAG_ENUM dirFlags;
470     TSK_FS_META_FLAG_ENUM metaFlags;
471     TSK_OFF_T size;
472     time_t ctime;
473     time_t crtime;
474     time_t atime;
475     time_t mtime;
476     TSK_FS_META_MODE_ENUM mode;
477     TSK_UID_T uid;
478     TSK_GID_T gid;
479     TskImgDB::FILE_STATUS status;
480     std::string md5;
481     std::string sha1;
482     std::string sha2_256;
483     std::string sha2_512;
484     std::string fullPath;
485 };
486 
/**
 * The status a module returned for a given file, as recorded in the database.
 * Filled in by TskImgDB::getModuleErrors().
 */
struct TskModuleStatus
{
    uint64_t file_id;         ///< ID of the file the status applies to.
    std::string module_name;  ///< Name of the module that reported the status.
    int status;               ///< Status value returned by the module.
};
496 
/**
 * Describes a module. Filled in by TskImgDB::getModuleInfo().
 */
struct TskModuleInfo
{
    int module_id;                   ///< ID of the module.
    std::string module_name;         ///< Name of the module.
    std::string module_description;  ///< Description of the module.
};
506 
507 /**
508  * Contains data for a blackboard entry for a given file and artifact ID
509  */
510 struct TskBlackboardRecord
511 {
512     artifact_t artifactId;
513     uint64_t fileId;    ///< File that this information pertains to.
514     string attribute; ///< Name / type of the data being stored. Standard attribute names are defined in TskBlackboard
515     string source;  ///< Name of the module that added this data
516     string context; ///< Optional string that provides more context about the data.  For example, it may have "Last Printed" if the entry is a DATETIME entry about when a document was last printed.
517     TskImgDB::VALUE_TYPE valueType; ///< Type of data being stored
518     int32_t valueInt32;
519     int64_t valueInt64;
520     string valueString;
521     double valueDouble;
522     vector<unsigned char> valueByte;
523 
TskBlackboardRecordTskBlackboardRecord524     TskBlackboardRecord(artifact_t a_artifactId, uint64_t a_fileId, string a_attribute, string a_source, string a_context)
525         : artifactId(a_artifactId), fileId(a_fileId), attribute(a_attribute), source(a_source), context(a_context)
526     {
527     }
TskBlackboardRecordTskBlackboardRecord528     TskBlackboardRecord() {}
529 };
530 
531 /**
532  * Contains data about the current status for an unallocated chunk of data.
533  */
534 struct TskUnallocImgStatusRecord
535 {
536     int unallocImgId;
537     TskImgDB::UNALLOC_IMG_STATUS status;
538 };
539 
540 
541 #endif
542