#ifndef FUZZY_H
#define FUZZY_H

/*
 * Copyright (C) ManTech International Corporation 2010
 * Copyright (C) Kyrus 2012
 * Copyright (C) 2013 Helmut Grohne <helmut@subdivi.de>
 *
 * $Id$
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Earlier versions of this code can be found at:
 *     http://ssdeep.sf.net/
 */

/// \mainpage
/// This is the documentation for the fuzzy hashing API from ssdeep.
///
/// There is a complete function reference in fuzzy.h.
///
/// The most recent version of this documentation can be found
/// at http://ssdeep.sourceforge.net/.
///
/// \copydoc fuzzy.h
///
/// \version 3.0
///
/// \author Jesse Kornblum, research@jessekornblum.com
/// \author Helmut Grohne, helmut@subdivi.de

/// \file fuzzy.h
/// \brief
/// These functions allow a programmer to compute the fuzzy hashes
/// (also called the context-triggered piecewise hashes) of
/// @link fuzzy_hash_buf() a buffer of text @endlink,
/// @link fuzzy_hash_filename() the contents of a file on the disk @endlink,
/// and
/// @link fuzzy_hash_file() the contents of an open file handle @endlink.
/// There is also a function to
/// @link fuzzy_compare() compute the similarity between any two fuzzy
/// signatures @endlink.


#include <stdint.h>
#include <stdio.h>

#ifdef __cplusplus
extern "C" {
#endif

/**
 * @brief fuzzy_digest flag indicating to eliminate sequences of more than
 *        three identical characters.
 */
#define FUZZY_FLAG_ELIMSEQ 0x1u
/**
 * @brief fuzzy_digest flag indicating not to truncate the second part to
 *        SPAMSUM_LENGTH/2 characters.
 */
#define FUZZY_FLAG_NOTRUNC 0x2u

struct fuzzy_state;

/**
 * @brief Construct a fuzzy_state object and return it.
 *
 * To use it call fuzzy_update and fuzzy_digest on it. It must be disposed
 * of with fuzzy_free.
 * @return the constructed fuzzy_state or NULL on failure
 */
extern /*@only@*/ /*@null@*/ struct fuzzy_state *fuzzy_new(void);

/**
 * @brief Create a copy of a fuzzy_state object and return it.
 *
 * The copy can be used with fuzzy_update and fuzzy_digest independently of
 * the original. Like the original, it must be disposed of with fuzzy_free.
 * @param state The fuzzy state
 * @return the cloned fuzzy_state or NULL on failure
 */
extern /*@only@*/ /*@null@*/ struct fuzzy_state *fuzzy_clone(const struct fuzzy_state *state);

/**
 * @brief Set the fixed total length of the input.
 *
 * If the size of the data to be digested is known in advance, the
 * computation can be sped up by restricting the range of block sizes.
 * @param state The fuzzy state
 * @param total_fixed_length Total length of the data to generate the digest for
 * @return 0 on success or -1 on failure
 */
extern int fuzzy_set_total_input_length(struct fuzzy_state *state, uint_least64_t total_fixed_length);

/**
 * @brief Feed the data contained in the given buffer to the state.
 *
 * When an error occurs, the state is undefined. In that case it must not be
 * passed to any function besides fuzzy_free.
 * @param state The fuzzy state
 * @param buffer The data to be hashed
 * @param buffer_size The length of the given buffer
 * @return zero on success, non-zero on error
 */
extern int fuzzy_update(struct fuzzy_state *state,
                        const unsigned char *buffer,
                        size_t buffer_size);

/**
 * @brief Obtain the fuzzy hash from the state.
 *
 * This operation does not change the state at all. It reports the hash for the
 * concatenation of the data previously fed using fuzzy_update.
 * @param state The fuzzy state
 * @param result Where the fuzzy hash is stored. This variable
 * must be allocated to hold at least FUZZY_MAX_RESULT bytes.
 * @param flags A bitwise OR of FUZZY_FLAG_* macros. The absence of flags is
 * represented by a zero.
 * @return zero on success, non-zero on error
 */
extern int fuzzy_digest(const struct fuzzy_state *state,
                        /*@out@*/ char *result,
                        unsigned int flags);

/**
 * @brief Dispose of a fuzzy state.
 * @param state The fuzzy state to dispose of
 * @see fuzzy_new() and fuzzy_clone(), whose results must be released
 *      through this function.
 */
extern void fuzzy_free(/*@only@*/ struct fuzzy_state *state);

/**
 * @brief Compute the fuzzy hash of a buffer
 *
 * This computes the fuzzy hash of the first buf_len bytes of the buffer.
 * It is the caller's responsibility to append the filename,
 * if any, to result after computation.
 * @param buf The data to be fuzzy hashed
 * @param buf_len The length of the data being hashed
 * @param result Where the fuzzy hash of buf is stored. This variable
 * must be allocated to hold at least FUZZY_MAX_RESULT bytes.
 * @return Returns zero on success, non-zero on error.
 */
extern int fuzzy_hash_buf(const unsigned char *buf,
                          uint32_t buf_len,
                          /*@out@*/ char *result);

/**
 * @brief Compute the fuzzy hash of a file using an open handle
 *
 * Computes the fuzzy hash of the contents of the open file, starting
 * at the beginning of the file. When finished, the file pointer is
 * returned to its original position. If an error occurs, the file
 * pointer's value is undefined.
 * It is the caller's responsibility to append the filename
 * to the result after computation.
 * @param handle Open handle to the file to be hashed
 * @param result Where the fuzzy hash of the file is stored. This
 * variable must be allocated to hold at least FUZZY_MAX_RESULT bytes.
 * @return Returns zero on success, non-zero on error
 * @see fuzzy_hash_stream() for a variant that never seeks.
 */
extern int fuzzy_hash_file(FILE *handle, /*@out@*/ char *result);

/**
 * @brief Compute the fuzzy hash of a stream using an open handle
 *
 * Computes the fuzzy hash of the contents of the open stream, starting at the
 * current file position until reaching EOF. Unlike fuzzy_hash_file the stream
 * is never seeked. If an error occurs, the result as well as the file position
 * are undefined.
 * It is the caller's responsibility to append the filename
 * to the result after computation.
 * @param handle Open handle to the stream to be hashed
 * @param result Where the fuzzy hash of the file is stored. This
 * variable must be allocated to hold at least FUZZY_MAX_RESULT bytes.
 * @return Returns zero on success, non-zero on error
 */
extern int fuzzy_hash_stream(FILE *handle, /*@out@*/ char *result);

/**
 * @brief Compute the fuzzy hash of a file
 *
 * Opens, reads, and hashes the contents of the file 'filename'.
 * The result must be allocated to hold FUZZY_MAX_RESULT characters.
 * It is the caller's responsibility to append the filename
 * to the result after computation.
 * @param filename The file to be hashed
 * @param result Where the fuzzy hash of the file is stored. This
 * variable must be allocated to hold at least FUZZY_MAX_RESULT bytes.
 * @return Returns zero on success, non-zero on error.
 */
extern int fuzzy_hash_filename(const char *filename, /*@out@*/ char *result);

/**
 * @brief Compute the match score between two fuzzy hash signatures.
 *
 * @param sig1 The first fuzzy hash signature
 * @param sig2 The second fuzzy hash signature
 * @return Returns a value from zero to 100 indicating the match score of
 * the two signatures. A match score of zero indicates the signatures
 * did not match. When an error occurs, such as if one of the
 * inputs is NULL, returns -1.
 */
extern int fuzzy_compare(const char *sig1, const char *sig2);

/** Length of an individual fuzzy hash signature component. */
#define SPAMSUM_LENGTH 64

/** The longest possible length for a fuzzy hash signature
 * (without the filename) */
#define FUZZY_MAX_RESULT (2 * SPAMSUM_LENGTH + 20)

#ifdef __cplusplus
}
#endif

#endif