1 // SSDEEP
2 // $Id$
3 // Copyright (C) 2012 Kyrus. See COPYING for details.
4
5 #ifdef HAVE_CONFIG_H
6 # include "config.h"
7 #endif
8
9 #include "filedata.h"
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <string.h>
13
14
valid(void) const15 bool Filedata::valid(void) const
16 {
17 // A valid fuzzy hash has the form
18 // [blocksize]:[sig1]:[sig2]
19 // with no filename at the end
20
21 // First find the block size
22 const char * sig = m_signature.c_str();
23 unsigned int block_size;
24 if (-1 == sscanf(sig, "%u:", &block_size))
25 return false;
26
27 // Move past the blocksize
28 sig = strchr(sig,':');
29 if (!sig)
30 return false;
31
32 // Move past the first colon and Look for the second colon
33 ++sig;
34 sig = strchr(sig,':');
35 if (!sig)
36 return false;
37
38 // Finally, a valid signature does *not* have a filename at the end of it
39 sig = strchr(sig,',');
40 if (sig)
41 return false;
42
43 return true;
44 }
45
46
clear_cluster(void)47 void Filedata::clear_cluster(void)
48 {
49 if (NULL == m_cluster)
50 return;
51
52 // We don't want to call the destructors on the individual elements
53 // so we have to clear the set first.
54 m_cluster->clear();
55 m_cluster = NULL;
56 }
57
58
Filedata(const TCHAR * fn,const char * sig,const char * match_file)59 Filedata::Filedata(const TCHAR * fn, const char * sig, const char * match_file)
60 {
61 m_signature = std::string(sig);
62 if (not valid())
63 throw std::bad_alloc();
64
65 m_filename = _tcsdup(fn);
66 m_cluster = NULL;
67
68 if (NULL == match_file)
69 m_has_match_file = false;
70 else
71 {
72 m_has_match_file = true;
73 m_match_file = std::string(match_file);
74 }
75 }
76
77
Filedata(const std::string & sig,const char * match_file)78 Filedata::Filedata(const std::string& sig, const char * match_file)
79 {
80 // Set the easy stuff first
81 m_cluster = NULL;
82
83 if (NULL == match_file)
84 m_has_match_file = false;
85 else
86 {
87 m_has_match_file = true;
88 m_match_file = std::string(match_file);
89 }
90
91 // If we don't have a filename included with the sig, that's ok,
92 // but we should find out now.
93 // If there is a filename, it should be immediately after the
94 // first comma and enclosed in quotation marks.
95 size_t start, stop;
96 start = sig.find_first_of(",\"");
97 if (std::string::npos == start)
98 {
99 // There is no filename. Ok. We still have a valid Filedata.
100 m_filename = _tcsdup(_TEXT("[NO FILENAME]"));
101 m_signature = std::string(sig);
102
103 // We still have to check the validity of the signature
104 if (not valid())
105 throw std::bad_alloc();
106
107 return;
108 }
109
110 // There is a filename. Ok.
111 // Advance past the comma and quotation mark.
112 start += 2;
113
114 // Look for the second quotation mark, which should be at the end
115 // of the string.
116 stop = sig.find_last_of('"');
117 if (stop != sig.size() - 1)
118 throw std::bad_alloc();
119
120 // Strip off the final quotation mark and record the filename
121 std::string tmp = sig.substr(start,(stop - start));
122
123 // Strip off the filename from the signature. Remember that "start"
124 // now points to two characters ahead of the comma
125 m_signature = sig.substr(0,start-2);
126
127 // Unescape any quotation marks in the filename
128 while (tmp.find(std::string("\\\"")) != std::string::npos)
129 tmp.replace(tmp.find(std::string("\\\"")),2,std::string("\""));
130
131 #ifndef _WIN32
132 m_filename = strdup(tmp.c_str());
133 #else
134 char * tmp2 = strdup(tmp.c_str());
135
136 // On Win32 we have to do a kludgy cast from ordinary char
137 // values to the TCHAR values we use internally. Because we may have
138 // reset the string length, get it again.
139 // The extra +1 is for the terminating newline
140 size_t i, sz = strlen(tmp2);
141 m_filename = (TCHAR *)malloc(sizeof(TCHAR) * (sz + 1));
142 if (NULL == m_filename) {
143 free (tmp2);
144 throw std::bad_alloc();
145 }
146
147 for (i = 0 ; i < sz ; i++)
148 m_filename[i] = (TCHAR)(tmp2[i]);
149 m_filename[i] = 0;
150
151 free (tmp2);
152 #endif
153 }
154
155
operator <<(std::ostream & o,const Filedata & f)156 std::ostream& operator<<(std::ostream& o, const Filedata& f)
157 {
158 return o << f.get_signature() << "," << f.get_filename() << ",";
159 }
160
161
operator ==(const Filedata & a,const Filedata & b)162 bool operator==(const Filedata& a, const Filedata& b)
163 {
164 if (a.get_signature() != b.get_signature())
165 return false;
166 if (a.has_match_file() and not b.has_match_file())
167 return false;
168 if (not a.has_match_file() and b.has_match_file())
169 return false;
170 if (a.has_match_file() and b.has_match_file())
171 {
172 if (a.get_match_file() != b.get_match_file())
173 return false;
174 }
175
176 return true;
177 }
178
179