1 // SSDEEP
2 // $Id$
3 // Copyright (C) 2012 Kyrus. See COPYING for details.
4 
5 #ifdef HAVE_CONFIG_H
6 # include "config.h"
7 #endif
8 
9 #include "filedata.h"
10 #include <stdio.h>
11 #include <stdarg.h>
12 #include <string.h>
13 
14 
valid(void) const15 bool Filedata::valid(void) const
16 {
17   // A valid fuzzy hash has the form
18   // [blocksize]:[sig1]:[sig2]
19   // with no filename at the end
20 
21   // First find the block size
22   const char * sig = m_signature.c_str();
23   unsigned int block_size;
24   if (-1 == sscanf(sig, "%u:", &block_size))
25     return false;
26 
27   // Move past the blocksize
28   sig = strchr(sig,':');
29   if (!sig)
30     return false;
31 
32   // Move past the first colon and Look for the second colon
33   ++sig;
34   sig = strchr(sig,':');
35   if (!sig)
36     return false;
37 
38   // Finally, a valid signature does *not* have a filename at the end of it
39   sig = strchr(sig,',');
40   if (sig)
41     return false;
42 
43   return true;
44 }
45 
46 
clear_cluster(void)47 void Filedata::clear_cluster(void)
48 {
49   if (NULL == m_cluster)
50     return;
51 
52   // We don't want to call the destructors on the individual elements
53   // so we have to clear the set first.
54   m_cluster->clear();
55   m_cluster = NULL;
56 }
57 
58 
Filedata(const TCHAR * fn,const char * sig,const char * match_file)59 Filedata::Filedata(const TCHAR * fn, const char * sig, const char * match_file)
60 {
61   m_signature = std::string(sig);
62   if (not valid())
63     throw std::bad_alloc();
64 
65   m_filename = _tcsdup(fn);
66   m_cluster  = NULL;
67 
68   if (NULL == match_file)
69     m_has_match_file = false;
70   else
71   {
72     m_has_match_file = true;
73     m_match_file = std::string(match_file);
74   }
75 }
76 
77 
Filedata(const std::string & sig,const char * match_file)78 Filedata::Filedata(const std::string& sig, const char * match_file)
79 {
80   // Set the easy stuff first
81   m_cluster = NULL;
82 
83   if (NULL == match_file)
84     m_has_match_file = false;
85   else
86   {
87     m_has_match_file = true;
88     m_match_file = std::string(match_file);
89   }
90 
91   // If we don't have a filename included with the sig, that's ok,
92   // but we should find out now.
93   // If there is a filename, it should be immediately after the
94   // first comma and enclosed in quotation marks.
95   size_t start, stop;
96   start = sig.find_first_of(",\"");
97   if (std::string::npos == start)
98   {
99     // There is no filename. Ok. We still have a valid Filedata.
100     m_filename  = _tcsdup(_TEXT("[NO FILENAME]"));
101     m_signature = std::string(sig);
102 
103     // We still have to check the validity of the signature
104     if (not valid())
105       throw std::bad_alloc();
106 
107     return;
108   }
109 
110   // There is a filename. Ok.
111   // Advance past the comma and quotation mark.
112   start += 2;
113 
114   // Look for the second quotation mark, which should be at the end
115   // of the string.
116   stop = sig.find_last_of('"');
117   if (stop != sig.size() - 1)
118     throw std::bad_alloc();
119 
120   // Strip off the final quotation mark and record the filename
121   std::string tmp = sig.substr(start,(stop - start));
122 
123   // Strip off the filename from the signature. Remember that "start"
124   // now points to two characters ahead of the comma
125   m_signature = sig.substr(0,start-2);
126 
127   // Unescape any quotation marks in the filename
128   while (tmp.find(std::string("\\\"")) != std::string::npos)
129     tmp.replace(tmp.find(std::string("\\\"")),2,std::string("\""));
130 
131 #ifndef _WIN32
132   m_filename = strdup(tmp.c_str());
133 #else
134   char * tmp2 = strdup(tmp.c_str());
135 
136   // On Win32 we have to do a kludgy cast from ordinary char
137   // values to the TCHAR values we use internally. Because we may have
138   // reset the string length, get it again.
139   // The extra +1 is for the terminating newline
140   size_t i, sz = strlen(tmp2);
141   m_filename = (TCHAR *)malloc(sizeof(TCHAR) * (sz + 1));
142   if (NULL == m_filename) {
143     free (tmp2);
144     throw std::bad_alloc();
145   }
146 
147   for (i = 0 ; i < sz ; i++)
148     m_filename[i] = (TCHAR)(tmp2[i]);
149   m_filename[i] = 0;
150 
151   free (tmp2);
152 #endif
153 }
154 
155 
operator <<(std::ostream & o,const Filedata & f)156 std::ostream& operator<<(std::ostream& o, const Filedata& f)
157 {
158   return o << f.get_signature() << "," << f.get_filename() << ",";
159 }
160 
161 
operator ==(const Filedata & a,const Filedata & b)162 bool operator==(const Filedata& a, const Filedata& b)
163 {
164   if (a.get_signature() != b.get_signature())
165     return false;
166   if (a.has_match_file() and not b.has_match_file())
167     return false;
168   if (not a.has_match_file() and b.has_match_file())
169     return false;
170   if (a.has_match_file() and b.has_match_file())
171   {
172     if (a.get_match_file() != b.get_match_file())
173       return false;
174   }
175 
176   return true;
177 }
178 
179