1 // ssdeep
2 // Copyright (C) 2012 Kyrus
3 // Copyright (C) 2006 ManTech International Corporation
4 //
5 // $Id$
6 //
7 // This program is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
11 //
12 // This program is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20 
21 
22 #include "ssdeep.h"
23 
try_msg(void)24 void try_msg(void)
25 {
26   fprintf (stderr,"Try `%s -h` for more information.%s", __progname, NEWLINE);
27 }
28 
29 
// Report whether p starts with the four-character prefix made of two
// backslashes, a question mark and one more backslash.
// Returns true when the prefix is present, false otherwise.
bool expanded_path(TCHAR *p)
{
  return 0 == _tcsncmp(p, _TEXT("\\\\?\\"), 4);
}
36 
37 
// Terminate the program when 'condition' is non-zero.
//
//   s         - program state; only s->mode is read. A NULL state ends the
//               program at once with EXIT_FAILURE and prints nothing.
//   condition - non-zero means the check failed.
//   msg       - text printed after the program name on failure.
//
// On failure the message and the try_msg() hint are printed unless
// mode_silent is set in s->mode; the program then exits with EXIT_FAILURE.
// The function returns only when s is non-NULL and condition is zero.
void sanity_check(state *s, int condition, const char *msg)
{
  if (NULL == s)
    exit(EXIT_FAILURE);

  // Nothing to report: the check passed
  if (!condition)
    return;

  if (0 == (s->mode & mode_silent))
  {
    print_status("%s: %s", __progname, msg);
    try_msg();
  }

  exit(EXIT_FAILURE);
}
53 
54 
55 // The basename function kept misbehaving on OS X, so I rewrote it.
56 // This function isn't perfect, nor is it designed to be. Because
57 // we're guaranteed to be working with a filename here, there's no way
58 // that s will end with a DIR_SEPARATOR (e.g. /foo/bar/). This function
59 // will not work properly for a string that ends in a DIR_SEPARATOR */
my_basename(TCHAR * s)60 int my_basename(TCHAR *s)
61 {
62   size_t len;
63   TCHAR * tmp;
64 
65   if (NULL == s)
66     return TRUE;
67 
68   tmp = _tcsrchr(s,DIR_SEPARATOR);
69 
70   if (NULL == tmp)
71     return FALSE;
72 
73   len = _tcslen(tmp);
74 
75   // We advance tmp one character to move us past the DIR_SEPARATOR
76   _tmemmove(s,tmp+1,len);
77 
78   return FALSE;
79 }
80 
81 
// Truncate the path in c, in place, to its directory part.
//
// Everything after the last DIR_SEPARATOR is dropped; the separator itself
// is kept. When c contains no DIR_SEPARATOR it becomes the empty string.
//
// Returns TRUE if c is NULL and FALSE otherwise.
int my_dirname(TCHAR *c)
{
  TCHAR *last_sep;
  TCHAR *cut;

  if (NULL == c)
    return TRUE;

  last_sep = _tcsrchr(c, DIR_SEPARATOR);

  // Cut right behind the last separator, or at the very start if none
  cut = (NULL == last_sep) ? c : last_sep + 1;
  *cut = 0;

  return FALSE;
}
99 
100 
101 
102 
103 
// Apply the filename options recorded in s->mode to fn, in place.
//
// When mode_barename is set, fn is reduced to its final path component
// with my_basename(); if that call reports failure an error is printed
// through print_error_unicode(). Without mode_barename fn is untouched.
void prepare_filename(state *s, TCHAR *fn)
{
  if (!(s->mode & mode_barename))
    return;

  if (my_basename(fn))
    print_error_unicode(s, fn, "Unable to shorten filename");
}
115 
116 
117 
118 
119 
120 
121 // Remove the newlines, if any. Works on both DOS and *nix newlines
chop_line_tchar(TCHAR * s)122 void chop_line_tchar(TCHAR *s)
123 {
124   size_t pos = _tcslen(s);
125 
126   while (pos > 0)
127   {
128     // We split up the two checks because we can never know which
129     // condition the computer will examine if first. If pos == 0, we
130     // don't want to be checking s[pos-1] under any circumstances!
131 
132     if (!(s[pos-1] == _TEXT('\r') || s[pos-1] == _TEXT('\n')))
133       return;
134 
135     s[pos-1] = 0;
136     --pos;
137   }
138 }
139 
140 
// Strip every trailing carriage return and line feed from s, in place.
// Handles both DOS and *nix line endings. A NULL s is ignored.
void chop_line(char *s)
{
  size_t len;

  if (NULL == s)
    return;

  len = strlen(s);

  // s is a narrow string, so compare against plain char literals rather
  // than the TCHAR-typed _TEXT() ones. The length is tested first so that
  // s[len - 1] is never read for an empty string.
  while (len > 0 && ('\r' == s[len - 1] || '\n' == s[len - 1]))
    s[--len] = 0;
}
159 
160 
161 // Shift the contents of a string so that the values after 'new_start'
162 // will now begin at location 'start'
shift_string_tchar(TCHAR * fn,unsigned int start,unsigned int new_start)163 void shift_string_tchar(TCHAR *fn, unsigned int start, unsigned int new_start)
164 {
165   size_t sz = _tcslen(fn);
166 
167   if (start > sz || new_start < start)
168     return;
169 
170   while (new_start < sz)
171     {
172       fn[start] = fn[new_start];
173       new_start++;
174       start++;
175     }
176 
177   fn[start] = 0;
178 }
179 
180 
181 
182 // Find the index of the next comma in the string s starting at index start.
183 // If there is no next comma, returns -1.
find_next_comma_tchar(TCHAR * s,unsigned int start)184 int find_next_comma_tchar(TCHAR *s, unsigned int start)
185 {
186   size_t size = _tcslen(s);
187   unsigned int pos = start;
188   int in_quote = FALSE;
189 
190   while (pos < size)
191   {
192     switch (s[pos]) {
193     case _TEXT('"'):
194       in_quote = !in_quote;
195       break;
196     case _TEXT(','):
197       if (in_quote)
198         break;
199 
200     // Although it's potentially unwise to cast an unsigned int back
201     // to an int, problems will only occur when the value is beyond
202     // the range of int. Because we're working with the index of a
203     // string that is probably less than 32,000 characters, we should
204     // be okay.
205       return (int)pos;
206     }
207     ++pos;
208   }
209   return -1;
210 }
211 
// Pass MM_INIT a "%s\n" format together with a hex-escaped string that
// decodes to: "I do not believe we will get Eddie Van Halen until we have
// a triumphant video."
// NOTE(review): MM_INIT is defined outside this file; presumably it prints
// the text - confirm against its definition.
void mm_magic(void)
{
  MM_INIT("%s\n","\x49\x20\x64\x6f\x20\x6e\x6f\x74\x20\x62\x65\x6c\x69\x65\x76\x65\x20\x77\x65\x20\x77\x69\x6c\x6c\x20\x67\x65\x74\x20\x45\x64\x64\x69\x65\x20\x56\x61\x6e\x20\x48\x61\x6c\x65\x6e\x20\x75\x6e\x74\x69\x6c\x20\x77\x65\x20\x68\x61\x76\x65\x20\x61\x20\x74\x72\x69\x75\x6d\x70\x68\x61\x6e\x74\x20\x76\x69\x64\x65\x6f\x2e");
}
213 
214 
215 // Returns the string after the nth comma in the string s. If that
216 // string is quoted, the quotes are removed. If there is no valid
217 // string to be found, returns TRUE. Otherwise, returns FALSE
find_comma_separated_string_tchar(TCHAR * s,unsigned int n)218 int find_comma_separated_string_tchar(TCHAR *s, unsigned int n)
219 {
220   int start = 0, end;
221   unsigned int count = 0;
222   while (count < n)
223   {
224     if ((start = find_next_comma_tchar(s,start)) == -1)
225       return TRUE;
226     ++count;
227     // Advance the pointer past the current comma
228     ++start;
229   }
230 
231   // It's okay if there is no next comma, it just means that this is
232   // the last comma separated value in the string
233   if ((end = find_next_comma_tchar(s,start)) == -1)
234     end = _tcslen(s);
235 
236   // Strip off the quotation marks, if necessary. We don't have to worry
237   // about uneven quotation marks (i.e quotes at the start but not the end
238   // as they are handled by the the find_next_comma function.
239   if (s[start] == _TEXT('"'))
240     ++start;
241   if (s[end - 1] == _TEXT('"'))
242     end--;
243 
244   s[end] = 0;
245   shift_string_tchar(s,0,start);
246 
247   return FALSE;
248 }
249 
250 
251 
// Close a gap inside fn, in place: the characters from index 'new_start'
// up to the end of the string are moved down so that they begin at index
// 'start', and the string stays NUL-terminated.
//
// Nothing happens when fn is NULL, when start lies beyond the end of the
// string, or when new_start < start. When new_start is at or past the end
// of the string, fn is simply cut off at 'start'.
void shift_string(char *fn, size_t start, size_t new_start)
{
  size_t len;

  if (NULL == fn)
    return;

  // Measure the string once instead of on every loop iteration
  len = strlen(fn);

  if (start > len || new_start < start)
    return;

  if (new_start >= len)
  {
    // Nothing behind new_start to keep
    fn[start] = 0;
    return;
  }

  // The regions overlap, so memmove is required; the +1 carries the
  // terminating NUL along with the tail.
  memmove(fn + start, fn + new_start, len - new_start + 1);
}
269 
270 
// Return the index of the first comma in s at or after index 'start' that
// is not enclosed in double quotes, or -1 if there is none. Quote tracking
// begins at 'start' in the "outside quotes" state.
int find_next_comma(char *s, unsigned int start)
{
  const size_t len = strlen(s);
  int quoted = 0;
  unsigned int i;

  for (i = start; i < len; ++i)
  {
    if ('"' == s[i])
      quoted = !quoted;
    else if (',' == s[i] && !quoted)
    {
      // Converting the unsigned index to int only misbehaves for values
      // beyond the range of int; the strings handled here are expected
      // to be far shorter than that.
      return (int)i;
    }
  }

  return -1;
}
300 
301 
302 /// Returns the string after the nth comma in the string s. If that
303 /// string is quoted, the quotes are removed. If there is no valid
304 /// string to be found, returns TRUE. Otherwise, returns FALSE
find_comma_separated_string(char * s,unsigned int n)305 int find_comma_separated_string(char *s, unsigned int n)
306 {
307   int start = 0, end;
308   unsigned int count = 0;
309   while (count < n)
310     {
311       if ((start = find_next_comma(s,start)) == -1)
312 	return TRUE;
313       ++count;
314       // Advance the pointer past the current comma
315       ++start;
316     }
317 
318   // It's okay if there is no next comma, it just means that this is
319   // the last comma separated value in the string
320   if ((end = find_next_comma(s,start)) == -1)
321     end = strlen(s);
322 
323   // Strip off the quotation marks, if necessary. We don't have to worry
324   // about uneven quotation marks (i.e quotes at the start but not the end
325   // as they are handled by the the find_next_comma function.
326   if (s[start] == '"')
327     ++start;
328   if (s[end - 1] == '"')
329     end--;
330 
331   s[end] = 0;
332   shift_string(s,0,start);
333 
334   return FALSE;
335 }
336 
337 
338 
remove_escaped_quotes(char * str)339 int remove_escaped_quotes(char * str)
340 {
341   if (NULL == str)
342     return TRUE;
343 
344   size_t pos = 0;
345   while (str[pos] != 0)
346   {
347     if ('\\' == str[pos] && '"' == str[pos+1])
348       shift_string(str,pos,pos+1);
349 
350     ++pos;
351   }
352 
353   return FALSE;
354 }
355 
356 
357 
358