1 /*
2 * The contents of this file are subject to the Mozilla Public License
3 * Version 1.1 (the "License"); you may not use this file except in
4 * compliance with the License. You may obtain a copy of the License at
5 * http://www.mozilla.org/MPL/
6 *
7 * Software distributed under the License is distributed on an "AS IS"
8 * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9 * License for the specific language governing rights and limitations
10 * under the License.
11 *
12 * The Original Code was developed for an EU.EDGE internal project and
13 * is made available according to the terms of this license.
14 *
15 * The Initial Developer of the Original Code is Istvan T. Hernadvolgyi,
16 * EU.EDGE LLC.
17 *
18 * Portions created by EU.EDGE LLC are Copyright (C) EU.EDGE LLC.
19 * All Rights Reserved.
20 *
21 * Alternatively, the contents of this file may be used under the terms
22 * of the GNU General Public License (the "GPL"), in which case the
23 * provisions of GPL are applicable instead of those above. If you wish
24 * to allow use of your version of this file only under the terms of the
25 * GPL and not to allow others to use your version of this file under the
26 * License, indicate your decision by deleting the provisions above and
27 * replace them with the notice and other provisions required by the GPL.
28 * If you do not delete the provisions above, a recipient may use your
29 * version of this file under either the License or the GPL.
30 */
31
32 // FILE COMPARISONS BY MD5 HASH VALUE - IMPLEMENTATION
33 //
34
35 #include <filei.h>
36
37 extern "C" {
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <strings.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <openssl/md5.h>
44 }
45
46 #include <fstream>
47
48 void* (*filei::_gbuff)(size_t) = &filei::gbuff;
49 size_t (*filei::_buffc)() = &filei::buffc;
50 void (*filei::_relbuff)(void*) = 0;
51 char filei::_buffer[__UABUFFSIZE];
52
filei(const std::string & path,bool ic,bool iw,size_t m,size_t bs)53 filei::filei(const std::string& path, bool ic, bool iw, size_t m, size_t bs)
54 throw(const char*):_path(path),_h(0) {
55 ::bzero(_md5,16); // zero out
56 calc(ic,iw,bs,m);
57 }
58
// Converts every ASCII upper-case letter in buffer[0..n) to lower case,
// in place. All other bytes are left untouched.
static void __lower_case(char* buffer, size_t n) {
   const int shift = 'a' - 'A';
   char* const last = buffer + n;

   for(char* cur = buffer; cur < last; ++cur) {
      if (*cur < 'A' || *cur > 'Z') continue;
      *cur += shift;
   }
}
65
// Tells whether c is a blank, a tab, a carriage return or a line feed.
static bool __whitec(char c) {
   return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
77
78 // count contiguous white spaces from pointer
__countw(const char * p,const char * e)79 static size_t __countw(const char* p, const char* e) {
80 size_t w = 0;
81 for(;p<e; ++p, ++w) if (!__whitec(*p)) break;
82 return w;
83 }
84
85 // in-place remove contiguous white space, return number of chars removed
__remove_white(char * buffer,int n)86 static int __remove_white(char* buffer, int n) {
87 int r = 0;
88
89 for(char *p = buffer; p < buffer + n; ++p) {
90 int k = __countw(p,buffer+n);
91 if (k) {
92 // shift by k
93 for(char* pk = p; k && pk < buffer + n - k; ++pk) *pk = *(pk+k);
94 n -= k;
95 r += k;
96 }
97 }
98
99 return r;
100 }
101
calc(bool ic,bool iw,size_t bn,size_t m)102 void filei::calc(bool ic, bool iw, size_t bn, size_t m) throw(const char*) {
103
104 const char* error = 0;
105
106 char* buffer = 0;
107 size_t tot = 0;
108
109 std::ifstream is(_path.c_str());
110
111 if (!is.good()) { error = "Could not open file"; goto FINALLY; }
112
113 try {
114 buffer= static_cast<char*>((*_gbuff)(bn)); // get buffer
115 if (!buffer) throw 1;
116 } catch(...) {
117 error = "Could not allocate memory";
118 goto FINALLY;
119 }
120
121 bn = _buffc ? std::min(bn,(*_buffc)()) : bn; // get buffer size
122
123 MD5_CTX ctxt;
124 if (!MD5_Init(&ctxt)) { error = "Could not init MD5"; goto FINALLY; }
125
126
127 for(bool done=false;!done;) {
128 is.read(buffer,bn);
129 size_t n = is.gcount();
130 if (!n) break;
131
132 if (ic) __lower_case(buffer,n);
133 if (iw) {
134 n -= __remove_white(buffer,n);
135 if (!n) continue;
136 }
137
138 if (m) {
139 if (tot + n > m) {
140 n = m - tot;
141 done = true;
142 } else tot += n;
143 }
144
145 if (!MD5_Update(&ctxt,buffer,n)) {
146 error = "MD5 calc error";
147 goto FINALLY;
148 }
149 if (is.eof()) break;
150 }
151
152 if (!MD5_Final(_md5,&ctxt)) {
153 error= "MD5 calc error (final)";
154 goto FINALLY;
155 }
156
157 for(int i = 0, s = 0; i < 16; ++i, ++s) {
158 if (s >= (int)sizeof(size_t)) s = 0;
159 _h ^= ((size_t)_md5[i]) << (s << 3);
160 }
161
162 FINALLY:
163
164 // clean-up
165 is.close();
166 if (_relbuff) (*_relbuff)(buffer);
167
168 if (error) throw error;
169 }
170
fsize(const std::string & path)171 off_t filei::fsize(const std::string& path) throw(const char*) {
172 struct stat fsi;
173
174 if (::stat(path.c_str(),&fsi)) throw "Could not stat file.";
175 if (!S_ISREG(fsi.st_mode) && !S_ISLNK(fsi.st_mode)) throw "Not a file.";
176 return fsi.st_size;
177 }
178
// Byte-for-byte comparison of two streams.
//
//   is1, is2     - streams positioned at the start of the data to compare
//   buff1, buff2 - scratch buffers holding at least c1 and c2 bytes
//   c1, c2       - buffer capacities; both streams are read in chunks of
//                  min(c1,c2) so that the chunks always line up
//   m            - if non-zero, only the first m bytes are compared
//
// Returns true when the compared ranges are identical. Throws a const char*
// message on a read error or when a buffer has no capacity.
//
// The former dynamic exception specification was dropped: it is ill-formed
// since C++17 and this function is local to the file.
static bool __bytesame(
   std::istream& is1, std::istream& is2,
   char* buff1, char* buff2,
   size_t c1, size_t c2, size_t m) {

   const size_t c = c1 < c2 ? c1 : c2;
   if (!c) throw "Empty comparison buffer";   // reading 0 bytes would never reach eof

   size_t tot = 0;   // bytes found equal so far

   for(;;) {
      is1.read(buff1,c);
      is2.read(buff2,c);

      // a broken stream would otherwise keep delivering 0 bytes forever
      if (is1.bad() || is2.bad()) throw "Could not read file";

      size_t n1 = is1.gcount();
      size_t n2 = is2.gcount();

      if (m) {
         // ignore whatever was read beyond the limit
         const size_t left = m - tot;
         if (n1 > left) n1 = left;
         if (n2 > left) n2 = left;
      }

      if (n1 != n2) return false;

      for(const char* p1 = buff1, * p2 = buff2; p1 < buff1 + n1; ++p1, ++p2)
         if (*p1 != *p2) return false;

      tot += n1;

      // the requested prefix matched; what follows is irrelevant
      if (m && tot >= m) return true;

      if (is1.eof() || is2.eof()) return is1.eof() && is2.eof();
   }
}
213
// Refills buff with up to c bytes read from is, rewinds the cursor p to the
// start of buff and returns how many bytes were actually read.
static size_t __reload(std::istream& is, char* buff, size_t c, char*& p) {
   is.read(buff,c);
   const size_t got = is.gcount();
   p = buff;
   return got;
}
219
// Replaces c by its lower-case counterpart when it is an ASCII upper-case
// letter; any other character is left as it is.
static void __tolower(char& c) {
   if (c < 'A' || c > 'Z') return;
   c += 'a' - 'A';
}
224
__skipws(char * & p,const char * e)225 static void __skipws(char*& p, const char* e) {
226 for(;p < e; ++p) if (!__whitec(*p)) return;
227 }
228
__same(std::istream & is1,std::istream & is2,char * buff1,char * buff2,size_t c1,size_t c2,size_t m,bool ic,bool iw)229 static bool __same(
230 std::istream& is1, std::istream& is2,
231 char* buff1, char* buff2,
232 size_t c1, size_t c2, size_t m,
233 bool ic, bool iw) {
234
235 is1.read(buff1,c1);
236 is2.read(buff2,c2);
237
238 size_t n1 = is1.gcount();
239 size_t n2 = is2.gcount();
240
241 char* p1 = buff1;
242 char* p2 = buff2;
243
244 for(;;) {
245 if (p1 == buff1+n1 && !(n1 = __reload(is1,buff1,c1,p1))) break;
246 if (p2 == buff2+n2 && !(n2 = __reload(is2,buff2,c2,p2))) break;
247
248 if (iw) {
249 __skipws(p1,buff1+n1);
250 __skipws(p2,buff2+n2);
251 if ((p1 == buff1+n1) || (p2 == buff2+n2)) continue;
252 }
253
254 if (ic) { __tolower(*p1), __tolower(*p2); }
255 if (*p1 != *p2) return false;
256 ++p1, ++p2;
257 }
258
259 if (iw) {
260 for(;p1 < buff1 + n1; ++p1) if (!__whitec(*p1)) return false;
261 for(;p2 < buff2 + n2; ++p2) if (!__whitec(*p2)) return false;
262 }
263
264 return true;
265 }
266
eq(const std::string & p1,const std::string & p2,bool ic,bool iw,size_t m,size_t bn)267 bool filei::eq(
268 const std::string& p1, const std::string& p2,
269 bool ic, bool iw, size_t m, size_t bn) throw(const char*) {
270
271 const char* error = 0;
272 char* buffer = 0;
273 bool res = false;
274
275 std::ifstream is1(p1.c_str());
276 std::ifstream is2(p2.c_str());
277
278 if (!is1.good() || !is2.good()) {
279 error = "Could not open file";
280 goto FINALLY;
281 }
282
283
284 try {
285 bn <<=1;
286 buffer = static_cast<char*>((*_gbuff)(bn)); // get buffer
287 if (!buffer) throw 1;
288
289 } catch(...) {
290 error = "Could not allocate memory";
291 goto FINALLY;
292 }
293
294 bn = _buffc ? std::min(bn,(*_buffc)()) : bn; // get buffer size
295
296 try {
297 size_t h = bn >> 1;
298 res = !iw && !ic ? __bytesame(is1,is2,buffer,buffer + h,h,bn-h,m) :
299 __same(is1,is2,buffer,buffer + h,h,bn-h,m,ic,iw);
300 } catch(const char* e) {
301 error = e;
302 goto FINALLY;
303 }
304
305
306 FINALLY:
307
308 // clean-up
309 is1.close(), is2.close();
310 if (_relbuff) (*_relbuff)(buffer);
311
312 if (error) throw error;
313
314 return res;
315 }
316
operator ()(const filei & fi1,const filei & fi2) const317 bool filei::md5cmp::operator()(const filei& fi1, const filei& fi2) const {
318 if (fi1.h() < fi2.h()) return true;
319 else if (fi1.h() > fi2.h()) return false;
320 for(const unsigned char* p1=fi1._md5, *p2=fi2._md5;
321 p1< fi1._md5 + 16; ++p1,++p2) {
322 if (*p1 < *p2) return true;
323 else if (*p1 > *p2) return false;
324 }
325 return false;
326 }
327
328
operator ()(const filei & fi1,const filei & fi2) const329 bool filei::md5eq::operator()(const filei& fi1, const filei& fi2) const {
330 if (fi1.h() != fi2.h()) return false;
331 for(const unsigned char* p1=fi1._md5, *p2=fi2._md5;
332 p1< fi1._md5 + 16; ++p1,++p2) {
333 if (*p1 != *p2) return false;
334 }
335 return true;
336 }
337
338
339