1 /*
2  * The contents of this file are subject to the Mozilla Public License
3  * Version 1.1 (the "License"); you may not use this file except in
4  * compliance with the License. You may obtain a copy of the License at
5  * http://www.mozilla.org/MPL/
6  *
7  * Software distributed under the License is distributed on an "AS IS"
8  * basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See the
9  * License for the specific language governing rights and limitations
10  * under the License.
11  *
12  * The Original Code was developed for an EU.EDGE internal project and
13  * is made available according to the terms of this license.
14  *
15  * The Initial Developer of the Original Code is Istvan T. Hernadvolgyi,
16  * EU.EDGE LLC.
17  *
18  * Portions created by EU.EDGE LLC are Copyright (C) EU.EDGE LLC.
19  * All Rights Reserved.
20  *
21  * Alternatively, the contents of this file may be used under the terms
22  * of the GNU General Public License (the "GPL"), in which case the
23  * provisions of GPL are applicable instead of those above.  If you wish
24  * to allow use of your version of this file only under the terms of the
25  * GPL and not to allow others to use your version of this file under the
26  * License, indicate your decision by deleting the provisions above and
27  * replace them with the notice and other provisions required by the GPL.
28  * If you do not delete the provisions above, a recipient may use your
29  * version of this file under either the License or the GPL.
30  */
31 
32 // FILE COMPARISONS BY MD5 HASH VALUE - IMPLEMENTATION
33 //
34 
35 #include <filei.h>
36 
37 extern "C" {
38 #include <stdlib.h>
39 #include <unistd.h>
40 #include <strings.h>
41 #include <sys/types.h>
42 #include <sys/stat.h>
43 #include <openssl/md5.h>
44 }
45 
46 #include <fstream>
47 
48 void* (*filei::_gbuff)(size_t) = &filei::gbuff;
49 size_t (*filei::_buffc)() = &filei::buffc;
50 void (*filei::_relbuff)(void*) = 0;
51 char filei::_buffer[__UABUFFSIZE];
52 
filei(const std::string & path,bool ic,bool iw,size_t m,size_t bs)53 filei::filei(const std::string& path, bool ic, bool iw, size_t m, size_t bs)
54 throw(const char*):_path(path),_h(0)  {
55    ::bzero(_md5,16); // zero out
56    calc(ic,iw,bs,m);
57 }
58 
// Folds the ASCII capitals 'A'-'Z' in the first n bytes of buffer to lower
// case, in place. All other bytes are left untouched.
static void __lower_case(char* buffer, size_t n) {
   const int shift = 'a' - 'A';
   for(size_t i = 0; i < n; ++i) {
      char& ch = buffer[i];
      if (ch >= 'A' && ch <= 'Z') ch += shift;
   }
}
65 
// Tells whether c is one of the white space characters recognised here:
// blank, horizontal tab, carriage return or line feed.
static bool __whitec(char c) {
   return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}
77 
78 // count contiguous white spaces from pointer
__countw(const char * p,const char * e)79 static size_t __countw(const char* p, const char* e) {
80    size_t w = 0;
81    for(;p<e; ++p, ++w) if (!__whitec(*p)) break;
82    return w;
83 }
84 
85 // in-place remove contiguous white space, return number of chars removed
__remove_white(char * buffer,int n)86 static int __remove_white(char* buffer, int n) {
87    int r = 0;
88 
89    for(char *p = buffer; p < buffer + n; ++p) {
90       int k = __countw(p,buffer+n);
91       if (k) {
92          // shift by k
93          for(char* pk = p; k && pk < buffer + n - k; ++pk) *pk = *(pk+k);
94          n -= k;
95          r += k;
96       }
97    }
98 
99    return r;
100 }
101 
calc(bool ic,bool iw,size_t bn,size_t m)102 void filei::calc(bool ic, bool iw, size_t bn, size_t m) throw(const char*) {
103 
104    const char* error = 0;
105 
106    char* buffer = 0;
107    size_t tot = 0;
108 
109    std::ifstream is(_path.c_str());
110 
111    if (!is.good()) { error = "Could not open file"; goto FINALLY; }
112 
113    try {
114       buffer= static_cast<char*>((*_gbuff)(bn));   // get buffer
115       if (!buffer) throw 1;
116    } catch(...) {
117       error = "Could not allocate memory";
118       goto FINALLY;
119    }
120 
121    bn = _buffc ? std::min(bn,(*_buffc)()) : bn;  // get buffer size
122 
123    MD5_CTX ctxt;
124    if (!MD5_Init(&ctxt)) { error = "Could not init MD5"; goto FINALLY; }
125 
126 
127    for(bool done=false;!done;) {
128       is.read(buffer,bn);
129       size_t n = is.gcount();
130       if (!n) break;
131 
132       if (ic) __lower_case(buffer,n);
133       if (iw) {
134          n -=  __remove_white(buffer,n);
135          if (!n) continue;
136       }
137 
138       if (m) {
139          if (tot + n > m) {
140             n = m - tot;
141             done = true;
142          } else tot += n;
143       }
144 
145       if (!MD5_Update(&ctxt,buffer,n)) {
146           error = "MD5 calc error";
147           goto FINALLY;
148       }
149       if (is.eof()) break;
150    }
151 
152    if (!MD5_Final(_md5,&ctxt)) {
153       error= "MD5 calc error (final)";
154       goto FINALLY;
155    }
156 
157    for(int i = 0, s = 0; i < 16; ++i, ++s) {
158       if (s >= (int)sizeof(size_t)) s = 0;
159       _h ^= ((size_t)_md5[i]) << (s << 3);
160    }
161 
162 FINALLY:
163 
164    // clean-up
165    is.close();
166    if (_relbuff) (*_relbuff)(buffer);
167 
168    if (error) throw error;
169 }
170 
fsize(const std::string & path)171 off_t filei::fsize(const std::string& path) throw(const char*) {
172    struct stat fsi;
173 
174    if (::stat(path.c_str(),&fsi)) throw "Could not stat file.";
175    if (!S_ISREG(fsi.st_mode) && !S_ISLNK(fsi.st_mode)) throw "Not a file.";
176    return fsi.st_size;
177 }
178 
// Byte-wise comparison of two streams.
//
//   is1, is2      streams to compare
//   buff1, buff2  work areas of at least c1 and c2 bytes
//   c1, c2        capacities of the work areas
//   m             if non-zero, only the first m bytes are compared
//
// Returns true if the streams have the same content (up to the limit).
// Throws a const char* message if no buffer space is available or a stream
// reports a read error.
//
// Both streams are read in chunks of the same size, min(c1,c2). Comparing
// chunks of different sizes would report identical files as different as
// soon as they are longer than the smaller chunk.
//
// There is no dynamic exception specification: it was removed from the
// language in C++17 and is not needed on a file-local function.
static bool __bytesame(
   std::istream& is1, std::istream& is2,
   char* buff1, char* buff2,
   size_t c1, size_t c2, size_t m) {

   const size_t c = c1 < c2 ? c1 : c2;

   // reading zero bytes never reaches the end of the stream
   if (!c) throw "Buffer too small";

   size_t tot = 0;   // bytes found equal so far, only tracked when m is set

   for(;;) {
      is1.read(buff1,c);
      is2.read(buff2,c);

      size_t n1 = is1.gcount();
      size_t n2 = is2.gcount();

      if (m) {
         // do not look beyond the limit
         if (tot + n1 > m) n1 = m - tot;
         if (tot + n2 > m) n2 = m - tot;
      }

      if (n1 != n2) return false;

      for(const char* p1 = buff1, * p2 = buff2; p1 < buff1 + n1; ++p1, ++p2)
         if (*p1 != *p2) return false;

      // limit reached: whatever follows in either stream is irrelevant
      if (m && (tot += n1) >= m) return true;

      // a failed read that is not the end of the stream would otherwise
      // keep delivering empty, "equal" chunks forever
      if (is1.bad() || is2.bad()) throw "Could not read file";

      if (is1.eof() || is2.eof()) return is1.eof() && is2.eof();
   }
}
213 
// Reads up to c bytes from is into buff and resets the cursor p to the
// start of buff. Returns the number of bytes actually read, which is zero
// once nothing more can be read from the stream.
static size_t __reload(std::istream& is, char* buff, size_t c, char*& p) {
   p = buff;
   is.read(buff,c);
   return is.gcount();
}
219 
// Replaces c by its lower case counterpart if it is one of the ASCII
// capitals 'A'-'Z'; any other character is left as it is.
static void __tolower(char& c) {
   const bool capital = 'A' <= c && c <= 'Z';
   if (capital) c += 'a' - 'A';
}
224 
__skipws(char * & p,const char * e)225 static void __skipws(char*& p, const char* e) {
226    for(;p < e; ++p) if (!__whitec(*p)) return;
227 }
228 
__same(std::istream & is1,std::istream & is2,char * buff1,char * buff2,size_t c1,size_t c2,size_t m,bool ic,bool iw)229 static bool __same(
230    std::istream& is1, std::istream& is2,
231    char* buff1, char* buff2,
232    size_t c1, size_t c2, size_t m,
233    bool ic, bool iw) {
234 
235    is1.read(buff1,c1);
236    is2.read(buff2,c2);
237 
238    size_t n1 = is1.gcount();
239    size_t n2 = is2.gcount();
240 
241    char* p1 = buff1;
242    char* p2 = buff2;
243 
244    for(;;) {
245       if (p1 == buff1+n1 && !(n1 = __reload(is1,buff1,c1,p1))) break;
246       if (p2 == buff2+n2 && !(n2 = __reload(is2,buff2,c2,p2))) break;
247 
248       if (iw) {
249          __skipws(p1,buff1+n1);
250          __skipws(p2,buff2+n2);
251          if ((p1 == buff1+n1) || (p2 == buff2+n2)) continue;
252       }
253 
254       if (ic) { __tolower(*p1), __tolower(*p2); }
255       if (*p1 != *p2) return false;
256       ++p1, ++p2;
257    }
258 
259    if (iw) {
260       for(;p1 < buff1 + n1; ++p1) if (!__whitec(*p1)) return false;
261       for(;p2 < buff2 + n2; ++p2) if (!__whitec(*p2)) return false;
262    }
263 
264    return true;
265 }
266 
eq(const std::string & p1,const std::string & p2,bool ic,bool iw,size_t m,size_t bn)267 bool filei::eq(
268    const std::string& p1, const std::string& p2,
269    bool ic, bool iw, size_t m, size_t bn) throw(const char*) {
270 
271    const char* error = 0;
272    char* buffer = 0;
273    bool res = false;
274 
275    std::ifstream is1(p1.c_str());
276    std::ifstream is2(p2.c_str());
277 
278    if (!is1.good() || !is2.good()) {
279       error = "Could not open file";
280       goto FINALLY;
281    }
282 
283 
284    try {
285       bn <<=1;
286       buffer = static_cast<char*>((*_gbuff)(bn)); // get buffer
287       if (!buffer) throw 1;
288 
289    } catch(...) {
290       error = "Could not allocate memory";
291       goto FINALLY;
292    }
293 
294    bn = _buffc ? std::min(bn,(*_buffc)()) : bn; // get buffer size
295 
296    try {
297       size_t h = bn >> 1;
298       res = !iw && !ic ? __bytesame(is1,is2,buffer,buffer + h,h,bn-h,m) :
299          __same(is1,is2,buffer,buffer + h,h,bn-h,m,ic,iw);
300    } catch(const char* e) {
301       error = e;
302       goto FINALLY;
303    }
304 
305 
306 FINALLY:
307 
308    // clean-up
309    is1.close(), is2.close();
310    if (_relbuff) (*_relbuff)(buffer);
311 
312    if (error) throw error;
313 
314    return res;
315 }
316 
operator ()(const filei & fi1,const filei & fi2) const317 bool filei::md5cmp::operator()(const filei& fi1, const filei& fi2) const {
318    if (fi1.h() < fi2.h()) return true;
319    else if (fi1.h() > fi2.h()) return false;
320    for(const unsigned char* p1=fi1._md5, *p2=fi2._md5;
321       p1< fi1._md5 + 16; ++p1,++p2) {
322       if (*p1 < *p2) return true;
323       else if (*p1 > *p2) return false;
324    }
325    return false;
326 }
327 
328 
operator ()(const filei & fi1,const filei & fi2) const329 bool filei::md5eq::operator()(const filei& fi1, const filei& fi2) const {
330    if (fi1.h() != fi2.h()) return false;
331    for(const unsigned char* p1=fi1._md5, *p2=fi2._md5;
332       p1< fi1._md5 + 16; ++p1,++p2) {
333       if (*p1 != *p2) return false;
334    }
335    return true;
336 }
337 
338 
339