1 /***************************************************************************
2 cfilehasher.cpp - Calculate the TTH root and leaves for a file
3 -------------------
4 begin : Fri May 16 2008
5 copyright : (C) 2008 by Edward Sheldrake
6 email : ejs1920@yahoo.co.uk
7 ***************************************************************************/
8
9 /***************************************************************************
10 * *
11 * This program is free software; you can redistribute it and/or modify *
12 * it under the terms of the GNU General Public License as published by *
13 * the Free Software Foundation; either version 2 of the License, or *
14 * (at your option) any later version. *
15 * *
16 ***************************************************************************/
17
18 #include "cfilehasher.h"
19
20 #include "core/cbytearray.h"
21 #include "core/cbase32.h"
22 #include "core/cdir.h"
23
24 #include "hash/compat.h"
25 #include "hash/MerkleTree.h"
26
27 #include <stdio.h>
28 #include <string.h> //for memcmp()
29
30 /** */
CTreeVerificationReport(const CTreeVerificationReport & other)31 CTreeVerificationReport::CTreeVerificationReport( const CTreeVerificationReport & other )
32 {
33 filename = other.filename;
34 tthRoot = other.tthRoot;
35 filesize = other.filesize;
36 allgood = other.allgood;
37
38 if ( other.segments )
39 {
40 segments = new CList<CHashedSegment>();
41
42 CHashedSegment * hs = 0;
43
44 /* FIXME stop casting away the const once CList is fixed / replaced */
45 while ( (hs = ((CTreeVerificationReport&)other).segments->Next(hs)) != 0 )
46 {
47 segments->Add( new CHashedSegment(*hs) );
48 }
49 }
50 else
51 {
52 segments = 0;
53 }
54 }
55
56 /** */
ToString() const57 CString CTreeVerificationReport::ToString() const
58 {
59 CString s = "CTreeVerificationReport\nFileName: ";
60 s += filename;
61 s += "\nFileSize: ";
62 s += CString::number(filesize);
63 s += "\nActual TTH root: ";
64 s += tthRoot;
65 s += "\nAll hashes OK: ";
66 if ( allgood )
67 {
68 s += "YES";
69 }
70 else
71 {
72 s += "NO";
73 }
74 s += "\nNumber of segments: ";
75 s += CString::number(segments->Count());
76 // 18446744073709551615 18446744073709551615 ORQ6K7F5U2QGJOUKEKPEDJ2XXM3FTVYQ2BLWQOI ORQ6K7F5U2QGJOUKEKPEDJ2XXM3FTVYQ2BLWQOI
77 s += "\n\nStart Size Expected TTH of block Actual TTH of block\n";
78
79 CHashedSegment * segment = 0;
80
81 while ( (segment = segments->Next(segment)) != 0 )
82 {
83 s += CString::number(segment->start).RightJustify(20);
84 s += ' ';
85 s += CString::number(segment->size).RightJustify(20);
86 s += ' ';
87 s += segment->expected;
88 s += ' ';
89 s += segment->actual;
90
91 if ( segment->expected != segment->actual )
92 {
93 s += " *** MISMATCH ***";
94 }
95
96 s += "\n";
97 }
98
99 return s;
100 }
101
102 /** */
CFileHasher(const CString filename,CByteArray * workmem)103 CFileHasher::CFileHasher( const CString filename, CByteArray * workmem )
104 {
105 status = efhsNotStarted;
106 m_bStop = false;
107 m_pRootData = 0;
108 m_pLeafData = 0;
109 filesize = 0;
110 m_nProgress = 0;
111
112 if ( file.Open( filename, IO_RAW | IO_READONLY ) )
113 {
114 m_pWorkMem = workmem;
115 usingOwnMem = false;
116 filesize = CDir().getFileSize( filename, false );
117
118 if ( m_pWorkMem == 0 )
119 {
120 m_pWorkMem = new CByteArray( 1024*1024 );
121 usingOwnMem = true;
122 }
123
124 if ( m_pWorkMem == 0 )
125 {
126 printf( "CFileHasher memory allocation failure\n" );
127 status = efhsError;
128 }
129 else
130 {
131 status = efhsReady;
132 }
133 }
134 else
135 {
136 m_pWorkMem = 0;
137 printf( "CFileHasher cannot open '%s'\n", filename.Data() );
138 status = efhsError;
139 }
140 }
141
142 /** delete stuff, close file */
~CFileHasher()143 CFileHasher::~CFileHasher()
144 {
145 delete m_pRootData;
146 m_pRootData = 0;
147
148 delete m_pLeafData;
149 m_pLeafData = 0;
150
151 if ( usingOwnMem && (m_pWorkMem != 0) )
152 {
153 delete m_pWorkMem;
154 m_pWorkMem = 0;
155 }
156
157 if ( file.IsOpen() )
158 {
159 file.Close();
160 }
161 }
162
163 /** */
HashSize()164 unsigned long CFileHasher::HashSize()
165 {
166 return dcpp::TigerTree::BYTES;
167 }
168
169 /** */
GetHashRoot()170 CString CFileHasher::GetHashRoot()
171 {
172 CString s;
173
174 if ( status == efhsFinished )
175 {
176 if ( m_pRootData != 0 )
177 {
178 if ( m_pRootData->Size() == dcpp::TigerTree::BYTES )
179 {
180 CBase32::Encode( &s, m_pRootData );
181 }
182 else
183 {
184 printf("CFileHasher::GetHashRoot wrong size %lu\n", m_pRootData->Size());
185 }
186 }
187 else
188 {
189 printf("CFileHasher::GetHashRoot m_pRootData==0\n");
190 }
191 }
192 else
193 {
194 printf("CFileHasher::GetHashRoot not finished\n");
195 }
196
197 return s;
198 }
199
200 /** */
GetHashRootRaw()201 CByteArray * CFileHasher::GetHashRootRaw()
202 {
203 CByteArray * result = 0;
204
205 if ( status == efhsFinished )
206 {
207 if ( m_pRootData != 0 )
208 {
209 if ( m_pRootData->Size() == dcpp::TigerTree::BYTES )
210 {
211 result = new CByteArray();
212 result->Append( m_pRootData->Data(), m_pRootData->Size() );
213 }
214 else
215 {
216 printf("CFileHasher::GetHashRootRaw wrong size %lu\n", m_pRootData->Size());
217 }
218 }
219 else
220 {
221 printf("CFileHasher::GetHashRootRaw m_pRootData==0\n");
222 }
223 }
224 else
225 {
226 printf("CFileHasher::GetHashRootRaw not finished\n");
227 }
228
229 return result;
230 }
231
232 /** */
GetLeafData()233 CByteArray * CFileHasher::GetLeafData()
234 {
235 CByteArray * result = 0;
236
237 if ( status == efhsFinished )
238 {
239 if ( m_pLeafData != 0 )
240 {
241 result = new CByteArray();
242 result->Append( m_pLeafData->Data(), m_pLeafData->Size() );
243 }
244 else
245 {
246 printf("CFileHasher::GetLeafData m_pLeafData==0\n");
247 }
248 }
249 else
250 {
251 printf("CFileHasher::GetLeafData not finished\n");
252 }
253
254 return result;
255 }
256
257 /** */
StopHashing()258 void CFileHasher::StopHashing()
259 {
260 m_bStop = true;
261 }
262
263 /** */
ComputeHash(const eFileHasherStatus endStatus)264 void CFileHasher::ComputeHash( const eFileHasherStatus endStatus )
265 {
266 if ( status == efhsReady )
267 {
268 status = efhsWorking;
269
270 dcpp::TigerTree hasher( std::max(dcpp::TigerTree::calcBlockSize(filesize,10), ((int64_t) 64*1024) ) );
271
272 long len;
273
274 while ( ((len = file.Read( (char*)m_pWorkMem->Data(), m_pWorkMem->Size() )) > 0) && (m_bStop == false) )
275 {
276 hasher.update(m_pWorkMem->Data(),len);
277
278 m_nProgress += len;
279 }
280
281 file.Close();
282
283 if ( m_bStop )
284 {
285 status = efhsError;
286 }
287 else
288 {
289 hasher.finalize();
290
291 m_pRootData = new CByteArray();
292 m_pRootData->Append( hasher.getRoot().data, dcpp::TigerTree::BYTES );
293
294 vector<uint8_t> leafdata = hasher.getLeafData();
295 m_pLeafData = new CByteArray();
296 m_pLeafData->Append( &leafdata[0], leafdata.size() );
297
298 status = endStatus;
299 }
300
301 if ( usingOwnMem )
302 {
303 delete m_pWorkMem;
304 m_pWorkMem = 0;
305 }
306 }
307 else
308 {
309 printf("CFileHasher::ComputeHash not ready\n");
310 }
311 }
312
313 /** from DownloadManager::endData in the DC++ source */
GetBlockSize(const unsigned long leavesSize,const int64_t filesize)314 int64_t CFileHasher::GetBlockSize( const unsigned long leavesSize, const int64_t filesize )
315 {
316 int64_t blocksize = 1024;
317 while ( blocksize * leavesSize/sizeof(dcpp::TigerTree::MerkleValue) < filesize )
318 {
319 blocksize = blocksize * 2;
320 }
321 return blocksize;
322 }
323
324 /** */
ValidateHashLeaves(CString tth,CByteArray * leaves,const ulonglong filesize)325 bool CFileHasher::ValidateHashLeaves( CString tth, CByteArray * leaves, const ulonglong filesize )
326 {
327 CByteArray dst;
328
329 if ( CBase32::Decode( &dst, &tth ) != dcpp::TigerTree::BYTES )
330 {
331 printf("CFileHasher::ValidateHashLeaves: base32 decode return wrong size %lu\n",dst.Size());
332 return false;
333 }
334 else
335 {
336 return ValidateHashLeaves( &dst, leaves, filesize );
337 }
338 }
339
340 /** */
ValidateHashLeaves(CByteArray * root,CByteArray * leaves,const ulonglong filesize)341 bool CFileHasher::ValidateHashLeaves( CByteArray * root, CByteArray * leaves, const ulonglong filesize )
342 {
343 if ( (root == 0) || (leaves == 0) )
344 {
345 printf("CFileHasher::ValidateHashLeaves null pointer\n");
346 return false;
347 }
348
349 if ( root->Size() != dcpp::TigerTree::BYTES )
350 {
351 printf("CFileHasher::ValidateHashLeaves root wrong size\n");
352 return false;
353 }
354 else if ( leaves->Size() < dcpp::TigerTree::BYTES )
355 {
356 printf("CFileHasher::ValidateHashLeaves leaves too small\n");
357 return false;
358 }
359 else if ( leaves->Size() == dcpp::TigerTree::BYTES ) // optimisation
360 {
361 if ( memcmp(root->Data(),leaves->Data(),dcpp::TigerTree::BYTES) == 0 )
362 {
363 return true;
364 }
365 }
366
367 int64_t size = (int64_t) filesize;
368
369 if ( size < 0 )
370 {
371 printf("CFileHasher::ValidateHashLeaves filesize too big\n");
372 return false;
373 }
374
375 int64_t blocksize = GetBlockSize( leaves->Size(), size );
376
377 dcpp::TigerTree hasher(size,blocksize,leaves->Data());
378
379 if ( memcmp(hasher.getRoot().data,root->Data(),dcpp::TigerTree::BYTES) == 0 )
380 {
381 return true;
382 }
383 else
384 {
385 return false;
386 }
387 }
388
389 /** */
HashByteArray(CByteArray * data,unsigned long length)390 CByteArray * CFileHasher::HashByteArray( CByteArray * data, unsigned long length )
391 {
392 CByteArray * result = new CByteArray();
393
394 dcpp::TigerTree hasher;
395 hasher.update( data->Data(), length );
396 hasher.finalize();
397 result->Append( hasher.getRoot().data, dcpp::TigerTree::BYTES );
398
399 return result;
400 }
401
402 /** */
Thread()403 void CFileHasherThread::Thread()
404 {
405 Lock();
406
407 ComputeHash();
408
409 UnLock();
410
411 /* do not try to pthread_join ourself */
412 Stop(false);
413 }
414
415 /** */
CFileTreeVerifier(const CString filename,CByteArray * leaves,CByteArray * workmem)416 CFileTreeVerifier::CFileTreeVerifier( const CString filename, CByteArray * leaves, CByteArray * workmem ) : CFileHasher ( filename, workmem )
417 {
418 m_pLeaves = leaves;
419 m_pReport = new CTreeVerificationReport();
420 m_pReport->filename = filename;
421 m_nPass = 0;
422 }
423
424 /** */
~CFileTreeVerifier()425 CFileTreeVerifier::~CFileTreeVerifier()
426 {
427 delete m_pReport;
428 m_pReport = 0;
429 }
430
431 /** */
GetReport()432 CTreeVerificationReport * CFileTreeVerifier::GetReport()
433 {
434 CTreeVerificationReport * pointer = 0;
435
436 if ( GetStatus() == efhsFinished )
437 {
438 pointer = m_pReport;
439 m_pReport = 0;
440 }
441
442 return pointer;
443 }
444
445 /** */
Thread()446 void CFileTreeVerifier::Thread()
447 {
448 Lock();
449
450 m_nPass = 1;
451 ComputeHash( efhsWorking );
452
453 if ( status == efhsWorking )
454 {
455 m_nPass = 2;
456 m_nProgress = 0;
457 bool ok = true;
458
459 /* first get segment size, we need it for either method */
460 int leafcount = m_pLeaves->Size() / dcpp::TigerTree::BYTES;
461 unsigned long blocksize = 1024;
462 while ( blocksize * leafcount < filesize )
463 {
464 blocksize = blocksize * 2;
465 }
466
467 /* Can we do this? Or could two lists of tth leaves the same length be from differently structured trees? */
468 /* This should not take to long, no need to check m_bStop */
469 if ( m_pLeaves->Size() == HashLeavesDirect()->Size() )
470 {
471 printf("CFileTreeVerifier: leaf data are same length, comparing\n");
472 CByteArray expected_in;
473 CByteArray actual_in;
474
475 for ( unsigned long i = 0; i < m_pLeaves->Size(); i = i + dcpp::TigerTree::BYTES )
476 {
477 expected_in.SetSize(0);
478 actual_in.SetSize(0);
479
480 expected_in.Append( m_pLeaves->Data()+i, dcpp::TigerTree::BYTES );
481 actual_in.Append( HashLeavesDirect()->Data()+i, dcpp::TigerTree::BYTES );
482
483 CHashedSegment * segment = new CHashedSegment();
484
485 CBase32::Encode( &(segment->expected), &expected_in );
486 CBase32::Encode( &(segment->actual), &actual_in );
487
488 if ( memcmp(expected_in.Data(),actual_in.Data(),dcpp::TigerTree::BYTES) != 0 )
489 {
490 ok = false;
491 }
492
493 segment->start = i * blocksize;
494 segment->size = blocksize;
495 if ( segment->start + segment->size > filesize )
496 {
497 segment->size = filesize - segment->start;
498 }
499
500 m_pReport->segments->Add(segment);
501 }
502
503 m_pReport->tthRoot = GetHashRoot();
504 m_pReport->filesize = filesize;
505 m_pReport->allgood = ok;
506 status = efhsFinished;
507 }
508 else
509 {
510 printf("CFileTreeVerifier: need to rehash each segment of file\n");
511 /* Otherwise, we need to hash each segement of the file */
512 if ( file.Open( m_pReport->filename, IO_RAW | IO_READONLY ) )
513 {
514 CByteArray expected_in;
515
516 for ( int i = 0; i < leafcount; i++ )
517 {
518 expected_in.SetSize(0);
519 expected_in.Append( m_pLeaves->Data()+(i*dcpp::TigerTree::BYTES), dcpp::TigerTree::BYTES );
520
521 CByteArray * buffer = new CByteArray( blocksize );
522
523 long read = file.Read( (char*) buffer->Data(), blocksize );
524 if ( read > 0 )
525 {
526 CByteArray * actual_in = HashByteArray( buffer, (unsigned long)read );
527
528 CHashedSegment * segment = new CHashedSegment();
529
530 CBase32::Encode( &(segment->expected), &expected_in );
531 CBase32::Encode( &(segment->actual), actual_in );
532
533 delete actual_in;
534
535 if ( segment->expected != segment->actual )
536 {
537 ok = false;
538 }
539
540 segment->start = m_nProgress;
541 segment->size = read;
542
543 m_pReport->segments->Add(segment);
544
545 m_nProgress += read;
546 }
547 else
548 {
549 status = efhsError;
550 }
551
552 delete buffer;
553
554 if ( m_bStop || (status == efhsError) )
555 {
556 status = efhsError;
557 break;
558 }
559
560 }
561
562 file.Close();
563 if ( status == efhsWorking )
564 {
565 status = efhsFinished;
566 m_pReport->tthRoot = GetHashRoot();
567 m_pReport->filesize = filesize;
568 m_pReport->allgood = ok;
569 }
570 }
571 else
572 {
573 printf("CFileTreeVerifier: error opening file '%s'\n", m_pReport->filename.Data());
574 status = efhsError;
575 }
576 }
577 }
578
579 UnLock();
580
581 /* do not try to pthread_join ourself */
582 Stop(false);
583 }
584