1 /***************************************************************************
2        cfilehasher.cpp  -  Calculate the TTH root and leaves for a file
3                              -------------------
4     begin                : Fri May 16 2008
5     copyright            : (C) 2008 by Edward Sheldrake
6     email                : ejs1920@yahoo.co.uk
7  ***************************************************************************/
8 
9 /***************************************************************************
10  *                                                                         *
11  *   This program is free software; you can redistribute it and/or modify  *
12  *   it under the terms of the GNU General Public License as published by  *
13  *   the Free Software Foundation; either version 2 of the License, or     *
14  *   (at your option) any later version.                                   *
15  *                                                                         *
16  ***************************************************************************/
17 
18 #include "cfilehasher.h"
19 
20 #include "core/cbytearray.h"
21 #include "core/cbase32.h"
22 #include "core/cdir.h"
23 
24 #include "hash/compat.h"
25 #include "hash/MerkleTree.h"
26 
27 #include <stdio.h>
28 #include <string.h> //for memcmp()
29 
30 /** */
CTreeVerificationReport(const CTreeVerificationReport & other)31 CTreeVerificationReport::CTreeVerificationReport( const CTreeVerificationReport & other )
32 {
33 	filename = other.filename;
34 	tthRoot  = other.tthRoot;
35 	filesize = other.filesize;
36 	allgood  = other.allgood;
37 
38 	if ( other.segments )
39 	{
40 		segments = new CList<CHashedSegment>();
41 
42 		CHashedSegment * hs = 0;
43 
44 		/* FIXME stop casting away the const once CList is fixed / replaced */
45 		while ( (hs = ((CTreeVerificationReport&)other).segments->Next(hs)) != 0 )
46 		{
47 			segments->Add( new CHashedSegment(*hs) );
48 		}
49 	}
50 	else
51 	{
52 		segments = 0;
53 	}
54 }
55 
56 /** */
ToString() const57 CString CTreeVerificationReport::ToString() const
58 {
59 	CString s = "CTreeVerificationReport\nFileName: ";
60 	s += filename;
61 	s += "\nFileSize: ";
62 	s += CString::number(filesize);
63 	s += "\nActual TTH root: ";
64 	s += tthRoot;
65 	s += "\nAll hashes OK: ";
66 	if ( allgood )
67 	{
68 		s += "YES";
69 	}
70 	else
71 	{
72 		s += "NO";
73 	}
74 	s += "\nNumber of segments: ";
75 	s += CString::number(segments->Count());
76 	//        18446744073709551615 18446744073709551615 ORQ6K7F5U2QGJOUKEKPEDJ2XXM3FTVYQ2BLWQOI ORQ6K7F5U2QGJOUKEKPEDJ2XXM3FTVYQ2BLWQOI
77 	s += "\n\nStart                Size                 Expected TTH of block                   Actual TTH of block\n";
78 
79 	CHashedSegment * segment = 0;
80 
81 	while ( (segment = segments->Next(segment)) != 0 )
82 	{
83 		s += CString::number(segment->start).RightJustify(20);
84 		s += ' ';
85 		s += CString::number(segment->size).RightJustify(20);
86 		s += ' ';
87 		s += segment->expected;
88 		s += ' ';
89 		s += segment->actual;
90 
91 		if ( segment->expected != segment->actual )
92 		{
93 			s += " *** MISMATCH ***";
94 		}
95 
96 		s += "\n";
97 	}
98 
99 	return s;
100 }
101 
102 /** */
CFileHasher(const CString filename,CByteArray * workmem)103 CFileHasher::CFileHasher( const CString filename, CByteArray * workmem )
104 {
105 	status      = efhsNotStarted;
106 	m_bStop     = false;
107 	m_pRootData = 0;
108 	m_pLeafData = 0;
109 	filesize    = 0;
110 	m_nProgress = 0;
111 
112 	if ( file.Open( filename, IO_RAW | IO_READONLY ) )
113 	{
114 		m_pWorkMem = workmem;
115 		usingOwnMem = false;
116 		filesize = CDir().getFileSize( filename, false );
117 
118 		if ( m_pWorkMem == 0 )
119 		{
120 			m_pWorkMem = new CByteArray( 1024*1024 );
121 			usingOwnMem = true;
122 		}
123 
124 		if ( m_pWorkMem == 0 )
125 		{
126 			printf( "CFileHasher memory allocation failure\n" );
127 			status = efhsError;
128 		}
129 		else
130 		{
131 			status = efhsReady;
132 		}
133 	}
134 	else
135 	{
136 		m_pWorkMem = 0;
137 		printf( "CFileHasher cannot open '%s'\n", filename.Data() );
138 		status = efhsError;
139 	}
140 }
141 
142 /** delete stuff, close file */
~CFileHasher()143 CFileHasher::~CFileHasher()
144 {
145 	delete m_pRootData;
146 	m_pRootData = 0;
147 
148 	delete m_pLeafData;
149 	m_pLeafData = 0;
150 
151 	if ( usingOwnMem && (m_pWorkMem != 0) )
152 	{
153 		delete m_pWorkMem;
154 		m_pWorkMem = 0;
155 	}
156 
157 	if ( file.IsOpen() )
158 	{
159 		file.Close();
160 	}
161 }
162 
163 /** */
HashSize()164 unsigned long CFileHasher::HashSize()
165 {
166 	return dcpp::TigerTree::BYTES;
167 }
168 
169 /** */
GetHashRoot()170 CString CFileHasher::GetHashRoot()
171 {
172 	CString s;
173 
174 	if ( status == efhsFinished )
175 	{
176 		if ( m_pRootData != 0 )
177 		{
178 			if ( m_pRootData->Size() == dcpp::TigerTree::BYTES )
179 			{
180 				CBase32::Encode( &s, m_pRootData );
181 			}
182 			else
183 			{
184 				printf("CFileHasher::GetHashRoot wrong size %lu\n", m_pRootData->Size());
185 			}
186 		}
187 		else
188 		{
189 			printf("CFileHasher::GetHashRoot m_pRootData==0\n");
190 		}
191 	}
192 	else
193 	{
194 		printf("CFileHasher::GetHashRoot not finished\n");
195 	}
196 
197 	return s;
198 }
199 
200 /** */
GetHashRootRaw()201 CByteArray * CFileHasher::GetHashRootRaw()
202 {
203 	CByteArray * result = 0;
204 
205 	if ( status == efhsFinished )
206 	{
207 		if ( m_pRootData != 0 )
208 		{
209 			if ( m_pRootData->Size() == dcpp::TigerTree::BYTES )
210 			{
211 				result = new CByteArray();
212 				result->Append( m_pRootData->Data(), m_pRootData->Size() );
213 			}
214 			else
215 			{
216 				printf("CFileHasher::GetHashRootRaw wrong size %lu\n", m_pRootData->Size());
217 			}
218 		}
219 		else
220 		{
221 			printf("CFileHasher::GetHashRootRaw m_pRootData==0\n");
222 		}
223 	}
224 	else
225 	{
226 		printf("CFileHasher::GetHashRootRaw not finished\n");
227 	}
228 
229 	return result;
230 }
231 
232 /** */
GetLeafData()233 CByteArray * CFileHasher::GetLeafData()
234 {
235 	CByteArray * result = 0;
236 
237 	if ( status == efhsFinished )
238 	{
239 		if ( m_pLeafData != 0 )
240 		{
241 			result = new CByteArray();
242 			result->Append( m_pLeafData->Data(), m_pLeafData->Size() );
243 		}
244 		else
245 		{
246 			printf("CFileHasher::GetLeafData m_pLeafData==0\n");
247 		}
248 	}
249 	else
250 	{
251 		printf("CFileHasher::GetLeafData not finished\n");
252 	}
253 
254 	return result;
255 }
256 
257 /** */
StopHashing()258 void CFileHasher::StopHashing()
259 {
260 	m_bStop = true;
261 }
262 
263 /** */
ComputeHash(const eFileHasherStatus endStatus)264 void CFileHasher::ComputeHash( const eFileHasherStatus endStatus )
265 {
266 	if ( status == efhsReady )
267 	{
268 		status = efhsWorking;
269 
270 		dcpp::TigerTree hasher( std::max(dcpp::TigerTree::calcBlockSize(filesize,10), ((int64_t) 64*1024) ) );
271 
272 		long len;
273 
274 		while ( ((len = file.Read( (char*)m_pWorkMem->Data(), m_pWorkMem->Size() )) > 0) && (m_bStop == false) )
275 		{
276 			hasher.update(m_pWorkMem->Data(),len);
277 
278 			m_nProgress += len;
279 		}
280 
281 		file.Close();
282 
283 		if ( m_bStop )
284 		{
285 			status = efhsError;
286 		}
287 		else
288 		{
289 			hasher.finalize();
290 
291 			m_pRootData = new CByteArray();
292 			m_pRootData->Append( hasher.getRoot().data, dcpp::TigerTree::BYTES );
293 
294 			vector<uint8_t> leafdata = hasher.getLeafData();
295 			m_pLeafData = new CByteArray();
296 			m_pLeafData->Append( &leafdata[0], leafdata.size() );
297 
298 			status = endStatus;
299 		}
300 
301 		if ( usingOwnMem )
302 		{
303 			delete m_pWorkMem;
304 			m_pWorkMem = 0;
305 		}
306 	}
307 	else
308 	{
309 		printf("CFileHasher::ComputeHash not ready\n");
310 	}
311 }
312 
313 /** from DownloadManager::endData in the DC++ source */
GetBlockSize(const unsigned long leavesSize,const int64_t filesize)314 int64_t CFileHasher::GetBlockSize( const unsigned long leavesSize, const int64_t filesize )
315 {
316 	int64_t blocksize = 1024;
317 	while ( blocksize * leavesSize/sizeof(dcpp::TigerTree::MerkleValue) < filesize )
318 	{
319 		blocksize = blocksize * 2;
320 	}
321 	return blocksize;
322 }
323 
324 /** */
ValidateHashLeaves(CString tth,CByteArray * leaves,const ulonglong filesize)325 bool CFileHasher::ValidateHashLeaves( CString tth, CByteArray * leaves, const ulonglong filesize )
326 {
327 	CByteArray dst;
328 
329 	if ( CBase32::Decode( &dst, &tth ) != dcpp::TigerTree::BYTES )
330 	{
331 		printf("CFileHasher::ValidateHashLeaves: base32 decode return wrong size %lu\n",dst.Size());
332 		return false;
333 	}
334 	else
335 	{
336 		return ValidateHashLeaves( &dst, leaves, filesize );
337 	}
338 }
339 
340 /** */
ValidateHashLeaves(CByteArray * root,CByteArray * leaves,const ulonglong filesize)341 bool CFileHasher::ValidateHashLeaves( CByteArray * root, CByteArray * leaves, const ulonglong filesize )
342 {
343 	if ( (root == 0) || (leaves == 0) )
344 	{
345 		printf("CFileHasher::ValidateHashLeaves null pointer\n");
346 		return false;
347 	}
348 
349 	if ( root->Size() != dcpp::TigerTree::BYTES )
350 	{
351 		printf("CFileHasher::ValidateHashLeaves root wrong size\n");
352 		return false;
353 	}
354 	else if ( leaves->Size() < dcpp::TigerTree::BYTES )
355 	{
356 		printf("CFileHasher::ValidateHashLeaves leaves too small\n");
357 		return false;
358 	}
359 	else if ( leaves->Size() == dcpp::TigerTree::BYTES ) // optimisation
360 	{
361 		if ( memcmp(root->Data(),leaves->Data(),dcpp::TigerTree::BYTES) == 0 )
362 		{
363 			return true;
364 		}
365 	}
366 
367 	int64_t size = (int64_t) filesize;
368 
369 	if ( size < 0 )
370 	{
371 		printf("CFileHasher::ValidateHashLeaves filesize too big\n");
372 		return false;
373 	}
374 
375 	int64_t blocksize = GetBlockSize( leaves->Size(), size );
376 
377 	dcpp::TigerTree hasher(size,blocksize,leaves->Data());
378 
379 	if ( memcmp(hasher.getRoot().data,root->Data(),dcpp::TigerTree::BYTES) == 0 )
380 	{
381 		return true;
382 	}
383 	else
384 	{
385 		return false;
386 	}
387 }
388 
389 /** */
HashByteArray(CByteArray * data,unsigned long length)390 CByteArray * CFileHasher::HashByteArray( CByteArray * data, unsigned long length )
391 {
392 	CByteArray * result = new CByteArray();
393 
394 	dcpp::TigerTree hasher;
395 	hasher.update( data->Data(), length );
396 	hasher.finalize();
397 	result->Append( hasher.getRoot().data, dcpp::TigerTree::BYTES );
398 
399 	return result;
400 }
401 
402 /** */
Thread()403 void CFileHasherThread::Thread()
404 {
405 	Lock();
406 
407 	ComputeHash();
408 
409 	UnLock();
410 
411 	/* do not try to pthread_join ourself */
412 	Stop(false);
413 }
414 
415 /** */
CFileTreeVerifier(const CString filename,CByteArray * leaves,CByteArray * workmem)416 CFileTreeVerifier::CFileTreeVerifier( const CString filename, CByteArray * leaves, CByteArray * workmem ) : CFileHasher ( filename, workmem )
417 {
418 	m_pLeaves = leaves;
419 	m_pReport = new CTreeVerificationReport();
420 	m_pReport->filename = filename;
421 	m_nPass = 0;
422 }
423 
424 /** */
~CFileTreeVerifier()425 CFileTreeVerifier::~CFileTreeVerifier()
426 {
427 	delete m_pReport;
428 	m_pReport = 0;
429 }
430 
431 /** */
GetReport()432 CTreeVerificationReport * CFileTreeVerifier::GetReport()
433 {
434 	CTreeVerificationReport * pointer = 0;
435 
436 	if ( GetStatus() == efhsFinished )
437 	{
438 		pointer = m_pReport;
439 		m_pReport = 0;
440 	}
441 
442 	return pointer;
443 }
444 
445 /** */
Thread()446 void CFileTreeVerifier::Thread()
447 {
448 	Lock();
449 
450 	m_nPass = 1;
451 	ComputeHash( efhsWorking );
452 
453 	if ( status == efhsWorking )
454 	{
455 		m_nPass = 2;
456 		m_nProgress = 0;
457 		bool ok = true;
458 
459 		/* first get segment size, we need it for either method */
460 		int leafcount = m_pLeaves->Size() / dcpp::TigerTree::BYTES;
461 		unsigned long blocksize = 1024;
462 		while ( blocksize * leafcount < filesize )
463 		{
464 			blocksize = blocksize * 2;
465 		}
466 
467 		/* Can we do this? Or could two lists of tth leaves the same length be from differently structured trees? */
468 		/* This should not take to long, no need to check m_bStop */
469 		if ( m_pLeaves->Size() == HashLeavesDirect()->Size() )
470 		{
471 			printf("CFileTreeVerifier: leaf data are same length, comparing\n");
472 			CByteArray expected_in;
473 			CByteArray actual_in;
474 
475 			for ( unsigned long i = 0; i < m_pLeaves->Size(); i = i + dcpp::TigerTree::BYTES )
476 			{
477 				expected_in.SetSize(0);
478 				actual_in.SetSize(0);
479 
480 				expected_in.Append( m_pLeaves->Data()+i, dcpp::TigerTree::BYTES );
481 				actual_in.Append( HashLeavesDirect()->Data()+i, dcpp::TigerTree::BYTES );
482 
483 				CHashedSegment * segment = new CHashedSegment();
484 
485 				CBase32::Encode( &(segment->expected), &expected_in );
486 				CBase32::Encode( &(segment->actual), &actual_in );
487 
488 				if ( memcmp(expected_in.Data(),actual_in.Data(),dcpp::TigerTree::BYTES) != 0 )
489 				{
490 					ok = false;
491 				}
492 
493 				segment->start = i * blocksize;
494 				segment->size = blocksize;
495 				if ( segment->start + segment->size > filesize )
496 				{
497 					segment->size = filesize - segment->start;
498 				}
499 
500 				m_pReport->segments->Add(segment);
501 			}
502 
503 			m_pReport->tthRoot  = GetHashRoot();
504 			m_pReport->filesize = filesize;
505 			m_pReport->allgood  = ok;
506 			status = efhsFinished;
507 		}
508 		else
509 		{
510 			printf("CFileTreeVerifier: need to rehash each segment of file\n");
511 			/* Otherwise, we need to hash each segement of the file */
512 			if ( file.Open( m_pReport->filename, IO_RAW | IO_READONLY ) )
513 			{
514 				CByteArray expected_in;
515 
516 				for ( int i = 0; i < leafcount; i++ )
517 				{
518 					expected_in.SetSize(0);
519 					expected_in.Append( m_pLeaves->Data()+(i*dcpp::TigerTree::BYTES), dcpp::TigerTree::BYTES );
520 
521 					CByteArray * buffer = new CByteArray( blocksize );
522 
523 					long read = file.Read( (char*) buffer->Data(), blocksize );
524 					if ( read > 0 )
525 					{
526 						CByteArray * actual_in = HashByteArray( buffer, (unsigned long)read );
527 
528 						CHashedSegment * segment = new CHashedSegment();
529 
530 						CBase32::Encode( &(segment->expected), &expected_in );
531 						CBase32::Encode( &(segment->actual), actual_in );
532 
533 						delete actual_in;
534 
535 						if ( segment->expected != segment->actual )
536 						{
537 							ok = false;
538 						}
539 
540 						segment->start = m_nProgress;
541 						segment->size = read;
542 
543 						m_pReport->segments->Add(segment);
544 
545 						m_nProgress += read;
546 					}
547 					else
548 					{
549 						status = efhsError;
550 					}
551 
552 					delete buffer;
553 
554 					if ( m_bStop || (status == efhsError) )
555 					{
556 						status = efhsError;
557 						break;
558 					}
559 
560 				}
561 
562 				file.Close();
563 				if ( status == efhsWorking )
564 				{
565 					status = efhsFinished;
566 					m_pReport->tthRoot  = GetHashRoot();
567 					m_pReport->filesize = filesize;
568 					m_pReport->allgood  = ok;
569 				}
570 			}
571 			else
572 			{
573 				printf("CFileTreeVerifier: error opening file '%s'\n", m_pReport->filename.Data());
574 				status = efhsError;
575 			}
576 		}
577 	}
578 
579 	UnLock();
580 
581 	/* do not try to pthread_join ourself */
582 	Stop(false);
583 }
584