1 // This file is part of par2cmdline (a PAR 2.0 compatible file verification and
2 // repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
3 //
4 // Copyright (c) 2003 Peter Brian Clements
5 //
6 // par2cmdline is free software; you can redistribute it and/or modify
7 // it under the terms of the GNU General Public License as published by
8 // the Free Software Foundation; either version 2 of the License, or
9 // (at your option) any later version.
10 //
11 // par2cmdline is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 //
16 // You should have received a copy of the GNU General Public License
17 // along with this program; if not, write to the Free Software
18 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
19
20 #ifndef __VERIFICATIONHASHTABLE_H__
21 #define __VERIFICATIONHASHTABLE_H__
22
23 class Par2RepairerSourceFile;
24 class VerificationHashTable;
25
26 // The VerificationHashEntry objects form the nodes of a binary trees stored
27 // in a VerificationHashTable object.
28
29 // There is one VerificationHashEntry object for each data block in the original
30 // source files.
31
32 class VerificationHashEntry
33 {
34 public:
VerificationHashEntry(Par2RepairerSourceFile * _sourcefile,DataBlock * _datablock,bool _firstblock,const FILEVERIFICATIONENTRY * _verificationentry)35 VerificationHashEntry(Par2RepairerSourceFile *_sourcefile,
36 DataBlock *_datablock,
37 bool _firstblock,
38 const FILEVERIFICATIONENTRY *_verificationentry)
39 {
40 sourcefile = _sourcefile;
41 datablock = _datablock;
42 firstblock = _firstblock;
43
44 crc = _verificationentry->crc;
45 hash = _verificationentry->hash;
46
47 left = right = same = next = 0;
48 }
49
~VerificationHashEntry(void)50 ~VerificationHashEntry(void)
51 {
52 delete left;
53 delete right;
54 delete same;
55 }
56
57 // Insert the current object is a child of the specified parent
58 void Insert(VerificationHashEntry **parent);
59
60 // Search (starting at the specified parent) for an object with a matching crc
61 static const VerificationHashEntry* Search(const VerificationHashEntry *entry, u32 crc);
62
63 // Search (starting at the specified parent) for an object with a matching hash
64 static const VerificationHashEntry* Search(const VerificationHashEntry *entry, const MD5Hash &hash);
65
66 // Comparison operators for searching
67 bool operator <(const VerificationHashEntry &r) const
68 {
69 return (crc < r.crc) || ((crc == r.crc) && (hash < r.hash));
70 }
71 bool operator >(const VerificationHashEntry &r) const
72 {
73 return (crc > r.crc) || ((crc == r.crc) && (hash > r.hash));
74 }
75 bool operator ==(const VerificationHashEntry &r) const
76 {
77 return (crc == r.crc) && (hash == r.hash);
78 }
79 bool operator <=(const VerificationHashEntry &r) const {return !operator>(r);}
80 bool operator >=(const VerificationHashEntry &r) const {return !operator<(r);}
81 bool operator !=(const VerificationHashEntry &r) const {return !operator==(r);}
82
83 // Data
SourceFile(void)84 Par2RepairerSourceFile* SourceFile(void) const {return sourcefile;}
GetDataBlock(void)85 const DataBlock* GetDataBlock(void) const {return datablock;}
FirstBlock(void)86 bool FirstBlock(void) const {return firstblock;}
87
88 // Set/Check the associated datablock
89 void SetBlock(DiskFile *diskfile, u64 offset) const;
90 bool IsSet(void) const;
91
Checksum(void)92 u32 Checksum(void) const {return crc;}
Hash(void)93 const MD5Hash& Hash(void) const {return hash;}
94
Same(void)95 VerificationHashEntry* Same(void) const {return same;}
Next(void)96 VerificationHashEntry* Next(void) const {return next;}
Next(VerificationHashEntry * _next)97 void Next(VerificationHashEntry *_next) {next = _next;}
98
99 protected:
100 // Data
101 Par2RepairerSourceFile *sourcefile;
102 DataBlock *datablock;
103 bool firstblock;
104
105 u32 crc;
106 MD5Hash hash;
107
108 protected:
109 // Binary tree
110 VerificationHashEntry *left;
111 VerificationHashEntry *right;
112
113 // Linked list of entries with the same crc and hash
114 VerificationHashEntry *same;
115
116 // Linked list of entries in sequence for same file
117 VerificationHashEntry *next;
118 };
119
SetBlock(DiskFile * diskfile,u64 offset)120 inline void VerificationHashEntry::SetBlock(DiskFile *diskfile, u64 offset) const
121 {
122 datablock->SetLocation(diskfile, offset);
123 }
124
IsSet(void)125 inline bool VerificationHashEntry::IsSet(void) const
126 {
127 return datablock->IsSet();
128 }
129
130 // Insert a new entry in the tree
Insert(VerificationHashEntry ** parent)131 inline void VerificationHashEntry::Insert(VerificationHashEntry **parent)
132 {
133 while (*parent)
134 {
135 if (**parent < *this)
136 {
137 parent = &(*parent)->right;
138 }
139 else if (**parent > *this)
140 {
141 parent = &(*parent)->left;
142 }
143 else
144 {
145 break;
146 }
147 }
148
149 while (*parent)
150 {
151 parent = &(*parent)->same;
152 }
153
154 *parent = this;
155 }
156
157 // Search the tree for an entry with the correct crc
Search(const VerificationHashEntry * entry,u32 crc)158 inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, u32 crc)
159 {
160 while (entry)
161 {
162 if (entry->crc < crc)
163 {
164 entry = entry->right;
165 }
166 else if (entry->crc > crc)
167 {
168 entry = entry->left;
169 }
170 else
171 {
172 break;
173 }
174 }
175
176 return entry;
177 }
178
179 // Search the tree for an entry with the correct hash
Search(const VerificationHashEntry * entry,const MD5Hash & hash)180 inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, const MD5Hash &hash)
181 {
182 u32 crc = entry->crc;
183
184 while (entry)
185 {
186 if ((entry->crc < crc) || ((entry->crc == crc) && (entry->hash < hash)))
187 {
188 entry = entry->right;
189 }
190 else if ((entry->crc > crc) || ((entry->crc == crc) && (entry->hash > hash)))
191 {
192 entry = entry->left;
193 }
194 else
195 {
196 break;
197 }
198 }
199
200 return entry;
201 }
202
203 // The VerificationHashTable object contains all of the VerificationHashEntry objects
204 // and is used to find matches for blocks of data in a target file that is being
205 // scanned.
206
207 // It is initialised by loading data from all available verification packets for the
208 // source files.
209
210 class VerificationHashTable
211 {
212 public:
213 VerificationHashTable(void);
214 ~VerificationHashTable(void);
215
216 void SetLimit(u32 limit);
217
218 // Load the data from the verification packet
219 void Load(Par2RepairerSourceFile *sourcefile, u64 blocksize);
220
221 // Try to find a match.
222 // nextentry - The entry which we expect to find next. This is used
223 // when a sequence of matches are found.
224 // sourcefile - Which source file we would prefer to find a match for
225 // if there are more than one possible match (with the
226 // same crc and hash).
227 // checksummer - Provides the crc and hash values being tested.
228 // duplicate - Set on return if the match would have been valid except
229 // for the fact that the block has already been found.
230 const VerificationHashEntry* FindMatch(const VerificationHashEntry *nextentry,
231 const Par2RepairerSourceFile *sourcefile,
232 FileCheckSummer &checksummer,
233 bool &duplicate) const;
234
235 // Look up based on the block crc
236 const VerificationHashEntry* Lookup(u32 crc) const;
237
238 // Continue lookup based on the block hash
239 const VerificationHashEntry* Lookup(const VerificationHashEntry *entry,
240 const MD5Hash &hash);
241
242 protected:
243 VerificationHashEntry **hashtable;
244 unsigned int hashmask;
245 };
246
247 // Search for an entry with the specified crc
Lookup(u32 crc)248 inline const VerificationHashEntry* VerificationHashTable::Lookup(u32 crc) const
249 {
250 if (hashmask)
251 {
252 return VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
253 }
254
255 return 0;
256 }
257
258 // Search for an entry with the specified hash
Lookup(const VerificationHashEntry * entry,const MD5Hash & hash)259 inline const VerificationHashEntry* VerificationHashTable::Lookup(const VerificationHashEntry *entry,
260 const MD5Hash &hash)
261 {
262 return VerificationHashEntry::Search(entry, hash);
263 }
264
FindMatch(const VerificationHashEntry * suggestedentry,const Par2RepairerSourceFile * sourcefile,FileCheckSummer & checksummer,bool & duplicate)265 inline const VerificationHashEntry* VerificationHashTable::FindMatch(const VerificationHashEntry *suggestedentry,
266 const Par2RepairerSourceFile *sourcefile,
267 FileCheckSummer &checksummer,
268 bool &duplicate) const
269 {
270 duplicate = false;
271
272 // Get the current checksum from the checksummer
273 u32 crc = checksummer.Checksum();
274
275 MD5Hash hash;
276 bool havehash = false;
277
278 // Do we know what the next entry should be
279 if (0 != suggestedentry)
280 {
281 // Is the suggested match supposed to be the last one in the file
282 if (suggestedentry->Next() == 0)
283 {
284 // How long should the last block be
285 u64 length = suggestedentry->GetDataBlock()->GetLength();
286
287 // Get a short checksum from the checksummer
288 u32 checksum = checksummer.ShortChecksum(length);
289
290 // Is the checksum correct
291 if (checksum == suggestedentry->Checksum())
292 {
293 // Get a short hash from the checksummer
294 hash = checksummer.ShortHash(length);
295
296 // If the hash matches as well, then return it
297 if (hash == suggestedentry->Hash())
298 {
299 return suggestedentry;
300 }
301 }
302 }
303 // If the suggested entry has not already been found, compare the checksum
304 else if (!suggestedentry->IsSet() && suggestedentry->Checksum() == crc)
305 {
306 // Get the hash value from the checksummer
307 havehash = true;
308 hash = checksummer.Hash();
309
310 // If the hash value matches, then return it.
311 if (hash == suggestedentry->Hash())
312 {
313 return suggestedentry;
314 }
315 }
316 }
317
318 // Look for other possible matches for the checksum
319 const VerificationHashEntry *nextentry = VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
320 if (0 == nextentry)
321 return 0;
322
323 // If we don't have the hash yet, get it
324 if (!havehash)
325 {
326 hash = checksummer.Hash();
327 }
328
329 // Look for an entry with a matching hash
330 nextentry = VerificationHashEntry::Search(nextentry, hash);
331 if (0 == nextentry)
332 return 0;
333
334 // Is there one match with the same checksum and hash, or many
335 if (nextentry->Same() == 0)
336 {
337 // If the match is for a block that is part of a target file
338 // for which we already have a complete match, then don't
339 // return it.
340 if (nextentry->SourceFile()->GetCompleteFile() != 0)
341 {
342 duplicate = true;
343 return 0;
344 }
345
346 // If we are close to the end of the file and the block
347 // length is wrong, don't return it because it is an
348 // invalid match
349 if (checksummer.ShortBlock() && checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength())
350 {
351 return 0;
352 }
353
354 // If the match was at the start of the file and it is the first
355 // block for a target file, then return it.
356 if (nextentry->FirstBlock() && checksummer.Offset() == 0)
357 {
358 return nextentry;
359 }
360
361 // Was this match actually the one which had originally been suggested
362 // but which has presumably already been found
363 if (nextentry == suggestedentry)
364 {
365 // Was the existing match in the same file as the new match
366 if (nextentry->IsSet() &&
367 nextentry->GetDataBlock()->GetDiskFile() == checksummer.GetDiskFile())
368 {
369 // Yes. Don't return it
370 duplicate = true;
371 return 0;
372 }
373 else
374 {
375 // No, it was in a different file. Return it.
376 // This ensures that we can find a perfect match for a target
377 // file even if some of the blocks had already been found
378 // in a different file.
379 return nextentry;
380 }
381 }
382 else
383 {
384 // return it only if it has not already been used
385 if (nextentry->IsSet())
386 {
387 duplicate = true;
388 return 0;
389 }
390
391 return nextentry;
392 }
393 }
394
395 // Do we prefer to match entries for a particular source file
396 if (0 != sourcefile)
397 {
398 const VerificationHashEntry *currententry = nextentry;
399 nextentry = 0;
400
401 // We don't want entries for the wrong source file, ones that
402 // have already been matched, or ones that are the wrong length
403 while (currententry && (currententry->SourceFile() != sourcefile ||
404 currententry->IsSet() ||
405 ((checksummer.ShortBlock() && checksummer.BlockLength()) != (currententry->GetDataBlock()->GetLength()))
406 )
407 )
408 {
409 // If we found an unused entry (which was presumably for the wrong
410 // source file) remember it (providing it is the correct length).
411 if (0 == nextentry && !(currententry->IsSet() ||
412 ((checksummer.ShortBlock() && checksummer.BlockLength()) != (currententry->GetDataBlock()->GetLength()))
413 )
414 )
415 {
416 nextentry = currententry;
417 }
418
419 currententry = currententry->Same();
420 }
421
422 // If we found an unused entry for the source file we want, return it
423 if (0 != currententry)
424 return currententry;
425 }
426
427 // Check for an unused entry which is the correct length
428 while (nextentry && (nextentry->IsSet() ||
429 ((checksummer.ShortBlock()) && (checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength()))
430 )
431 )
432 {
433 nextentry = nextentry->Same();
434 }
435
436 // Return what we have found
437 if (nextentry == 0)
438 {
439 duplicate = true;
440 }
441
442 return nextentry;
443 }
444
445 #endif // __VERIFICATIONHASHTABLE_H__
446