1 //  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
2 //  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
3 //
4 //  Copyright (c) 2003 Peter Brian Clements
5 //
6 //  par2cmdline is free software; you can redistribute it and/or modify
7 //  it under the terms of the GNU General Public License as published by
8 //  the Free Software Foundation; either version 2 of the License, or
9 //  (at your option) any later version.
10 //
11 //  par2cmdline is distributed in the hope that it will be useful,
12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 //  GNU General Public License for more details.
15 //
16 //  You should have received a copy of the GNU General Public License
17 //  along with this program; if not, write to the Free Software
18 //  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
19 
20 #ifndef __VERIFICATIONHASHTABLE_H__
21 #define __VERIFICATIONHASHTABLE_H__
22 
23 class Par2RepairerSourceFile;
24 class VerificationHashTable;
25 
26 // The VerificationHashEntry objects form the nodes of a binary trees stored
27 // in a VerificationHashTable object.
28 
29 // There is one VerificationHashEntry object for each data block in the original
30 // source files.
31 
32 class VerificationHashEntry
33 {
34 public:
VerificationHashEntry(Par2RepairerSourceFile * _sourcefile,DataBlock * _datablock,bool _firstblock,const FILEVERIFICATIONENTRY * _verificationentry)35   VerificationHashEntry(Par2RepairerSourceFile *_sourcefile,
36                         DataBlock *_datablock,
37                         bool _firstblock,
38                         const FILEVERIFICATIONENTRY *_verificationentry)
39   {
40     sourcefile = _sourcefile;
41     datablock = _datablock;
42     firstblock = _firstblock;
43 
44     crc = _verificationentry->crc;
45     hash = _verificationentry->hash;
46 
47     left = right = same = next = 0;
48   }
49 
~VerificationHashEntry(void)50   ~VerificationHashEntry(void)
51   {
52     delete left;
53     delete right;
54     delete same;
55   }
56 
57   // Insert the current object is a child of the specified parent
58   void Insert(VerificationHashEntry **parent);
59 
60   // Search (starting at the specified parent) for an object with a matching crc
61   static const VerificationHashEntry* Search(const VerificationHashEntry *entry, u32 crc);
62 
63   // Search (starting at the specified parent) for an object with a matching hash
64   static const VerificationHashEntry* Search(const VerificationHashEntry *entry, const MD5Hash &hash);
65 
66   // Comparison operators for searching
67   bool operator <(const VerificationHashEntry &r) const
68   {
69     return (crc < r.crc) || ((crc == r.crc) && (hash < r.hash));
70   }
71   bool operator >(const VerificationHashEntry &r) const
72   {
73     return (crc > r.crc) || ((crc == r.crc) && (hash > r.hash));
74   }
75   bool operator ==(const VerificationHashEntry &r) const
76   {
77     return (crc == r.crc) && (hash == r.hash);
78   }
79   bool operator <=(const VerificationHashEntry &r) const {return !operator>(r);}
80   bool operator >=(const VerificationHashEntry &r) const {return !operator<(r);}
81   bool operator !=(const VerificationHashEntry &r) const {return !operator==(r);}
82 
83   // Data
SourceFile(void)84   Par2RepairerSourceFile* SourceFile(void) const {return sourcefile;}
GetDataBlock(void)85   const DataBlock* GetDataBlock(void) const {return datablock;}
FirstBlock(void)86   bool FirstBlock(void) const {return firstblock;}
87 
88   // Set/Check the associated datablock
89   void SetBlock(DiskFile *diskfile, u64 offset) const;
90   bool IsSet(void) const;
91 
Checksum(void)92   u32 Checksum(void) const {return crc;}
Hash(void)93   const MD5Hash& Hash(void) const {return hash;}
94 
Same(void)95   VerificationHashEntry* Same(void) const {return same;}
Next(void)96   VerificationHashEntry* Next(void) const {return next;}
Next(VerificationHashEntry * _next)97   void Next(VerificationHashEntry *_next) {next = _next;}
98 
99 protected:
100   // Data
101   Par2RepairerSourceFile       *sourcefile;
102   DataBlock                    *datablock;
103   bool                          firstblock;
104 
105   u32                           crc;
106   MD5Hash                       hash;
107 
108 protected:
109   // Binary tree
110   VerificationHashEntry *left;
111   VerificationHashEntry *right;
112 
113   // Linked list of entries with the same crc and hash
114   VerificationHashEntry *same;
115 
116   // Linked list of entries in sequence for same file
117   VerificationHashEntry *next;
118 };
119 
SetBlock(DiskFile * diskfile,u64 offset)120 inline void VerificationHashEntry::SetBlock(DiskFile *diskfile, u64 offset) const
121 {
122   datablock->SetLocation(diskfile, offset);
123 }
124 
IsSet(void)125 inline bool VerificationHashEntry::IsSet(void) const
126 {
127   return datablock->IsSet();
128 }
129 
130 // Insert a new entry in the tree
Insert(VerificationHashEntry ** parent)131 inline void VerificationHashEntry::Insert(VerificationHashEntry **parent)
132 {
133   while (*parent)
134   {
135     if (**parent < *this)
136     {
137       parent = &(*parent)->right;
138     }
139     else if (**parent > *this)
140     {
141       parent = &(*parent)->left;
142     }
143     else
144     {
145       break;
146     }
147   }
148 
149   while (*parent)
150   {
151     parent = &(*parent)->same;
152   }
153 
154   *parent = this;
155 }
156 
157 // Search the tree for an entry with the correct crc
Search(const VerificationHashEntry * entry,u32 crc)158 inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, u32 crc)
159 {
160   while (entry)
161   {
162     if (entry->crc < crc)
163     {
164       entry = entry->right;
165     }
166     else if (entry->crc > crc)
167     {
168       entry = entry->left;
169     }
170     else
171     {
172       break;
173     }
174   }
175 
176   return entry;
177 }
178 
179 // Search the tree for an entry with the correct hash
Search(const VerificationHashEntry * entry,const MD5Hash & hash)180 inline const VerificationHashEntry* VerificationHashEntry::Search(const VerificationHashEntry *entry, const MD5Hash &hash)
181 {
182   u32 crc = entry->crc;
183 
184   while (entry)
185   {
186     if ((entry->crc < crc) || ((entry->crc == crc) && (entry->hash < hash)))
187     {
188       entry = entry->right;
189     }
190     else if ((entry->crc > crc) || ((entry->crc == crc) && (entry->hash > hash)))
191     {
192       entry = entry->left;
193     }
194     else
195     {
196       break;
197     }
198   }
199 
200   return entry;
201 }
202 
203 // The VerificationHashTable object contains all of the VerificationHashEntry objects
204 // and is used to find matches for blocks of data in a target file that is being
205 // scanned.
206 
207 // It is initialised by loading data from all available verification packets for the
208 // source files.
209 
210 class VerificationHashTable
211 {
212 public:
213   VerificationHashTable(void);
214   ~VerificationHashTable(void);
215 
216   void SetLimit(u32 limit);
217 
218   // Load the data from the verification packet
219   void Load(Par2RepairerSourceFile *sourcefile, u64 blocksize);
220 
221   // Try to find a match.
222   //   nextentry   - The entry which we expect to find next. This is used
223   //                 when a sequence of matches are found.
224   //   sourcefile  - Which source file we would prefer to find a match for
225   //                 if there are more than one possible match (with the
226   //                 same crc and hash).
227   //   checksummer - Provides the crc and hash values being tested.
228   //   duplicate   - Set on return if the match would have been valid except
229   //                 for the fact that the block has already been found.
230   const VerificationHashEntry* FindMatch(const VerificationHashEntry *nextentry,
231                                          const Par2RepairerSourceFile *sourcefile,
232                                          FileCheckSummer &checksummer,
233                                          bool &duplicate) const;
234 
235   // Look up based on the block crc
236   const VerificationHashEntry* Lookup(u32 crc) const;
237 
238   // Continue lookup based on the block hash
239   const VerificationHashEntry* Lookup(const VerificationHashEntry *entry,
240                                       const MD5Hash &hash);
241 
242 protected:
243   VerificationHashEntry **hashtable;
244   unsigned int hashmask;
245 };
246 
247 // Search for an entry with the specified crc
Lookup(u32 crc)248 inline const VerificationHashEntry* VerificationHashTable::Lookup(u32 crc) const
249 {
250   if (hashmask)
251   {
252     return VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
253   }
254 
255   return 0;
256 }
257 
258 // Search for an entry with the specified hash
Lookup(const VerificationHashEntry * entry,const MD5Hash & hash)259 inline const VerificationHashEntry* VerificationHashTable::Lookup(const VerificationHashEntry *entry,
260                                                                   const MD5Hash &hash)
261 {
262   return VerificationHashEntry::Search(entry, hash);
263 }
264 
FindMatch(const VerificationHashEntry * suggestedentry,const Par2RepairerSourceFile * sourcefile,FileCheckSummer & checksummer,bool & duplicate)265 inline const VerificationHashEntry* VerificationHashTable::FindMatch(const VerificationHashEntry *suggestedentry,
266                                                                      const Par2RepairerSourceFile *sourcefile,
267                                                                      FileCheckSummer &checksummer,
268                                                                      bool &duplicate) const
269 {
270   duplicate = false;
271 
272   // Get the current checksum from the checksummer
273   u32 crc = checksummer.Checksum();
274 
275   MD5Hash hash;
276   bool havehash = false;
277 
278   // Do we know what the next entry should be
279   if (0 != suggestedentry)
280   {
281     // Is the suggested match supposed to be the last one in the file
282     if (suggestedentry->Next() == 0)
283     {
284       // How long should the last block be
285       u64 length = suggestedentry->GetDataBlock()->GetLength();
286 
287       // Get a short checksum from the checksummer
288       u32 checksum = checksummer.ShortChecksum(length);
289 
290       // Is the checksum correct
291       if (checksum == suggestedentry->Checksum())
292       {
293         // Get a short hash from the checksummer
294         hash = checksummer.ShortHash(length);
295 
296         // If the hash matches as well, then return it
297         if (hash == suggestedentry->Hash())
298         {
299           return suggestedentry;
300         }
301       }
302     }
303     // If the suggested entry has not already been found, compare the checksum
304     else if (!suggestedentry->IsSet() && suggestedentry->Checksum() == crc)
305     {
306       // Get the hash value from the checksummer
307       havehash = true;
308       hash = checksummer.Hash();
309 
310       // If the hash value matches, then return it.
311       if (hash == suggestedentry->Hash())
312       {
313         return suggestedentry;
314       }
315     }
316   }
317 
318   // Look for other possible matches for the checksum
319   const VerificationHashEntry *nextentry = VerificationHashEntry::Search(hashtable[crc & hashmask], crc);
320   if (0 == nextentry)
321     return 0;
322 
323   // If we don't have the hash yet, get it
324   if (!havehash)
325   {
326     hash = checksummer.Hash();
327   }
328 
329   // Look for an entry with a matching hash
330   nextentry = VerificationHashEntry::Search(nextentry, hash);
331   if (0 == nextentry)
332     return 0;
333 
334   // Is there one match with the same checksum and hash, or many
335   if (nextentry->Same() == 0)
336   {
337     // If the match is for a block that is part of a target file
338     // for which we already have a complete match, then don't
339     // return it.
340     if (nextentry->SourceFile()->GetCompleteFile() != 0)
341     {
342       duplicate = true;
343       return 0;
344     }
345 
346     // If we are close to the end of the file and the block
347     // length is wrong, don't return it because it is an
348     // invalid match
349     if (checksummer.ShortBlock() && checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength())
350     {
351       return 0;
352     }
353 
354     // If the match was at the start of the file and it is the first
355     // block for a target file, then return it.
356     if (nextentry->FirstBlock() && checksummer.Offset() == 0)
357     {
358       return nextentry;
359     }
360 
361     // Was this match actually the one which had originally been suggested
362     // but which has presumably already been found
363     if (nextentry == suggestedentry)
364     {
365       // Was the existing match in the same file as the new match
366       if (nextentry->IsSet() &&
367           nextentry->GetDataBlock()->GetDiskFile() == checksummer.GetDiskFile())
368       {
369         // Yes. Don't return it
370         duplicate = true;
371         return 0;
372       }
373       else
374       {
375         // No, it was in a different file. Return it.
376         // This ensures that we can find a perfect match for a target
377         // file even if some of the blocks had already been found
378         // in a different file.
379         return nextentry;
380       }
381     }
382     else
383     {
384       // return it only if it has not already been used
385       if (nextentry->IsSet())
386       {
387         duplicate = true;
388         return 0;
389       }
390 
391       return nextentry;
392     }
393   }
394 
395   // Do we prefer to match entries for a particular source file
396   if (0 != sourcefile)
397   {
398     const VerificationHashEntry *currententry = nextentry;
399     nextentry = 0;
400 
401     // We don't want entries for the wrong source file, ones that
402     // have already been matched, or ones that are the wrong length
403     while (currententry && (currententry->SourceFile() != sourcefile ||
404                             currententry->IsSet() ||
405                             ((checksummer.ShortBlock() && checksummer.BlockLength()) != (currententry->GetDataBlock()->GetLength()))
406                            )
407           )
408     {
409       // If we found an unused entry (which was presumably for the wrong
410       // source file) remember it (providing it is the correct length).
411       if (0 == nextentry && !(currententry->IsSet() ||
412                              ((checksummer.ShortBlock() && checksummer.BlockLength()) != (currententry->GetDataBlock()->GetLength()))
413                              )
414          )
415       {
416         nextentry = currententry;
417       }
418 
419       currententry = currententry->Same();
420     }
421 
422     // If we found an unused entry for the source file we want, return it
423     if (0 != currententry)
424       return currententry;
425   }
426 
427   // Check for an unused entry which is the correct length
428   while (nextentry && (nextentry->IsSet() ||
429                        ((checksummer.ShortBlock()) && (checksummer.BlockLength() != nextentry->GetDataBlock()->GetLength()))
430                       )
431         )
432   {
433     nextentry = nextentry->Same();
434   }
435 
436   // Return what we have found
437   if (nextentry == 0)
438   {
439     duplicate = true;
440   }
441 
442   return nextentry;
443 }
444 
445 #endif // __VERIFICATIONHASHTABLE_H__
446