1 //  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
2 //  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
3 //
4 //  Copyright (c) 2003 Peter Brian Clements
5 //
6 //  par2cmdline is free software; you can redistribute it and/or modify
7 //  it under the terms of the GNU General Public License as published by
8 //  the Free Software Foundation; either version 2 of the License, or
9 //  (at your option) any later version.
10 //
11 //  par2cmdline is distributed in the hope that it will be useful,
12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 //  GNU General Public License for more details.
15 //
16 //  You should have received a copy of the GNU General Public License
17 //  along with this program; if not, write to the Free Software
18 //  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
19 
20 #ifndef __FILECHECKSUMMER_H__
21 #define __FILECHECKSUMMER_H__
22 
23 // This source file defines the FileCheckSummer object which is used
24 // when scanning a data file to find blocks of undamaged data.
25 //
26 // The object uses a "window" into the data file and slides that window
27 // along the file computing the CRC of the data in that window as it
28 // goes. If the computed CRC matches the value for a block of data
29 // from a target data file, then the MD5 Hash value is also computed
30 // and compared with the value for that block of data. When a match
31 // has been confirmed, the object jumps forward to where the next
32 // block of data is expected to start. Whilst the file is being scanned
33 // the object also computes the MD5 Hash of the whole file and of
34 // the first 16k of the file for later tests.
35 
36 class FileCheckSummer
37 {
38 public:
39   FileCheckSummer(DiskFile   *diskfile,
40                   u64         blocksize,
41                   const u32 (&windowtable)[256],
42                   u32         windowmask);
43   ~FileCheckSummer(void);
44 
45   // Start reading the file at the beginning
46   bool Start(void);
47 
48   // Jump ahead the specified distance
49   bool Jump(u64 distance);
50 
51   // Step forward one byte
52   bool Step(void);
53 
54   // Return the current checksum
55   u32 Checksum(void) const;
56 
57   // Compute and return the current hash
58   MD5Hash Hash(void);
59 
60   // Compute short values of checksum and hash
61   u32 ShortChecksum(u64 blocklength);
62   MD5Hash ShortHash(u64 blocklength);
63 
64   // Do we have less than a full block of data
65   bool ShortBlock(void) const;
66   u64 BlockLength(void) const;
67 
68   // Return the current file offset
69   u64 Offset(void) const;
70 
71   // Return the full file hash and the 16k file hash
72   void GetFileHashes(MD5Hash &hashfull, MD5Hash &hash16k) const;
73 
74   // Which disk file is this
GetDiskFile(void)75   const DiskFile* GetDiskFile(void) const {return diskfile;}
76 
77 protected:
78   DiskFile   *diskfile;
79   u64         blocksize;
80   const u32 (&windowtable)[256];
81   u32         windowmask;
82 
83   u64         filesize;
84 
85   u64         currentoffset; // file offset for current window position
86   char       *buffer;        // buffer for reading from the file
87   char       *outpointer;    // position in buffer of scan window
88   char       *inpointer;     // &outpointer[blocksize];
89   char       *tailpointer;   // after last valid data in buffer
90 
91   // File offset for next read
92   u64         readoffset;
93 
94   // The current checksum
95   u32         checksum;
96 
97   // MD5 hash of whole file and of first 16k
98   MD5Context  contextfull;
99   MD5Context  context16k;
100 
101 protected:
102   //void ComputeCurrentCRC(void);
103   void UpdateHashes(u64 offset, const void *buffer, size_t length);
104 
105   //// Fill the buffers with more data from disk
106   bool Fill(void);
107 };
108 
109 // Return the current checksum
110 
Checksum(void)111 inline u32 FileCheckSummer::Checksum(void) const
112 {
113   return checksum;
114 }
115 
116 // Return the current block length
117 
BlockLength(void)118 inline u64 FileCheckSummer::BlockLength(void) const
119 {
120   return min(blocksize, filesize-currentoffset);
121 }
122 
123 // Return whether or not the current block is a short one.
ShortBlock(void)124 inline bool FileCheckSummer::ShortBlock(void) const
125 {
126   return BlockLength() < blocksize;
127 }
128 
129 // Return the current file offset
Offset(void)130 inline u64 FileCheckSummer::Offset(void) const
131 {
132   return currentoffset;
133 }
134 
135 // Step forward one byte
Step(void)136 inline bool FileCheckSummer::Step(void)
137 {
138   // Are we already at the end of the file
139   if (currentoffset >= filesize)
140     return false;
141 
142   // Advance the file offset and check to see if
143   // we have reached the end of the file
144   if (++currentoffset >= filesize)
145   {
146     currentoffset = filesize;
147     tailpointer = outpointer = buffer;
148     memset(buffer, 0, (size_t)blocksize);
149     checksum = 0;
150 
151     return true;
152   }
153 
154   // Get the incoming and outgoing characters
155   char inch = *inpointer++;
156   char outch = *outpointer++;
157 
158   // Update the checksum
159   checksum = windowmask ^ CRCSlideChar(windowmask ^ checksum, inch, outch, windowtable);
160 
161   // Can the window slide further
162   if (outpointer < &buffer[blocksize])
163     return true;
164 
165   assert(outpointer == &buffer[blocksize]);
166 
167   // Copy the data back to the beginning of the buffer
168   memmove(buffer, outpointer, (size_t)blocksize);
169   inpointer = outpointer;
170   outpointer = buffer;
171   tailpointer -= blocksize;
172 
173   // Fill the rest of the buffer
174   return Fill();
175 }
176 
177 
178 #endif // __FILECHECKSUMMER_H__
179