1 //  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
2 //  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
3 //
4 //  Copyright (c) 2003 Peter Brian Clements
5 //
6 //  par2cmdline is free software; you can redistribute it and/or modify
7 //  it under the terms of the GNU General Public License as published by
8 //  the Free Software Foundation; either version 2 of the License, or
9 //  (at your option) any later version.
10 //
11 //  par2cmdline is distributed in the hope that it will be useful,
12 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 //  GNU General Public License for more details.
15 //
16 //  You should have received a copy of the GNU General Public License
17 //  along with this program; if not, write to the Free Software
18 //  Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
19 
20 #include "par2cmdline.h"
21 
22 #ifdef _MSC_VER
23 #ifdef _DEBUG
24 #undef THIS_FILE
25 static char THIS_FILE[]=__FILE__;
26 #define new DEBUG_NEW
27 #endif
28 #endif
29 
// Value of PAR1FILEHEADER::programversion that causes LoadRecoveryFile to set
// ignore16kfilehash, so that the 16k hashes are not compared when matching
// data files.
// NOTE(review): presumably this identifies files written by SmartPar 1.1 - confirm.
static u32 smartpar11 = 0x03000101;
31 
Par1Repairer(void)32 Par1Repairer::Par1Repairer(void)
33 {
34   filelist = 0;
35   filelistsize = 0;
36 
37   blocksize = 0;
38 
39   completefilecount = 0;
40   renamedfilecount = 0;
41   damagedfilecount = 0;
42   missingfilecount = 0;
43 
44   inputbuffer = 0;
45   outputbuffer = 0;
46 
47   noiselevel = CommandLine::nlNormal;
48 }
49 
~Par1Repairer(void)50 Par1Repairer::~Par1Repairer(void)
51 {
52   map<u32,DataBlock*>::iterator i = recoveryblocks.begin();
53   while (i != recoveryblocks.end())
54   {
55     DataBlock *datablock = i->second;
56     delete datablock;
57 
58     ++i;
59   }
60 
61   vector<Par1RepairerSourceFile*>::iterator sourceiterator = sourcefiles.begin();
62   while (sourceiterator != sourcefiles.end())
63   {
64     Par1RepairerSourceFile *sourcefile = *sourceiterator;
65     delete sourcefile;
66     ++sourceiterator;
67   }
68 
69   sourceiterator = extrafiles.begin();
70   while (sourceiterator != extrafiles.end())
71   {
72     Par1RepairerSourceFile *sourcefile = *sourceiterator;
73     delete sourcefile;
74     ++sourceiterator;
75   }
76 
77   delete [] filelist;
78 }
79 
Process(const CommandLine & commandline,bool dorepair)80 Result Par1Repairer::Process(const CommandLine &commandline, bool dorepair)
81 {
82   // How noisy should we be
83   noiselevel = commandline.GetNoiseLevel();
84 
85   // Get filesnames from the command line
86   string par1filename = commandline.GetParFilename();
87   const list<CommandLine::ExtraFile> &extrafiles = commandline.GetExtraFiles();
88 
89   // Determine the searchpath from the location of the main PAR file
90   string name;
91   DiskFile::SplitFilename(par1filename, searchpath, name);
92 
93   // Load the main PAR file
94   if (!LoadRecoveryFile(searchpath + name))
95     return eLogicError;
96 
97   // Load other PAR files related to the main PAR file
98   if (!LoadOtherRecoveryFiles(par1filename))
99     return eLogicError;
100 
101   // Load any extra PAR files specified on the command line
102   if (!LoadExtraRecoveryFiles(extrafiles))
103     return eLogicError;
104 
105   if (noiselevel > CommandLine::nlQuiet)
106     cout << endl << "Verifying source files:" << endl << endl;
107 
108   // Check for the existence of and verify each of the source files
109   if (!VerifySourceFiles())
110     return eFileIOError;
111 
112   if (completefilecount<sourcefiles.size())
113   {
114     if (noiselevel > CommandLine::nlQuiet)
115       cout << endl << "Scanning extra files:" << endl << endl;
116 
117     // Check any other files specified on the command line to see if they are
118     // actually copies of the source files that have the wrong filename
119     if (!VerifyExtraFiles(extrafiles))
120       return eLogicError;
121   }
122 
123   // Find out how much data we have found
124   UpdateVerificationResults();
125 
126   if (noiselevel > CommandLine::nlSilent)
127     cout << endl;
128 
129   // Check the verification results and report the details
130   if (!CheckVerificationResults())
131     return eRepairNotPossible;
132 
133   // Are any of the files incomplete
134   if (completefilecount<sourcefiles.size())
135   {
136     // Do we want to carry out a repair
137     if (dorepair)
138     {
139       if (noiselevel > CommandLine::nlSilent)
140         cout << endl;
141 
142       // Rename any damaged or missnamed target files.
143       if (!RenameTargetFiles())
144         return eFileIOError;
145 
146       // Are we still missing any files
147       if (completefilecount<sourcefiles.size())
148       {
149         // Work out which files are being repaired, create them, and allocate
150         // target DataBlocks to them, and remember them for later verification.
151         if (!CreateTargetFiles())
152           return eFileIOError;
153 
154         // Work out which data blocks are available, which need to be recreated,
155         // and compute the appropriate Reed Solomon matrix.
156         if (!ComputeRSmatrix())
157         {
158           // Delete all of the partly reconstructed files
159           DeleteIncompleteTargetFiles();
160           return eFileIOError;
161         }
162 
163         // Allocate memory buffers for reading and writing data to disk.
164         if (!AllocateBuffers(commandline.GetMemoryLimit()))
165         {
166           // Delete all of the partly reconstructed files
167           DeleteIncompleteTargetFiles();
168           return eMemoryError;
169         }
170         if (noiselevel > CommandLine::nlSilent)
171           cout << endl;
172 
173         // Set the total amount of data to be processed.
174         progress = 0;
175         totaldata = blocksize * sourcefiles.size() * verifylist.size();
176 
177         // Start at an offset of 0 within a block.
178         u64 blockoffset = 0;
179         while (blockoffset < blocksize) // Continue until the end of the block.
180         {
181           // Work out how much data to process this time.
182           size_t blocklength = (size_t)min((u64)chunksize, blocksize-blockoffset);
183 
184           // Read source data, process it through the RS matrix and write it to disk.
185           if (!ProcessData(blockoffset, blocklength))
186           {
187             // Delete all of the partly reconstructed files
188             DeleteIncompleteTargetFiles();
189             return eFileIOError;
190           }
191 
192           // Advance to the need offset within each block
193           blockoffset += blocklength;
194         }
195 
196         if (noiselevel > CommandLine::nlSilent)
197           cout << endl << "Verifying repaired files:" << endl << endl;
198 
199         // Verify that all of the reconstructed target files are now correct
200         if (!VerifyTargetFiles())
201         {
202           // Delete all of the partly reconstructed files
203           DeleteIncompleteTargetFiles();
204           return eFileIOError;
205         }
206       }
207 
208       // Are all of the target files now complete?
209       if (completefilecount<sourcefiles.size())
210       {
211         cerr << "Repair Failed." << endl;
212         return eRepairFailed;
213       }
214       else
215       {
216         if (noiselevel > CommandLine::nlSilent)
217           cout << endl << "Repair complete." << endl;
218       }
219     }
220     else
221     {
222       return eRepairPossible;
223     }
224   }
225 
226   return eSuccess;
227 }
228 
LoadRecoveryFile(string filename)229 bool Par1Repairer::LoadRecoveryFile(string filename)
230 {
231   // Skip the file if it has already been processed
232   if (diskfilemap.Find(filename) != 0)
233   {
234     return true;
235   }
236 
237   DiskFile *diskfile = new DiskFile;
238 
239   // Open the file
240   if (!diskfile->Open(filename))
241   {
242     // If we could not open the file, ignore the error and
243     // proceed to the next file
244     delete diskfile;
245     return true;
246   }
247 
248   if (noiselevel > CommandLine::nlSilent)
249   {
250     string path;
251     string name;
252     DiskFile::SplitFilename(filename, path, name);
253     cout << "Loading \"" << name << "\"." << endl;
254   }
255 
256   bool havevolume = false;
257   u32 volumenumber = 0;
258 
259   // How big is the file
260   u64 filesize = diskfile->FileSize();
261   if (filesize >= sizeof(PAR1FILEHEADER))
262   {
263     // Allocate a buffer to read data into
264     size_t buffersize = (size_t)min((u64)1048576, filesize);
265     u8 *buffer = new u8[buffersize];
266 
267     do
268     {
269       PAR1FILEHEADER fileheader;
270       if (!diskfile->Read(0, &fileheader, sizeof(fileheader)))
271         break;
272 
273       // Is this really a PAR file?
274       if (fileheader.magic != par1_magic)
275         break;
276 
277       // Is the version number correct?
278       if (fileheader.fileversion != 0x00010000)
279         break;
280 
281       ignore16kfilehash = (fileheader.programversion == smartpar11);
282 
283       // Prepare to carry out MD5 Hash check of the Control Hash
284       MD5Context context;
285       u64 offset = offsetof(PAR1FILEHEADER, sethash);
286 
287       // Process until the end of the file is reached
288       while (offset < filesize)
289       {
290         // How much data should we read?
291         size_t want = (size_t)min((u64)buffersize, filesize-offset);
292         if (!diskfile->Read(offset, buffer, want))
293           break;
294 
295         context.Update(buffer, want);
296 
297         offset += want;
298       }
299 
300       // Did we read the whole file
301       if (offset < filesize)
302         break;
303 
304       // Compute the hash value
305       MD5Hash hash;
306       context.Final(hash);
307 
308       // Is it correct?
309       if (hash != fileheader.controlhash)
310         break;
311 
312       // Check that the volume number is ok
313       if (fileheader.volumenumber >= 256)
314         break;
315 
316       // Are there any files?
317       if (fileheader.numberoffiles == 0 ||
318           fileheader.filelistoffset < sizeof(PAR1FILEHEADER) ||
319           fileheader.filelistsize == 0)
320         break;
321 
322       // Verify that the file list and data offsets are ok
323       if ((fileheader.filelistoffset + fileheader.filelistsize > filesize)
324           ||
325           (fileheader.datasize && (fileheader.dataoffset < sizeof(fileheader) || fileheader.dataoffset + fileheader.datasize > filesize))
326           ||
327           (fileheader.datasize && (((fileheader.filelistoffset <= fileheader.dataoffset) && (fileheader.dataoffset < fileheader.filelistoffset+fileheader.filelistsize)) || fileheader.dataoffset <= (fileheader.filelistoffset && (fileheader.filelistoffset) < (fileheader.dataoffset + fileheader.datasize)))))
328         break;
329 
330       // Check the size of the file list
331       if (fileheader.filelistsize > 200000)
332         break;
333 
334       // If we already have a copy of the file list, make sure this one has the same size
335       if (filelist != 0 && filelistsize != fileheader.filelistsize)
336         break;
337 
338       // Allocate a buffer to hold a copy of the file list
339       unsigned char *temp = new unsigned char[(size_t)fileheader.filelistsize];
340 
341       // Read the file list into the buffer
342       if (!diskfile->Read(fileheader.filelistoffset, temp, (size_t)fileheader.filelistsize))
343       {
344         delete [] temp;
345         break;
346       }
347 
348       // If we already have a copy of the file list, make sure this copy is identical
349       if (filelist != 0)
350       {
351         bool match = (0 == memcmp(filelist, temp, filelistsize));
352         delete [] temp;
353 
354         if (!match)
355           break;
356       }
357       else
358       {
359         // Prepare to scan the file list
360         unsigned char *current = temp;
361         size_t remaining = (size_t)fileheader.filelistsize;
362         unsigned int fileindex = 0;
363 
364         // Allocate a buffer to copy each file entry into so that
365         // all fields will be correctly aligned in memory.
366         PAR1FILEENTRY *fileentry = (PAR1FILEENTRY*)new u64[(remaining + sizeof(u64)-1)/sizeof(u64)];
367 
368         // Process until we run out of files or data
369         while (remaining > 0 && fileindex < fileheader.numberoffiles)
370         {
371           // Copy fixed portion of file entry
372           memcpy((void*)fileentry, (void*)current, sizeof(PAR1FILEENTRY));
373 
374           // Is there enough data remaining
375           if (remaining < sizeof(fileentry->entrysize) ||
376               remaining < fileentry->entrysize)
377             break;
378 
379           // Check the length of the filename
380           if (fileentry->entrysize <= sizeof(PAR1FILEENTRY))
381             break;
382 
383           // Check the file size
384           if (blocksize < fileentry->filesize)
385             blocksize = fileentry->filesize;
386 
387           // Copy whole of file entry
388           memcpy((void*)fileentry, (void*)current, (size_t)(u64)fileentry->entrysize);
389 
390           // Create source file and add it to the appropriate list
391           Par1RepairerSourceFile *sourcefile = new Par1RepairerSourceFile(fileentry, searchpath);
392           if (fileentry->status & INPARITYVOLUME)
393           {
394             sourcefiles.push_back(sourcefile);
395           }
396           else
397           {
398             extrafiles.push_back(sourcefile);
399           }
400 
401           remaining -= (size_t)fileentry->entrysize;
402           current += (size_t)fileentry->entrysize;
403 
404           fileindex++;
405         }
406 
407         delete [] (u64*)fileentry;
408 
409         // Did we find the correct number of files
410         if (fileindex < fileheader.numberoffiles)
411         {
412           vector<Par1RepairerSourceFile*>::iterator i = sourcefiles.begin();
413           while (i != sourcefiles.end())
414           {
415             Par1RepairerSourceFile *sourcefile = *i;
416             delete sourcefile;
417             ++i;
418           }
419           sourcefiles.clear();
420 
421           i = extrafiles.begin();
422           while (i != extrafiles.end())
423           {
424             Par1RepairerSourceFile *sourcefile = *i;
425             delete sourcefile;
426             ++i;
427           }
428           extrafiles.clear();
429 
430           delete [] temp;
431           break;
432         }
433 
434         filelist = temp;
435         filelistsize = (u32)fileheader.filelistsize;
436       }
437 
438       // Is this a recovery volume?
439       if (fileheader.volumenumber > 0)
440       {
441         // Make sure there is data and that it is the correct size
442         if (fileheader.dataoffset == 0 || fileheader.datasize != blocksize)
443           break;
444 
445         // What volume number is this?
446         volumenumber = (u32)(fileheader.volumenumber - 1);
447 
448         // Do we already have this volume?
449         if (recoveryblocks.find(volumenumber) == recoveryblocks.end())
450         {
451           // Create a data block
452           DataBlock *datablock = new DataBlock;
453           datablock->SetLength(blocksize);
454           datablock->SetLocation(diskfile, fileheader.dataoffset);
455 
456           // Store it in the map
457           recoveryblocks.insert(pair<u32, DataBlock*>(volumenumber, datablock));
458 
459           havevolume = true;
460         }
461       }
462     } while (false);
463 
464     delete [] buffer;
465   }
466 
467   // We have finished with the file for now
468   diskfile->Close();
469 
470   if (noiselevel > CommandLine::nlQuiet)
471   {
472     if (havevolume)
473     {
474       cout << "Loaded recovery volume " << volumenumber << endl;
475     }
476     else
477     {
478       cout << "No new recovery volumes found" << endl;
479     }
480   }
481 
482   // Remember that the file was processed
483   bool success = diskfilemap.Insert(diskfile);
484   assert(success);
485 
486   return true;
487 }
488 
LoadOtherRecoveryFiles(string filename)489 bool Par1Repairer::LoadOtherRecoveryFiles(string filename)
490 {
491   // Split the original PAR filename into path and name parts
492   string path;
493   string name;
494   DiskFile::SplitFilename(filename, path, name);
495 
496   // Find the file extension
497   string::size_type where = name.find_last_of('.');
498   if (where != string::npos)
499   {
500     // remove it
501     name = name.substr(0, where);
502   }
503 
504   // Search for additional PAR files
505   string wildcard = name + ".???";
506   list<string> *files = DiskFile::FindFiles(path, wildcard);
507 
508   for (list<string>::const_iterator s=files->begin(); s!=files->end(); ++s)
509   {
510     string filename = *s;
511 
512     // Find the file extension
513     where = filename.find_last_of('.');
514     if (where != string::npos)
515     {
516       string tail = filename.substr(where+1);
517 
518       // Check the the file extension is the correct form
519       if ((tail[0] == 'P' || tail[0] == 'p') &&
520           (
521             ((tail[1] == 'A' || tail[1] == 'a') && (tail[2] == 'R' || tail[2] == 'r'))
522             ||
523             (isdigit(tail[1]) && isdigit(tail[2]))
524           ))
525       {
526         LoadRecoveryFile(filename);
527       }
528     }
529   }
530 
531   delete files;
532 
533   return true;
534 }
535 
536 // Load packets from any other PAR files whose names are given on the command line
LoadExtraRecoveryFiles(const list<CommandLine::ExtraFile> & extrafiles)537 bool Par1Repairer::LoadExtraRecoveryFiles(const list<CommandLine::ExtraFile> &extrafiles)
538 {
539   for (ExtraFileIterator i=extrafiles.begin(); i!=extrafiles.end(); i++)
540   {
541     string filename = i->FileName();
542 
543     // Find the file extension
544     string::size_type where = filename.find_last_of('.');
545     if (where != string::npos)
546     {
547       string tail = filename.substr(where+1);
548 
549       // Check the the file extension is the correct form
550       if ((tail[0] == 'P' || tail[0] == 'p') &&
551           (
552             ((tail[1] == 'A' || tail[1] == 'a') && (tail[2] == 'R' || tail[2] == 'r'))
553             ||
554             (isdigit(tail[1]) && isdigit(tail[2]))
555           ))
556       {
557         LoadRecoveryFile(filename);
558       }
559     }
560   }
561 
562   return true;
563 }
564 
565 // Attempt to verify all of the source files
VerifySourceFiles(void)566 bool Par1Repairer::VerifySourceFiles(void)
567 {
568   bool finalresult = true;
569 
570   u32 filenumber = 0;
571   vector<Par1RepairerSourceFile*>::iterator sourceiterator = sourcefiles.begin();
572   while (sourceiterator != sourcefiles.end())
573   {
574     Par1RepairerSourceFile *sourcefile = *sourceiterator;
575 
576     string filename = sourcefile->FileName();
577 
578     // Check to see if we have already used this file
579     if (diskfilemap.Find(filename) != 0)
580     {
581       // The file has already been used!
582 
583       cerr << "Source file " << filenumber+1 << " is a duplicate." << endl;
584 
585       return false;
586     }
587 
588     DiskFile *diskfile = new DiskFile;
589 
590     // Does the target file exist
591     if (diskfile->Open(filename))
592     {
593       // Yes. Record that fact.
594       sourcefile->SetTargetExists(true);
595 
596       // Remember that the DiskFile is the target file
597       sourcefile->SetTargetFile(diskfile);
598 
599       // Remember that we have processed this file
600       bool success = diskfilemap.Insert(diskfile);
601       assert(success);
602 
603       // Do the actual verification
604       if (!VerifyDataFile(diskfile, sourcefile))
605         finalresult = false;
606 
607       // We have finished with the file for now
608       diskfile->Close();
609 
610       // Find out how much data we have found
611       UpdateVerificationResults();
612     }
613     else
614     {
615       // The file does not exist.
616       delete diskfile;
617 
618       if (noiselevel > CommandLine::nlSilent)
619       {
620         string path;
621         string name;
622         DiskFile::SplitFilename(filename, path, name);
623 
624         cout << "Target: \"" << name << "\" - missing." << endl;
625       }
626     }
627 
628     ++sourceiterator;
629     ++filenumber;
630   }
631 
632   return finalresult;
633 }
634 
635 // Scan any extra files specified on the command line
VerifyExtraFiles(const list<CommandLine::ExtraFile> & extrafiles)636 bool Par1Repairer::VerifyExtraFiles(const list<CommandLine::ExtraFile> &extrafiles)
637 {
638   for (ExtraFileIterator i=extrafiles.begin();
639        i!=extrafiles.end() && completefilecount<sourcefiles.size();
640        ++i)
641   {
642     string filename = i->FileName();
643 
644     bool skip = false;
645 
646     // Find the file extension
647     string::size_type where = filename.find_last_of('.');
648     if (where != string::npos)
649     {
650       string tail = filename.substr(where+1);
651 
652       // Check the the file extension is the correct form
653       if ((tail[0] == 'P' || tail[0] == 'p') &&
654           (
655             ((tail[1] == 'A' || tail[1] == 'a') && (tail[2] == 'R' || tail[2] == 'r'))
656             ||
657             (isdigit(tail[1]) && isdigit(tail[2]))
658           ))
659       {
660         skip = true;
661       }
662     }
663 
664     if (!skip)
665     {
666       filename = DiskFile::GetCanonicalPathname(filename);
667 
668       // Has this file already been dealt with
669       if (diskfilemap.Find(filename) == 0)
670       {
671         DiskFile *diskfile = new DiskFile;
672 
673         // Does the file exist
674         if (!diskfile->Open(filename))
675         {
676           delete diskfile;
677           continue;
678         }
679 
680         // Remember that we have processed this file
681         bool success = diskfilemap.Insert(diskfile);
682         assert(success);
683 
684         // Do the actual verification
685         VerifyDataFile(diskfile, 0);
686         // Ignore errors
687 
688         // We have finished with the file for now
689         diskfile->Close();
690 
691         // Find out how much data we have found
692         UpdateVerificationResults();
693       }
694     }
695   }
696 
697   return true;
698 }
699 
700 
VerifyDataFile(DiskFile * diskfile,Par1RepairerSourceFile * sourcefile)701 bool Par1Repairer::VerifyDataFile(DiskFile *diskfile, Par1RepairerSourceFile *sourcefile)
702 {
703   Par1RepairerSourceFile *match = 0;
704 
705   string path;
706   string name;
707   DiskFile::SplitFilename(diskfile->FileName(), path, name);
708 
709   // How big is the file we are checking
710   u64 filesize = diskfile->FileSize();
711 
712   if (filesize == 0)
713     return true;
714 
715   // Search for the first file that is the correct size
716   vector<Par1RepairerSourceFile*>::iterator sourceiterator = sourcefiles.begin();
717   while (sourceiterator != sourcefiles.end() &&
718          filesize != (*sourceiterator)->FileSize())
719   {
720     ++sourceiterator;
721   }
722 
723   // Are there any files that are the correct size?
724   if (sourceiterator != sourcefiles.end())
725   {
726     // Allocate a buffer to compute the file hash
727     size_t buffersize = (size_t)min((u64)1048576, filesize);
728     char *buffer = new char[buffersize];
729 
730     // Read the first 16k of the file
731     size_t want = (size_t)min((u64)16384, filesize);
732     if (!diskfile->Read(0, buffer, want))
733     {
734       delete [] buffer;
735       return false;
736     }
737 
738     // Compute the MD5 hash of the first 16k
739     MD5Context contextfull;
740     contextfull.Update(buffer, want);
741     MD5Context context16k = contextfull;
742     MD5Hash hash16k;
743     context16k.Final(hash16k);
744 
745     if (!ignore16kfilehash)
746     {
747       // Search for the first file that has the correct 16k hash
748       while (sourceiterator != sourcefiles.end() &&
749             (filesize != (*sourceiterator)->FileSize() ||
750               hash16k != (*sourceiterator)->Hash16k()))
751       {
752         ++sourceiterator;
753       }
754     }
755 
756     // Are there any files with the correct 16k hash?
757     if (sourceiterator != sourcefiles.end())
758     {
759       // Compute the MD5 hash of the whole file
760       if (filesize > 16384)
761       {
762         u64 progress = 0;
763         u64 offset = 16384;
764         while (offset < filesize)
765         {
766           if (noiselevel > CommandLine::nlQuiet)
767           {
768             // Update a progress indicator
769             u32 oldfraction = (u32)(1000 * (progress) / filesize);
770             u32 newfraction = (u32)(1000 * (progress=offset) / filesize);
771             if (oldfraction != newfraction)
772             {
773               cout << "Scanning: \"" << name << "\": " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
774             }
775           }
776 
777           want = (size_t)min((u64)buffersize, filesize-offset);
778 
779           if (!diskfile->Read(offset, buffer, want))
780           {
781             delete [] buffer;
782             return false;
783           }
784 
785           contextfull.Update(buffer, want);
786 
787           offset += want;
788         }
789       }
790 
791       MD5Hash hashfull;
792       contextfull.Final(hashfull);
793 
794       // Search for the first file that has the correct full hash
795       while (sourceiterator != sourcefiles.end() &&
796             (filesize != (*sourceiterator)->FileSize() ||
797               (!ignore16kfilehash && hash16k != (*sourceiterator)->Hash16k()) ||
798               hashfull != (*sourceiterator)->HashFull()))
799       {
800         ++sourceiterator;
801       }
802 
803       // Are there any files with the correct full hash?
804       if (sourceiterator != sourcefiles.end())
805       {
806         // If a source file was originally specified, check to see if it is a match
807         if (sourcefile != 0 &&
808             sourcefile->FileSize() == filesize &&
809             (ignore16kfilehash || sourcefile->Hash16k() == hash16k) &&
810             sourcefile->HashFull() == hashfull)
811         {
812           match = sourcefile;
813         }
814         else
815         {
816           // Search for a file which matches and has not already been matched
817           while (sourceiterator != sourcefiles.end() &&
818                 (filesize != (*sourceiterator)->FileSize() ||
819                   (!ignore16kfilehash && hash16k != (*sourceiterator)->Hash16k()) ||
820                   hashfull != (*sourceiterator)->HashFull() ||
821                   (*sourceiterator)->GetCompleteFile() != 0))
822           {
823             ++sourceiterator;
824           }
825 
826           // Did we find a match
827           if (sourceiterator != sourcefiles.end())
828           {
829             match = *sourceiterator;
830           }
831         }
832       }
833     }
834 
835     delete [] buffer;
836   }
837 
838   // Did we find a match
839   if (match != 0)
840   {
841     match->SetCompleteFile(diskfile);
842 
843     if (noiselevel > CommandLine::nlSilent)
844     {
845       // Was the match the file we were originally looking for
846       if (match == sourcefile)
847       {
848         cout << "Target: \"" << name << "\" - found." << endl;
849       }
850       // Were we looking for a specific file
851       else if (sourcefile != 0)
852       {
853         string targetname;
854         DiskFile::SplitFilename(sourcefile->FileName(), path, targetname);
855 
856         cout << "Target: \""
857               << name
858               << "\" - is a match for \""
859               << targetname
860               << "\"."
861               << endl;
862       }
863     }
864     else
865     {
866       if (noiselevel > CommandLine::nlSilent)
867       {
868         string targetname;
869         DiskFile::SplitFilename(match->FileName(), path, targetname);
870 
871         cout << "File: \""
872               << name
873               << "\" - is a match for \""
874               << targetname
875               << "\"."
876               << endl;
877       }
878     }
879   }
880   else
881   {
882     if (noiselevel > CommandLine:: nlSilent)
883       cout << "File: \""
884             << name
885             << "\" - no data found."
886             << endl;
887   }
888 
889   return true;
890 }
891 
UpdateVerificationResults(void)892 void Par1Repairer::UpdateVerificationResults(void)
893 {
894   completefilecount = 0;
895   renamedfilecount = 0;
896   damagedfilecount = 0;
897   missingfilecount = 0;
898 
899   vector<Par1RepairerSourceFile*>::iterator sf = sourcefiles.begin();
900 
901   // Check the recoverable files
902   while (sf != sourcefiles.end())
903   {
904     Par1RepairerSourceFile *sourcefile = *sf;
905 
906     // Was a perfect match for the file found
907     if (sourcefile->GetCompleteFile() != 0)
908     {
909       // Is it the target file or a different one
910       if (sourcefile->GetCompleteFile() == sourcefile->GetTargetFile())
911       {
912         completefilecount++;
913       }
914       else
915       {
916         renamedfilecount++;
917       }
918     }
919     else
920     {
921       // Does the target file exist
922       if (sourcefile->GetTargetExists())
923       {
924         damagedfilecount++;
925       }
926       else
927       {
928         missingfilecount++;
929       }
930     }
931 
932     ++sf;
933   }
934 }
935 
CheckVerificationResults(void)936 bool Par1Repairer::CheckVerificationResults(void)
937 {
938   // Is repair needed
939   if (completefilecount < sourcefiles.size() ||
940       renamedfilecount > 0 ||
941       damagedfilecount > 0 ||
942       missingfilecount > 0)
943   {
944     if (noiselevel > CommandLine::nlSilent)
945       cout << "Repair is required." << endl;
946     if (noiselevel > CommandLine::nlQuiet)
947     {
948       if (renamedfilecount > 0) cout << renamedfilecount << " file(s) have the wrong name." << endl;
949       if (missingfilecount > 0) cout << missingfilecount << " file(s) are missing." << endl;
950       if (damagedfilecount > 0) cout << damagedfilecount << " file(s) exist but are damaged." << endl;
951       if (completefilecount > 0) cout << completefilecount << " file(s) are ok." << endl;
952     }
953 
954     // Is repair possible
955     if (recoveryblocks.size() >= damagedfilecount+missingfilecount)
956     {
957       if (noiselevel > CommandLine::nlSilent)
958         cout << "Repair is possible." << endl;
959 
960       if (noiselevel > CommandLine::nlQuiet)
961       {
962         if (recoveryblocks.size() > damagedfilecount+missingfilecount)
963           cout << "You have an excess of "
964                << (u32)recoveryblocks.size() - (damagedfilecount+missingfilecount)
965                << " recovery files." << endl;
966 
967         if (damagedfilecount+missingfilecount > 0)
968           cout << damagedfilecount+missingfilecount
969                << " recovery files will be used to repair." << endl;
970         else if (recoveryblocks.size())
971           cout << "None of the recovery files will be used for the repair." << endl;
972       }
973 
974       return true;
975     }
976     else
977     {
978       if (noiselevel > CommandLine::nlSilent)
979       {
980         cout << "Repair is not possible." << endl;
981         cout << "You need " << damagedfilecount+missingfilecount - recoveryblocks.size()
982              << " more recovery files to be able to repair." << endl;
983       }
984 
985       return false;
986     }
987   }
988   else
989   {
990     if (noiselevel > CommandLine::nlSilent)
991       cout << "All files are correct, repair is not required." << endl;
992 
993     return true;
994   }
995 
996   return true;
997 }
998 
RenameTargetFiles(void)999 bool Par1Repairer::RenameTargetFiles(void)
1000 {
1001   vector<Par1RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1002 
1003   // Rename any damaged target files
1004   while (sf != sourcefiles.end())
1005   {
1006     Par1RepairerSourceFile *sourcefile = *sf;
1007 
1008     // If the target file exists but is not a complete version of the file
1009     if (sourcefile->GetTargetExists() &&
1010         sourcefile->GetTargetFile() != sourcefile->GetCompleteFile())
1011     {
1012       DiskFile *targetfile = sourcefile->GetTargetFile();
1013 
1014       // Rename it
1015       diskfilemap.Remove(targetfile);
1016       if (!targetfile->Rename())
1017         return false;
1018       bool success = diskfilemap.Insert(targetfile);
1019       assert(success);
1020 
1021       // We no longer have a target file
1022       sourcefile->SetTargetExists(false);
1023       sourcefile->SetTargetFile(0);
1024     }
1025 
1026     ++sf;
1027   }
1028 
1029   sf = sourcefiles.begin();
1030 
1031   // Rename any missnamed but complete versions of the files
1032   while (sf != sourcefiles.end())
1033   {
1034     Par1RepairerSourceFile *sourcefile = *sf;
1035 
1036     // If there is no targetfile and there is a complete version
1037     if (sourcefile->GetTargetFile() == 0 &&
1038         sourcefile->GetCompleteFile() != 0)
1039     {
1040       DiskFile *targetfile = sourcefile->GetCompleteFile();
1041 
1042       // Rename it
1043       diskfilemap.Remove(targetfile);
1044       if (!targetfile->Rename(sourcefile->FileName()))
1045         return false;
1046       bool success = diskfilemap.Insert(targetfile);
1047       assert(success);
1048 
1049       // This file is now the target file
1050       sourcefile->SetTargetExists(true);
1051       sourcefile->SetTargetFile(targetfile);
1052 
1053       // We have one more complete file
1054       completefilecount++;
1055     }
1056 
1057     ++sf;
1058   }
1059 
1060   return true;
1061 }
1062 
1063 // Work out which files are being repaired, create them, and allocate
1064 // target DataBlocks to them, and remember them for later verification.
CreateTargetFiles(void)1065 bool Par1Repairer::CreateTargetFiles(void)
1066 {
1067   vector<Par1RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1068 
1069   // Create any missing target files
1070   while (sf != sourcefiles.end())
1071   {
1072     Par1RepairerSourceFile *sourcefile = *sf;
1073 
1074     // If the file does not exist
1075     if (!sourcefile->GetTargetExists())
1076     {
1077       DiskFile *targetfile = new DiskFile;
1078       string filename = sourcefile->FileName();
1079       u64 filesize = sourcefile->FileSize();
1080 
1081       // Create the target file
1082       if (!targetfile->Create(filename, filesize))
1083       {
1084         delete targetfile;
1085         return false;
1086       }
1087 
1088       // This file is now the target file
1089       sourcefile->SetTargetExists(true);
1090       sourcefile->SetTargetFile(targetfile);
1091 
1092       // Remember this file
1093       bool success = diskfilemap.Insert(targetfile);
1094       assert(success);
1095 
1096       sourcefile->SetTargetBlock(targetfile);
1097 
1098       // Add the file to the list of those that will need to be verified
1099       // once the repair has completed.
1100       verifylist.push_back(sourcefile);
1101     }
1102 
1103     ++sf;
1104   }
1105 
1106   return true;
1107 }
1108 
1109 // Work out which data blocks are available, which need to be recreated,
1110 // and compute the appropriate Reed Solomon matrix.
ComputeRSmatrix(void)1111 bool Par1Repairer::ComputeRSmatrix(void)
1112 {
1113   inputblocks.resize(sourcefiles.size()); // The DataBlocks that will read from disk
1114   outputblocks.resize(verifylist.size()); // Those DataBlocks that will re recalculated
1115 
1116   vector<DataBlock*>::iterator inputblock  = inputblocks.begin();
1117   vector<DataBlock*>::iterator outputblock = outputblocks.begin();
1118 
1119   // Build an array listing which source data blocks are present and which are missing
1120   vector<bool> present;
1121   present.resize(sourcefiles.size());
1122 
1123   vector<Par1RepairerSourceFile*>::iterator sourceiterator = sourcefiles.begin();
1124   vector<bool>::iterator              pres = present.begin();
1125 
1126   // Iterate through all source files
1127   while (sourceiterator != sourcefiles.end())
1128   {
1129     Par1RepairerSourceFile *sourcefile = *sourceiterator;
1130     DataBlock *sourceblock = sourcefile->SourceBlock();
1131     DataBlock *targetblock = sourcefile->TargetBlock();
1132 
1133     // Was this block found
1134     if (sourceblock->IsSet())
1135     {
1136       // Open the file the block was found in.
1137       if (!sourceblock->Open())
1138       {
1139         return false;
1140       }
1141 
1142       // Record that the block was found
1143       *pres = true;
1144 
1145       // Add the block to the list of those which will be read
1146       // as input (and which might also need to be copied).
1147       *inputblock = sourceblock;
1148       ++inputblock;
1149     }
1150     else
1151     {
1152       // Record that the block was missing
1153       *pres = false;
1154 
1155       // Add the block to the list of those to be written
1156       *outputblock = targetblock;
1157       ++outputblock;
1158     }
1159 
1160     ++sourceiterator;
1161     ++pres;
1162   }
1163 
1164   // Set the number of source blocks and which of them are present
1165   if (!rs.SetInput(present))
1166   {
1167     return false;
1168   }
1169 
1170   // Start iterating through the available recovery packets
1171   map<u32, DataBlock*>::iterator recoveryiterator = recoveryblocks.begin();
1172 
1173   // Continue to fill the remaining list of data blocks to be read
1174   while (inputblock != inputblocks.end())
1175   {
1176     // Get the next available recovery block
1177     u32        exponent      = recoveryiterator->first;
1178     DataBlock *recoveryblock = recoveryiterator->second;
1179 
1180     // Make sure the file is open
1181     if (!recoveryblock->Open())
1182     {
1183       return false;
1184     }
1185     // Add the recovery block to the list of blocks that will be read
1186     *inputblock = recoveryblock;
1187 
1188     // Record that the corresponding exponent value is the next one
1189     // to use in the RS matrix
1190     if (!rs.SetOutput(true, (u16)exponent))
1191     {
1192       return false;
1193     }
1194 
1195     ++inputblock;
1196     ++recoveryiterator;
1197   }
1198 
1199   // If we need to, compute and solve the RS matrix
1200   if (verifylist.size() == 0)
1201   {
1202     return true;
1203   }
1204 
1205   bool success = rs.Compute(noiselevel);
1206   return success;
1207 }
1208 
1209 // Allocate memory buffers for reading and writing data to disk.
AllocateBuffers(size_t memorylimit)1210 bool Par1Repairer::AllocateBuffers(size_t memorylimit)
1211 {
1212   // Would single pass processing use too much memory
1213   if (blocksize * verifylist.size() > memorylimit)
1214   {
1215     // Pick a size that is small enough
1216     chunksize = ~3 & (memorylimit / verifylist.size());
1217   }
1218   else
1219   {
1220     chunksize = (size_t)blocksize;
1221   }
1222 
1223   // Allocate the two buffers
1224   inputbuffersize = (size_t)chunksize;
1225   inputbuffer = new u8[inputbuffersize];
1226   outputbufferalignment = (inputbuffersize + sizeof(u32)-1) & ~(sizeof(u32)-1);
1227   outputbuffersize = outputbufferalignment * verifylist.size();
1228   outputbuffer = new u8[outputbuffersize];
1229 
1230   if (inputbuffer == NULL || outputbuffer == NULL)
1231   {
1232     cerr << "Could not allocate buffer memory." << endl;
1233     return false;
1234   }
1235 
1236   return true;
1237 }
1238 
1239 // Read source data, process it through the RS matrix and write it to disk.
ProcessData(u64 blockoffset,size_t blocklength)1240 bool Par1Repairer::ProcessData(u64 blockoffset, size_t blocklength)
1241 {
1242   u64 totalwritten = 0;
1243   // Clear the output buffer
1244   memset(outputbuffer, 0, outputbuffersize);
1245 
1246   vector<DataBlock*>::iterator inputblock = inputblocks.begin();
1247   u32                          inputindex = 0;
1248 
1249   // Are there any blocks which need to be reconstructed
1250   if (verifylist.size() > 0)
1251   {
1252     // For each input block
1253     while (inputblock != inputblocks.end())
1254     {
1255       // Read data from the current input block
1256       if (!(*inputblock)->ReadData(blockoffset, blocklength, inputbuffer))
1257         return false;
1258 
1259       // For each output block
1260       for (u32 outputindex=0; outputindex<verifylist.size(); outputindex++)
1261       {
1262         // Select the appropriate part of the output buffer
1263         void *outbuf = &outputbuffer[outputbufferalignment * outputindex];
1264 
1265         // Process the data
1266         rs.Process(blocklength, inputindex, inputbuffer, outputindex, outbuf);
1267 
1268         if (noiselevel > CommandLine::nlQuiet)
1269         {
1270           // Update a progress indicator
1271           u32 oldfraction = (u32)(1000 * progress / totaldata);
1272           progress += blocklength;
1273           u32 newfraction = (u32)(1000 * progress / totaldata);
1274 
1275           if (oldfraction != newfraction)
1276           {
1277             cout << "Repairing: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
1278           }
1279         }
1280       }
1281 
1282       ++inputblock;
1283       ++inputindex;
1284     }
1285   }
1286 
1287   if (noiselevel > CommandLine::nlQuiet)
1288     cout << "Writing recovered data\r";
1289 
1290   // For each output block that has been recomputed
1291   vector<DataBlock*>::iterator outputblock = outputblocks.begin();
1292   for (u32 outputindex=0; outputindex<verifylist.size();outputindex++)
1293   {
1294     // Select the appropriate part of the output buffer
1295     char *outbuf = &((char*)outputbuffer)[outputbufferalignment * outputindex];
1296 
1297     // Write the data to the target file
1298     size_t wrote;
1299     if (!(*outputblock)->WriteData(blockoffset, blocklength, outbuf, wrote))
1300       return false;
1301     totalwritten += wrote;
1302 
1303     ++outputblock;
1304   }
1305 
1306   if (noiselevel > CommandLine::nlQuiet)
1307     cout << "Wrote " << totalwritten << " bytes to disk" << endl;
1308 
1309   return true;
1310 }
1311 
1312 // Verify that all of the reconstructed target files are now correct
VerifyTargetFiles(void)1313 bool Par1Repairer::VerifyTargetFiles(void)
1314 {
1315   bool finalresult = true;
1316 
1317   // Verify the target files in alphabetical order
1318 //  sort(verifylist.begin(), verifylist.end(), SortSourceFilesByFileName);
1319 
1320   // Iterate through each file in the verification list
1321   for (list<Par1RepairerSourceFile*>::iterator sf = verifylist.begin();
1322        sf != verifylist.end();
1323        ++sf)
1324   {
1325     Par1RepairerSourceFile *sourcefile = *sf;
1326     DiskFile *targetfile = sourcefile->GetTargetFile();
1327 
1328     // Close the file
1329     if (targetfile->IsOpen())
1330       targetfile->Close();
1331 
1332     // Say we don't have a complete version of the file
1333     sourcefile->SetCompleteFile(0);
1334 
1335     // Re-open the target file
1336     if (!targetfile->Open())
1337     {
1338       finalresult = false;
1339       continue;
1340     }
1341 
1342     // Verify the file again
1343     if (!VerifyDataFile(targetfile, sourcefile))
1344       finalresult = false;
1345 
1346     // Close the file again
1347     targetfile->Close();
1348 
1349     // Find out how much data we have found
1350     UpdateVerificationResults();
1351   }
1352 
1353   return finalresult;
1354 }
1355 
1356 // Delete all of the partly reconstructed files
DeleteIncompleteTargetFiles(void)1357 bool Par1Repairer::DeleteIncompleteTargetFiles(void)
1358 {
1359   list<Par1RepairerSourceFile*>::iterator sf = verifylist.begin();
1360 
1361   // Iterate through each file in the verification list
1362   while (sf != verifylist.end())
1363   {
1364     Par1RepairerSourceFile *sourcefile = *sf;
1365     if (sourcefile->GetTargetExists())
1366     {
1367       DiskFile *targetfile = sourcefile->GetTargetFile();
1368 
1369       // Close and delete the file
1370       if (targetfile->IsOpen())
1371         targetfile->Close();
1372       targetfile->Delete();
1373 
1374       // Forget the file
1375       diskfilemap.Remove(targetfile);
1376 
1377       delete targetfile;
1378 
1379       // There is no target file
1380       sourcefile->SetTargetExists(false);
1381       sourcefile->SetTargetFile(0);
1382     }
1383 
1384     ++sf;
1385   }
1386 
1387   return true;
1388 }
1389