1 //  This file is part of par2cmdline (a PAR 2.0 compatible file verification and
2 //  repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
3 //
4 //  Copyright (c) 2003 Peter Brian Clements
5 //  Copyright (c) 2019 Michael D. Nahas
6 //
7 //  par2cmdline is free software; you can redistribute it and/or modify
8 //  it under the terms of the GNU General Public License as published by
9 //  the Free Software Foundation; either version 2 of the License, or
10 //  (at your option) any later version.
11 //
12 //  par2cmdline is distributed in the hope that it will be useful,
13 //  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 //  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 //  GNU General Public License for more details.
16 //
17 //  You should have received a copy of the GNU General Public License
18 //  along with this program; if not, write to the Free Software
19 //  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20 
21 #include "libpar2internal.h"
22 
// MSVC debug builds only: give this translation unit its own THIS_FILE string
// (set to this source file's name) and route `new` through DEBUG_NEW.
// NOTE(review): DEBUG_NEW is not defined in this file; presumably it is the
// allocation-tracking macro supplied by the MSVC/MFC debug headers -- confirm.
#ifdef _MSC_VER
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
#endif
30 
31 
// Definition of the static member Par2Repairer::filethreads (OpenMP builds
// only). It starts out as _FILE_THREADS and is overwritten by Process() with
// the _filethreads argument it is given.
#ifdef _OPENMP
u32 Par2Repairer::filethreads = _FILE_THREADS;
#endif
36 
37 
Par2Repairer(std::ostream & sout,std::ostream & serr,const NoiseLevel noiselevel)38 Par2Repairer::Par2Repairer(std::ostream &sout, std::ostream &serr, const NoiseLevel noiselevel)
39 : sout(sout)
40 , serr(serr)
41 , noiselevel(noiselevel)
42 , searchpath()
43 , basepath()
44 , setid()
45 , recoverypacketmap()
46 , diskFileMap()
47 , sourcefilemap()
48 , sourcefiles()
49 , verifylist()
50 , backuplist()
51 , par2list()
52 , sourceblocks()
53 , targetblocks()
54 , windowmask(0)
55 , blockverifiable(false)
56 , verificationhashtable()
57 , unverifiablesourcefiles()
58 , inputblocks()
59 , copyblocks()
60 , outputblocks()
61 , rs()
62 {
63   skipdata = false;
64   skipleaway = 0;
65 
66   firstpacket = true;
67   mainpacket = 0;
68   creatorpacket = 0;
69 
70   blocksize = 0;
71   chunksize = 0;
72 
73   sourceblockcount = 0;
74   availableblockcount = 0;
75   missingblockcount = 0;
76 
77   memset(windowtable, 0, sizeof(windowtable));
78 
79   blocksallocated = false;
80 
81   completefilecount = 0;
82   renamedfilecount = 0;
83   damagedfilecount = 0;
84   missingfilecount = 0;
85 
86   inputbuffer = 0;
87   outputbuffer = 0;
88 
89   progress = 0;
90   totaldata = 0;
91 
92 #ifdef _OPENMP
93   mttotalsize = 0;
94   mttotalextrasize = 0;
95   mttotalprogress = 0;
96   mtprocessingextrafiles = false;
97 #endif
98 }
99 
~Par2Repairer(void)100 Par2Repairer::~Par2Repairer(void)
101 {
102   delete [] (u8*)inputbuffer;
103   delete [] (u8*)outputbuffer;
104 
105   map<u32,RecoveryPacket*>::iterator rp = recoverypacketmap.begin();
106   while (rp != recoverypacketmap.end())
107   {
108     delete (*rp).second;
109 
110     ++rp;
111   }
112 
113   map<MD5Hash,Par2RepairerSourceFile*>::iterator sf = sourcefilemap.begin();
114   while (sf != sourcefilemap.end())
115   {
116     Par2RepairerSourceFile *sourcefile = (*sf).second;
117     delete sourcefile;
118 
119     ++sf;
120   }
121 
122   delete mainpacket;
123   delete creatorpacket;
124 }
125 
Process(const size_t memorylimit,const string & _basepath,const u32 nthreads,const u32 _filethreads,string parfilename,const vector<string> & _extrafiles,const bool dorepair,const bool purgefiles,const bool _skipdata,const u64 _skipleaway)126 Result Par2Repairer::Process(
127 			     const size_t memorylimit,
128 			     const string &_basepath,
129 #ifdef _OPENMP
130 			     const u32 nthreads,
131 			     const u32 _filethreads,
132 #endif
133 			     string parfilename,
134 			     const vector<string> &_extrafiles,
135 			     const bool dorepair,   // derived from operation
136 			     const bool purgefiles,
137 			     const bool _skipdata,
138 			     const u64 _skipleaway
139 			     )
140 {
141 #ifdef _OPENMP
142   filethreads = _filethreads;
143 #endif
144 
145   // Should we skip data whilst scanning files
146   skipdata = _skipdata;
147 
148   // How much leaway should we allow when scanning files
149   skipleaway = _skipleaway;
150 
151   // Get filenames from the command line
152   basepath = _basepath;
153   std::vector<string> extrafiles = _extrafiles;
154 
155 #ifdef _OPENMP
156   // Set the number of threads
157   if (nthreads != 0)
158     omp_set_num_threads(nthreads);
159 #endif
160 
161   // Determine the searchpath from the location of the main PAR2 file
162   string name;
163   DiskFile::SplitFilename(parfilename, searchpath, name);
164 
165   par2list.push_back(parfilename);
166 
167   // Load packets from the main PAR2 file
168   if (!LoadPacketsFromFile(searchpath + name))
169     return eLogicError;
170 
171   // Load packets from other PAR2 files with names based on the original PAR2 file
172   if (!LoadPacketsFromOtherFiles(parfilename))
173     return eLogicError;
174 
175   // Load packets from any other PAR2 files whose names are given on the command line
176   if (!LoadPacketsFromExtraFiles(extrafiles))
177     return eLogicError;
178 
179   if (noiselevel > nlQuiet)
180     sout << endl;
181 
182   // Check that the packets are consistent and discard any that are not
183   if (!CheckPacketConsistency())
184     return eInsufficientCriticalData;
185 
186   // Use the information in the main packet to get the source files
187   // into the correct order and determine their filenames
188   if (!CreateSourceFileList())
189     return eLogicError;
190 
191   // Determine the total number of DataBlocks for the recoverable source files
192   // The allocate the DataBlocks and assign them to each source file
193   if (!AllocateSourceBlocks())
194     return eLogicError;
195 
196   // Create a verification hash table for all files for which we have not
197   // found a complete version of the file and for which we have
198   // a verification packet
199   if (!PrepareVerificationHashTable())
200     return eLogicError;
201 
202   // Compute the table for the sliding CRC computation
203   if (!ComputeWindowTable())
204     return eLogicError;
205 
206   // Attempt to verify all of the source files
207   if (!VerifySourceFiles(basepath, extrafiles))
208     return eFileIOError;
209 
210   if (completefilecount < mainpacket->RecoverableFileCount())
211   {
212     // Scan any extra files specified on the command line
213     if (!VerifyExtraFiles(extrafiles, basepath))
214       return eLogicError;
215   }
216 
217   // Find out how much data we have found
218   UpdateVerificationResults();
219 
220   if (noiselevel > nlSilent)
221     sout << endl;
222 
223   // Check the verification results and report the results
224   if (!CheckVerificationResults())
225     return eRepairNotPossible;
226 
227   // Are any of the files incomplete
228   if (completefilecount < mainpacket->RecoverableFileCount())
229   {
230     // Do we want to carry out a repair
231     if (dorepair)
232     {
233       if (noiselevel > nlSilent)
234         sout << endl;
235 
236       // Rename any damaged or missnamed target files.
237       if (!RenameTargetFiles())
238         return eFileIOError;
239 
240       // Are we still missing any files
241       if (completefilecount < mainpacket->RecoverableFileCount())
242       {
243         // Work out which files are being repaired, create them, and allocate
244         // target DataBlocks to them, and remember them for later verification.
245         if (!CreateTargetFiles())
246           return eFileIOError;
247 
248         // Work out which data blocks are available, which need to be copied
249         // directly to the output, and which need to be recreated, and compute
250         // the appropriate Reed Solomon matrix.
251         if (!ComputeRSmatrix())
252         {
253           // Delete all of the partly reconstructed files
254           DeleteIncompleteTargetFiles();
255           return eFileIOError;
256         }
257 
258         if (noiselevel > nlSilent)
259           sout << endl;
260 
261         // Allocate memory buffers for reading and writing data to disk.
262         if (!AllocateBuffers(memorylimit))
263         {
264           // Delete all of the partly reconstructed files
265           DeleteIncompleteTargetFiles();
266           return eMemoryError;
267         }
268 
269         // Set the total amount of data to be processed.
270         progress = 0;
271         totaldata = blocksize * sourceblockcount * (missingblockcount > 0 ? missingblockcount : 1);
272 
273         // Start at an offset of 0 within a block.
274         u64 blockoffset = 0;
275         while (blockoffset < blocksize) // Continue until the end of the block.
276         {
277           // Work out how much data to process this time.
278           size_t blocklength = (size_t)min((u64)chunksize, blocksize-blockoffset);
279 
280           // Read source data, process it through the RS matrix and write it to disk.
281           if (!ProcessData(blockoffset, blocklength))
282           {
283             // Delete all of the partly reconstructed files
284             DeleteIncompleteTargetFiles();
285             return eFileIOError;
286           }
287 
288           // Advance to the need offset within each block
289           blockoffset += blocklength;
290         }
291 
292         if (noiselevel > nlSilent)
293           sout << endl << "Verifying repaired files:" << endl << endl;
294 
295         // Verify that all of the reconstructed target files are now correct
296         if (!VerifyTargetFiles(basepath))
297         {
298           // Delete all of the partly reconstructed files
299           DeleteIncompleteTargetFiles();
300           return eFileIOError;
301         }
302       }
303 
304       // Are all of the target files now complete?
305       if (completefilecount<mainpacket->RecoverableFileCount())
306       {
307         serr << "Repair Failed." << endl;
308         return eRepairFailed;
309       }
310       else
311       {
312         if (noiselevel > nlSilent)
313           sout << endl << "Repair complete." << endl;
314       }
315     }
316     else
317     {
318       return eRepairPossible;
319     }
320   }
321 
322   if (purgefiles == true)
323   {
324     RemoveBackupFiles();
325     RemoveParFiles();
326   }
327 
328   return eSuccess;
329 }
330 
331 // Load the packets from the specified file
LoadPacketsFromFile(string filename)332 bool Par2Repairer::LoadPacketsFromFile(string filename)
333 {
334   // Skip the file if it has already been processed
335   if (diskFileMap.Find(filename) != 0)
336   {
337     return true;
338   }
339 
340   DiskFile *diskfile = new DiskFile(sout, serr);
341 
342   // Open the file
343   if (!diskfile->Open(filename))
344   {
345     // If we could not open the file, ignore the error and
346     // proceed to the next file
347     delete diskfile;
348     return true;
349   }
350 
351   if (noiselevel > nlSilent)
352   {
353     string path;
354     string name;
355     DiskFile::SplitFilename(filename, path, name);
356     sout << "Loading \"" << name << "\"." << endl;
357   }
358 
359   // How many useable packets have we found
360   u32 packets = 0;
361 
362   // How many recovery packets were there
363   u32 recoverypackets = 0;
364 
365   // How big is the file
366   u64 filesize = diskfile->FileSize();
367   if (filesize > 0)
368   {
369     // Allocate a buffer to read data into
370     // The buffer should be large enough to hold a whole
371     // critical packet (i.e. file verification, file description, main,
372     // and creator), but not necessarily a whole recovery packet.
373     size_t buffersize = (size_t)min((u64)1048576, filesize);
374     u8 *buffer = new u8[buffersize];
375 
376     // Progress indicator
377     u64 progress = 0;
378 
379     // Start at the beginning of the file
380     u64 offset = 0;
381 
382     // Continue as long as there is at least enough for the packet header
383     while (offset + sizeof(PACKET_HEADER) <= filesize)
384     {
385       if (noiselevel > nlQuiet)
386       {
387         // Update a progress indicator
388         u32 oldfraction = (u32)(1000 * progress / filesize);
389         u32 newfraction = (u32)(1000 * offset / filesize);
390         if (oldfraction != newfraction)
391         {
392           sout << "Loading: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
393           progress = offset;
394         }
395       }
396 
397       // Attempt to read the next packet header
398       PACKET_HEADER header;
399       if (!diskfile->Read(offset, &header, sizeof(header)))
400         break;
401 
402       // Does this look like it might be a packet
403       if (packet_magic != header.magic)
404       {
405         offset++;
406 
407         // Is there still enough for at least a whole packet header
408         while (offset + sizeof(PACKET_HEADER) <= filesize)
409         {
410           // How much can we read into the buffer
411           size_t want = (size_t)min((u64)buffersize, filesize-offset);
412 
413           // Fill the buffer
414           if (!diskfile->Read(offset, buffer, want))
415           {
416             offset = filesize;
417             break;
418           }
419 
420           // Scan the buffer for the magic value
421           u8 *current = buffer;
422           u8 *limit = &buffer[want-sizeof(PACKET_HEADER)];
423           while (current <= limit && packet_magic != ((PACKET_HEADER*)current)->magic)
424           {
425             current++;
426           }
427 
428           // What file offset did we reach
429           offset += current-buffer;
430 
431           // Did we find the magic
432           if (current <= limit)
433           {
434             memcpy(&header, current, sizeof(header));
435             break;
436           }
437         }
438 
439         // Did we reach the end of the file
440         if (offset + sizeof(PACKET_HEADER) > filesize)
441         {
442           break;
443         }
444       }
445 
446       // We have found the magic
447 
448       // Check the packet length
449       if (sizeof(PACKET_HEADER) > header.length || // packet length is too small
450           0 != (header.length & 3) ||              // packet length is not a multiple of 4
451           filesize < offset + header.length)       // packet would extend beyond the end of the file
452       {
453         offset++;
454         continue;
455       }
456 
457       // Compute the MD5 Hash of the packet
458       MD5Context context;
459       context.Update(&header.setid, sizeof(header)-offsetof(PACKET_HEADER, setid));
460 
461       // How much more do I need to read to get the whole packet
462       u64 current = offset+sizeof(PACKET_HEADER);
463       u64 limit = offset+header.length;
464       while (current < limit)
465       {
466         size_t want = (size_t)min((u64)buffersize, limit-current);
467 
468         if (!diskfile->Read(current, buffer, want))
469           break;
470 
471         context.Update(buffer, want);
472 
473         current += want;
474       }
475 
476       // Did the whole packet get processed
477       if (current<limit)
478       {
479         offset++;
480         continue;
481       }
482 
483       // Check the calculated packet hash against the value in the header
484       MD5Hash hash;
485       context.Final(hash);
486       if (hash != header.hash)
487       {
488         offset++;
489         continue;
490       }
491 
492       // If this is the first packet that we have found then record the setid
493       if (firstpacket)
494       {
495         setid = header.setid;
496         firstpacket = false;
497       }
498 
499       // Is the packet from the correct set
500       if (setid == header.setid)
501       {
502         // Is it a packet type that we are interested in
503         if (recoveryblockpacket_type == header.type)
504         {
505           if (LoadRecoveryPacket(diskfile, offset, header))
506           {
507             recoverypackets++;
508             packets++;
509           }
510         }
511         else if (fileverificationpacket_type == header.type)
512         {
513           if (LoadVerificationPacket(diskfile, offset, header))
514           {
515             packets++;
516           }
517         }
518         else if (filedescriptionpacket_type == header.type)
519         {
520           if (LoadDescriptionPacket(diskfile, offset, header))
521           {
522             packets++;
523           }
524         }
525         else if (mainpacket_type == header.type)
526         {
527           if (LoadMainPacket(diskfile, offset, header))
528           {
529             packets++;
530           }
531         }
532         else if (creatorpacket_type == header.type)
533         {
534           if (LoadCreatorPacket(diskfile, offset, header))
535           {
536             packets++;
537           }
538         }
539       }
540 
541       // Advance to the next packet
542       offset += header.length;
543     }
544 
545     delete [] buffer;
546   }
547 
548   // We have finished with the file for now
549   diskfile->Close();
550 
551   // Did we actually find any interesting packets
552   if (packets > 0)
553   {
554     if (noiselevel > nlQuiet)
555     {
556       sout << "Loaded " << packets << " new packets";
557       if (recoverypackets > 0) sout << " including " << recoverypackets << " recovery blocks";
558       sout << endl;
559     }
560 
561     // Remember that the file was processed
562     bool success = diskFileMap.Insert(diskfile);
563     assert(success);
564   }
565   else
566   {
567     if (noiselevel > nlQuiet)
568       sout << "No new packets found" << endl;
569     delete diskfile;
570   }
571 
572   return true;
573 }
574 
575 // Finish loading a recovery packet
LoadRecoveryPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)576 bool Par2Repairer::LoadRecoveryPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
577 {
578   RecoveryPacket *packet = new RecoveryPacket;
579 
580   // Load the packet from disk
581   if (!packet->Load(diskfile, offset, header))
582   {
583     delete packet;
584     return false;
585   }
586 
587   // What is the exponent value of this recovery packet
588   u32 exponent = packet->Exponent();
589 
590   // Try to insert the new packet into the recovery packet map
591   pair<map<u32,RecoveryPacket*>::const_iterator, bool> location = recoverypacketmap.insert(pair<u32,RecoveryPacket*>(exponent, packet));
592 
593   // Did the insert fail
594   if (!location.second)
595   {
596     // The packet must be a duplicate of one we already have
597     delete packet;
598     return false;
599   }
600 
601   return true;
602 }
603 
604 // Finish loading a file description packet
LoadDescriptionPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)605 bool Par2Repairer::LoadDescriptionPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
606 {
607   DescriptionPacket *packet = new DescriptionPacket;
608 
609   // Load the packet from disk
610   if (!packet->Load(diskfile, offset, header))
611   {
612     delete packet;
613     return false;
614   }
615 
616   // What is the fileid
617   const MD5Hash &fileid = packet->FileId();
618 
619   // Look up the fileid in the source file map for an existing source file entry
620   map<MD5Hash, Par2RepairerSourceFile*>::iterator sfmi = sourcefilemap.find(fileid);
621   Par2RepairerSourceFile *sourcefile = (sfmi == sourcefilemap.end()) ? 0 :sfmi->second;
622 
623   // Was there an existing source file
624   if (sourcefile)
625   {
626     // Does the source file already have a description packet
627     if (sourcefile->GetDescriptionPacket())
628     {
629       // Yes. We don't need another copy
630       delete packet;
631       return false;
632     }
633     else
634     {
635       // No. Store the packet in the source file
636       sourcefile->SetDescriptionPacket(packet);
637       return true;
638     }
639   }
640   else
641   {
642     // Create a new source file for the packet
643     sourcefile = new Par2RepairerSourceFile(packet, NULL);
644 
645     // Record the source file in the source file map
646     sourcefilemap.insert(pair<MD5Hash, Par2RepairerSourceFile*>(fileid, sourcefile));
647 
648     return true;
649   }
650 }
651 
652 // Finish loading a file verification packet
LoadVerificationPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)653 bool Par2Repairer::LoadVerificationPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
654 {
655   VerificationPacket *packet = new VerificationPacket;
656 
657   // Load the packet from disk
658   if (!packet->Load(diskfile, offset, header))
659   {
660     delete packet;
661     return false;
662   }
663 
664   // What is the fileid
665   const MD5Hash &fileid = packet->FileId();
666 
667   // Look up the fileid in the source file map for an existing source file entry
668   map<MD5Hash, Par2RepairerSourceFile*>::iterator sfmi = sourcefilemap.find(fileid);
669   Par2RepairerSourceFile *sourcefile = (sfmi == sourcefilemap.end()) ? 0 :sfmi->second;
670 
671   // Was there an existing source file
672   if (sourcefile)
673   {
674     // Does the source file already have a verification packet
675     if (sourcefile->GetVerificationPacket())
676     {
677       // Yes. We don't need another copy.
678       delete packet;
679       return false;
680     }
681     else
682     {
683       // No. Store the packet in the source file
684       sourcefile->SetVerificationPacket(packet);
685 
686       return true;
687     }
688   }
689   else
690   {
691     // Create a new source file for the packet
692     sourcefile = new Par2RepairerSourceFile(NULL, packet);
693 
694     // Record the source file in the source file map
695     sourcefilemap.insert(pair<MD5Hash, Par2RepairerSourceFile*>(fileid, sourcefile));
696 
697     return true;
698   }
699 }
700 
701 // Finish loading the main packet
LoadMainPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)702 bool Par2Repairer::LoadMainPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
703 {
704   // Do we already have a main packet
705   if (0 != mainpacket)
706     return false;
707 
708   MainPacket *packet = new MainPacket;
709 
710   // Load the packet from disk;
711   if (!packet->Load(diskfile, offset, header))
712   {
713     delete packet;
714     return false;
715   }
716 
717   mainpacket = packet;
718 
719   return true;
720 }
721 
722 // Finish loading the creator packet
LoadCreatorPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)723 bool Par2Repairer::LoadCreatorPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
724 {
725   // Do we already have a creator packet
726   if (0 != creatorpacket)
727     return false;
728 
729   CreatorPacket *packet = new CreatorPacket;
730 
731   // Load the packet from disk;
732   if (!packet->Load(diskfile, offset, header))
733   {
734     delete packet;
735     return false;
736   }
737 
738   creatorpacket = packet;
739 
740   return true;
741 }
742 
743 // Load packets from other PAR2 files with names based on the original PAR2 file
LoadPacketsFromOtherFiles(string filename)744 bool Par2Repairer::LoadPacketsFromOtherFiles(string filename)
745 {
746   // Split the original PAR2 filename into path and name parts
747   string path;
748   string name;
749   DiskFile::SplitFilename(filename, path, name);
750 
751   string::size_type where;
752 
753   // Trim ".par2" off of the end original name
754 
755   // Look for the last "." in the filename
756   while (string::npos != (where = name.find_last_of('.')))
757   {
758     // Trim what follows the last .
759     string tail = name.substr(where+1);
760     name = name.substr(0,where);
761 
762     // Was what followed the last "." "par2"
763     if (0 == stricmp(tail.c_str(), "par2"))
764       break;
765   }
766 
767   // If what is left ends in ".volNNN-NNN" or ".volNNN+NNN" strip that as well
768 
769   // Is there another "."
770   if (string::npos != (where = name.find_last_of('.')))
771   {
772     // What follows the "."
773     string tail = name.substr(where+1);
774 
775     // Scan what follows the last "." to see of it matches vol123-456 or vol123+456
776     int n = 0;
777     string::const_iterator p;
778     for (p=tail.begin(); p!=tail.end(); ++p)
779     {
780       char ch = *p;
781 
782       if (0 == n)
783       {
784         if (tolower(ch) == 'v') { n++; } else { break; }
785       }
786       else if (1 == n)
787       {
788         if (tolower(ch) == 'o') { n++; } else { break; }
789       }
790       else if (2 == n)
791       {
792         if (tolower(ch) == 'l') { n++; } else { break; }
793       }
794       else if (3 == n)
795       {
796         if (isdigit(ch)) {} else if (ch == '-' || ch == '+') { n++; } else { break; }
797       }
798       else if (4 == n)
799       {
800         if (isdigit(ch)) {} else { break; }
801       }
802     }
803 
804     // If we matched then retain only what precedes the "."
805     if (p == tail.end())
806     {
807       name = name.substr(0,where);
808     }
809   }
810 
811   // Find files called "*.par2" or "name.*.par2"
812 
813   {
814     string wildcard = name.empty() ? "*.par2" : name + ".*.par2";
815     std::unique_ptr< list<string> > files(
816 					DiskFile::FindFiles(path, wildcard, false)
817 					);
818     par2list.merge(*files);
819 
820     string wildcardu = name.empty() ? "*.PAR2" : name + ".*.PAR2";
821     std::unique_ptr< list<string> > filesu(
822 					 DiskFile::FindFiles(path, wildcardu, false)
823 					 );
824     par2list.merge(*filesu);
825 
826     // Load packets from each file that was found
827     for (list<string>::const_iterator s=par2list.begin(); s!=par2list.end(); ++s)
828     {
829       LoadPacketsFromFile(*s);
830     }
831 
832     // delete files;  Taken care of by unique_ptr<>
833     // delete filesu;
834   }
835 
836   return true;
837 }
838 
839 // Load packets from any other PAR2 files whose names are given on the command line
LoadPacketsFromExtraFiles(const vector<string> & extrafiles)840 bool Par2Repairer::LoadPacketsFromExtraFiles(const vector<string> &extrafiles)
841 {
842   for (vector<string>::const_iterator i=extrafiles.begin(); i!=extrafiles.end(); i++)
843   {
844     string filename = *i;
845 
846     // If the filename contains ".par2" anywhere
847     if (string::npos != filename.find(".par2") ||
848         string::npos != filename.find(".PAR2"))
849     {
850       LoadPacketsFromFile(filename);
851     }
852   }
853 
854   return true;
855 }
856 
857 // Check that the packets are consistent and discard any that are not
CheckPacketConsistency(void)858 bool Par2Repairer::CheckPacketConsistency(void)
859 {
860   // Do we have a main packet
861   if (0 == mainpacket)
862   {
863     // If we don't have a main packet, then there is nothing more that we can do.
864     // We cannot verify or repair any files.
865 
866     serr << "Main packet not found." << endl;
867     return false;
868   }
869 
870   // Remember the block size from the main packet
871   blocksize = mainpacket->BlockSize();
872 
873   // Check that the recovery blocks have the correct amount of data
874   // and discard any that don't
875   {
876     map<u32,RecoveryPacket*>::iterator rp = recoverypacketmap.begin();
877     while (rp != recoverypacketmap.end())
878     {
879       if (rp->second->BlockSize() == blocksize)
880       {
881         ++rp;
882       }
883       else
884       {
885         serr << "Incorrect sized recovery block for exponent " << rp->second->Exponent() << " discarded" << endl;
886 
887         delete rp->second;
888         map<u32,RecoveryPacket*>::iterator x = rp++;
889         recoverypacketmap.erase(x);
890       }
891     }
892   }
893 
894   // Check for source files that have no description packet or where the
895   // verification packet has the wrong number of entries and discard them.
896   {
897     map<MD5Hash, Par2RepairerSourceFile*>::iterator sf = sourcefilemap.begin();
898     while (sf != sourcefilemap.end())
899     {
900       // Do we have a description packet
901       DescriptionPacket *descriptionpacket = sf->second->GetDescriptionPacket();
902       if (descriptionpacket == 0)
903       {
904         // No description packet
905 
906         // Discard the source file
907         delete sf->second;
908         map<MD5Hash, Par2RepairerSourceFile*>::iterator x = sf++;
909         sourcefilemap.erase(x);
910 
911         continue;
912       }
913 
914       // Compute and store the block count from the filesize and blocksize
915       sf->second->SetBlockCount(blocksize);
916 
917       // Do we have a verification packet
918       VerificationPacket *verificationpacket = sf->second->GetVerificationPacket();
919       if (verificationpacket == 0)
920       {
921         // No verification packet
922 
923         // That is ok, but we won't be able to use block verification.
924 
925         // Proceed to the next file.
926         ++sf;
927 
928         continue;
929       }
930 
931       // Work out the block count for the file from the file size
932       // and compare that with the verification packet
933       u64 filesize = descriptionpacket->FileSize();
934       u32 blockcount = verificationpacket->BlockCount();
935 
936       if ((filesize + blocksize-1) / blocksize != (u64)blockcount)
937       {
938         // The block counts are different!
939 
940         serr << "Incorrectly sized verification packet for \"" << descriptionpacket->FileName() << "\" discarded" << endl;
941 
942         // Discard the source file
943 
944         delete sf->second;
945         map<MD5Hash, Par2RepairerSourceFile*>::iterator x = sf++;
946         sourcefilemap.erase(x);
947 
948         continue;
949       }
950 
951       // Everything is ok.
952 
953       // Proceed to the next file
954       ++sf;
955     }
956   }
957 
958   if (noiselevel > nlQuiet)
959   {
960     sout << "There are "
961       << mainpacket->RecoverableFileCount()
962       << " recoverable files and "
963       << mainpacket->TotalFileCount() - mainpacket->RecoverableFileCount()
964       << " other files."
965       << endl;
966 
967     sout << "The block size used was "
968       << blocksize
969       << " bytes."
970       << endl;
971   }
972 
973   return true;
974 }
975 
976 // Use the information in the main packet to get the source files
977 // into the correct order and determine their filenames
CreateSourceFileList(void)978 bool Par2Repairer::CreateSourceFileList(void)
979 {
980   // For each FileId entry in the main packet
981   for (u32 filenumber=0; filenumber<mainpacket->TotalFileCount(); filenumber++)
982   {
983     const MD5Hash &fileid = mainpacket->FileId(filenumber);
984 
985     // Look up the fileid in the source file map
986     map<MD5Hash, Par2RepairerSourceFile*>::iterator sfmi = sourcefilemap.find(fileid);
987     Par2RepairerSourceFile *sourcefile = (sfmi == sourcefilemap.end()) ? 0 :sfmi->second;
988 
989     if (sourcefile)
990     {
991       sourcefile->ComputeTargetFileName(sout, serr, noiselevel, basepath);
992 
993 #ifdef _OPENMP
994       // Need actual filesize on disk for mt-progress line
995       sourcefile->SetDiskFileSize();
996 #endif
997     }
998 
999     sourcefiles.push_back(sourcefile);
1000   }
1001 
1002   return true;
1003 }
1004 
1005 // Determine the total number of DataBlocks for the recoverable source files
1006 // The allocate the DataBlocks and assign them to each source file
AllocateSourceBlocks(void)1007 bool Par2Repairer::AllocateSourceBlocks(void)
1008 {
1009   sourceblockcount = 0;
1010 
1011   u32 filenumber = 0;
1012   vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1013 
1014   // For each recoverable source file
1015   while (filenumber < mainpacket->RecoverableFileCount() && sf != sourcefiles.end())
1016   {
1017     // Do we have a source file
1018     Par2RepairerSourceFile *sourcefile = *sf;
1019     if (sourcefile)
1020     {
1021       sourceblockcount += sourcefile->BlockCount();
1022     }
1023     else
1024     {
1025       // No details for this source file so we don't know what the
1026       // total number of source blocks is
1027       //      sourceblockcount = 0;
1028       //      break;
1029     }
1030 
1031     ++sf;
1032     ++filenumber;
1033   }
1034 
1035   // Did we determine the total number of source blocks
1036   if (sourceblockcount > 0)
1037   {
1038     // Yes.
1039 
1040     // Allocate all of the Source and Target DataBlocks (which will be used
1041     // to read and write data to disk).
1042 
1043     sourceblocks.resize(sourceblockcount);
1044     targetblocks.resize(sourceblockcount);
1045 
1046     // Which DataBlocks will be allocated first
1047     vector<DataBlock>::iterator sourceblock = sourceblocks.begin();
1048     vector<DataBlock>::iterator targetblock = targetblocks.begin();
1049 
1050     u64 totalsize = 0;
1051     u32 blocknumber = 0;
1052 
1053     filenumber = 0;
1054     sf = sourcefiles.begin();
1055 
1056     while (filenumber < mainpacket->RecoverableFileCount() && sf != sourcefiles.end())
1057     {
1058       Par2RepairerSourceFile *sourcefile = *sf;
1059 
1060       if (sourcefile)
1061       {
1062         totalsize += sourcefile->GetDescriptionPacket()->FileSize();
1063         u32 blockcount = sourcefile->BlockCount();
1064 
1065         // Allocate the source and target DataBlocks to the sourcefile
1066         sourcefile->SetBlocks(blocknumber, blockcount, sourceblock, targetblock, blocksize);
1067 
1068         blocknumber++;
1069 
1070         sourceblock += blockcount;
1071         targetblock += blockcount;
1072       }
1073 
1074       ++sf;
1075       ++filenumber;
1076     }
1077 
1078     blocksallocated = true;
1079 
1080     if (noiselevel > nlQuiet)
1081     {
1082       sout << "There are a total of "
1083         << sourceblockcount
1084         << " data blocks."
1085         << endl;
1086 
1087       sout << "The total size of the data files is "
1088         << totalsize
1089         << " bytes."
1090         << endl;
1091     }
1092   }
1093 
1094   return true;
1095 }
1096 
1097 // Create a verification hash table for all files for which we have not
1098 // found a complete version of the file and for which we have
1099 // a verification packet
PrepareVerificationHashTable(void)1100 bool Par2Repairer::PrepareVerificationHashTable(void)
1101 {
1102   if (noiselevel >= nlDebug)
1103     sout << "[DEBUG] Prepare verification hashtable" << endl;
1104 
1105   // Choose a size for the hash table
1106   verificationhashtable.SetLimit(sourceblockcount);
1107 
1108   // Will any files be block verifiable
1109   blockverifiable = false;
1110 
1111   // For each source file
1112   vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1113   while (sf != sourcefiles.end())
1114   {
1115     // Get the source file
1116     Par2RepairerSourceFile *sourcefile = *sf;
1117 
1118     if (sourcefile)
1119     {
1120       // Do we have a verification packet
1121       if (0 != sourcefile->GetVerificationPacket())
1122       {
1123         // Yes. Load the verification entries into the hash table
1124         verificationhashtable.Load(sourcefile, blocksize);
1125 
1126         blockverifiable = true;
1127       }
1128       else
1129       {
1130         // No. We can only check the whole file
1131         unverifiablesourcefiles.push_back(sourcefile);
1132       }
1133     }
1134 
1135     ++sf;
1136   }
1137 
1138   return true;
1139 }
1140 
1141 // Compute the table for the sliding CRC computation
ComputeWindowTable(void)1142 bool Par2Repairer::ComputeWindowTable(void)
1143 {
1144   if (noiselevel >= nlDebug)
1145     sout << "[DEBUG] compute window table" << endl;
1146 
1147   if (blockverifiable)
1148   {
1149     GenerateWindowTable(blocksize, windowtable);
1150     windowmask = ComputeWindowMask(blocksize);
1151   }
1152 
1153   return true;
1154 }
1155 
SortSourceFilesByFileName(Par2RepairerSourceFile * low,Par2RepairerSourceFile * high)1156 static bool SortSourceFilesByFileName(Par2RepairerSourceFile *low,
1157                                       Par2RepairerSourceFile *high)
1158 {
1159   return low->TargetFileName() < high->TargetFileName();
1160 }
1161 
1162 // Attempt to verify all of the source files
VerifySourceFiles(const std::string & basepath,std::vector<string> & extrafiles)1163 bool Par2Repairer::VerifySourceFiles(const std::string& basepath, std::vector<string>& extrafiles)
1164 {
1165   if (noiselevel > nlQuiet)
1166     sout << endl << "Verifying source files:" << endl << endl;
1167 
1168   bool finalresult = true;
1169 
1170   // Created a sorted list of the source files and verify them in that
1171   // order rather than the order they are in the main packet.
1172   vector<Par2RepairerSourceFile*> sortedfiles;
1173 
1174   u32 filenumber = 0;
1175   vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1176 
1177 #ifdef _OPENMP
1178   mttotalsize = 0;
1179   mttotalprogress = 0;
1180 #endif
1181 
1182   while (sf != sourcefiles.end())
1183   {
1184     // Do we have a source file
1185     Par2RepairerSourceFile *sourcefile = *sf;
1186     if (sourcefile)
1187     {
1188       sortedfiles.push_back(sourcefile);
1189 #ifdef _OPENMP
1190       // Total filesizes for mt-progress line
1191       mttotalsize += sourcefile->DiskFileSize();
1192 #endif
1193      }
1194     else
1195     {
1196       // Was this one of the recoverable files
1197       if (filenumber < mainpacket->RecoverableFileCount())
1198       {
1199         serr << "No details available for recoverable file number " << filenumber+1 << "." << endl << "Recovery will not be possible." << endl;
1200 
1201         // Set error but let verification of other files continue
1202         finalresult = false;
1203       }
1204       else
1205       {
1206         serr << "No details available for non-recoverable file number " << filenumber - mainpacket->RecoverableFileCount() + 1 << endl;
1207       }
1208     }
1209 
1210     ++sf;
1211   }
1212 
1213   sort(sortedfiles.begin(), sortedfiles.end(), SortSourceFilesByFileName);
1214 
1215   // Start verifying the files
1216   #pragma omp parallel for schedule(dynamic) num_threads(Par2Repairer::GetFileThreads())
1217   for (int i=0; i< static_cast<int>(sortedfiles.size()); ++i)
1218   {
1219     // Do we have a source file
1220     Par2RepairerSourceFile *sourcefile = sortedfiles[i];
1221 
1222     // What filename does the file use
1223     const std::string& file = sourcefile->TargetFileName();
1224     const std::string& name = DiskFile::SplitRelativeFilename(file, basepath);
1225     const std::string& target_pathname = DiskFile::GetCanonicalPathname(file);
1226 
1227     if (noiselevel >= nlDebug)
1228     {
1229       #pragma omp critical
1230       {
1231       sout << "[DEBUG] VerifySourceFiles ----" << endl;
1232       sout << "[DEBUG] file: " << file << endl;
1233       sout << "[DEBUG] name: " << name << endl;
1234       sout << "[DEBUG] targ: " << target_pathname << endl;
1235       }
1236     }
1237 
1238     // if the target file is in the list of extra files, we remove it
1239     // from the extra files.
1240     #pragma omp critical
1241     {
1242       vector<string>::iterator it = extrafiles.begin();
1243       for (; it != extrafiles.end(); ++it)
1244       {
1245 	const string& e = *it;
1246 	const std::string& extra_pathname = e;
1247 	if (!extra_pathname.compare(target_pathname))
1248 	{
1249 	  extrafiles.erase(it);
1250 	  break;
1251 	}
1252       }
1253     }
1254 
1255     // Check to see if we have already used this file
1256     bool b;
1257     #pragma omp critical
1258     b = diskFileMap.Find(file) != 0;
1259     if (b)
1260     {
1261       // The file has already been used!
1262       #pragma omp critical
1263       serr << "Source file " << name << " is a duplicate." << endl;
1264 
1265       finalresult = false;
1266     }
1267     else
1268     {
1269       DiskFile *diskfile = new DiskFile(sout, serr);
1270 
1271       // Does the target file exist
1272       if (diskfile->Open(file))
1273       {
1274         // Yes. Record that fact.
1275         sourcefile->SetTargetExists(true);
1276 
1277         // Remember that the DiskFile is the target file
1278         sourcefile->SetTargetFile(diskfile);
1279 
1280         // Remember that we have processed this file
1281         bool success;
1282         #pragma omp critical
1283         success = diskFileMap.Insert(diskfile);
1284         assert(success);
1285         // Do the actual verification
1286         if (!VerifyDataFile(diskfile, sourcefile, basepath))
1287           finalresult = false;
1288 
1289         // We have finished with the file for now
1290         diskfile->Close();
1291       }
1292       else
1293       {
1294         // The file does not exist.
1295         delete diskfile;
1296 
1297         if (noiselevel > nlSilent)
1298         {
1299           #pragma omp critical
1300           sout << "Target: \"" << name << "\" - missing." << endl;
1301         }
1302       }
1303     }
1304   }
1305 
1306   // Find out how much data we have found
1307   UpdateVerificationResults();
1308 
1309   return finalresult;
1310 }
1311 
1312 // Scan any extra files specified on the command line
VerifyExtraFiles(const vector<string> & extrafiles,const string & basepath)1313 bool Par2Repairer::VerifyExtraFiles(const vector<string> &extrafiles, const string &basepath)
1314 {
1315   if (noiselevel > nlQuiet)
1316     sout << endl << "Scanning extra files:" << endl << endl;
1317 
1318   if (completefilecount < mainpacket->RecoverableFileCount())
1319   {
1320 #ifdef _OPENMP
1321     // Total size of extra files for mt-progress line
1322     mtprocessingextrafiles = true;
1323     mttotalprogress = 0;
1324     mttotalextrasize = 0;
1325 
1326     for (size_t i=0; i<extrafiles.size(); ++i)
1327       mttotalextrasize += DiskFile::GetFileSize(extrafiles[i]);
1328 #endif
1329 
1330     #pragma omp parallel for schedule(dynamic) num_threads(Par2Repairer::GetFileThreads())
1331     for (int i=0; i< static_cast<int>(extrafiles.size()); ++i)
1332     {
1333       string filename = extrafiles[i];
1334 
1335       // If the filename does not include ".par2" we are interested in it.
1336       if (string::npos == filename.find(".par2") &&
1337           string::npos == filename.find(".PAR2"))
1338       {
1339         filename = DiskFile::GetCanonicalPathname(filename);
1340 
1341         // Has this file already been dealt with
1342         bool b;
1343         #pragma omp critical
1344         b = diskFileMap.Find(filename) == 0;
1345         if (b)
1346         {
1347           DiskFile *diskfile = new DiskFile(sout, serr);
1348 
1349           // Does the file exist
1350           if (!diskfile->Open(filename))
1351           {
1352             delete diskfile;
1353             continue;
1354           }
1355 
1356           // Remember that we have processed this file
1357           bool success;
1358           #pragma omp critical
1359           success = diskFileMap.Insert(diskfile);
1360           assert(success);
1361 
1362           // Do the actual verification
1363           VerifyDataFile(diskfile, 0, basepath);
1364           // Ignore errors
1365 
1366           // We have finished with the file for now
1367           diskfile->Close();
1368         }
1369       }
1370     }
1371   }
1372   // Find out how much data we have found
1373   UpdateVerificationResults();
1374 
1375 #if _OPENMP
1376     mtprocessingextrafiles = false;
1377 #endif
1378 
1379   return true;
1380 }
1381 
1382 // Attempt to match the data in the DiskFile with the source file
VerifyDataFile(DiskFile * diskfile,Par2RepairerSourceFile * sourcefile,const string & basepath)1383 bool Par2Repairer::VerifyDataFile(DiskFile *diskfile, Par2RepairerSourceFile *sourcefile, const string &basepath)
1384 {
1385   MatchType matchtype; // What type of match was made
1386   MD5Hash hashfull;    // The MD5 Hash of the whole file
1387   MD5Hash hash16k;     // The MD5 Hash of the files 16k of the file
1388 
1389   // Are there any files that can be verified at the block level
1390   if (blockverifiable)
1391   {
1392     u32 count;
1393 
1394     // Scan the file at the block level.
1395 
1396     if (!ScanDataFile(diskfile,   // [in]      The file to scan
1397                       basepath,
1398                       sourcefile, // [in/out]  Modified in the match is for another source file
1399                       matchtype,  // [out]
1400                       hashfull,   // [out]
1401                       hash16k,    // [out]
1402                       count))     // [out]
1403       return false;
1404 
1405     switch (matchtype)
1406     {
1407       case eNoMatch:
1408         // No data was found at all.
1409 
1410         // Continue to next test.
1411         break;
1412       case ePartialMatch:
1413         {
1414           // We found some data.
1415 
1416           // Return them.
1417           return true;
1418         }
1419         break;
1420       case eFullMatch:
1421         {
1422           // We found a perfect match.
1423 
1424           sourcefile->SetCompleteFile(diskfile);
1425 
1426           // Return the match
1427           return true;
1428         }
1429         break;
1430     }
1431   }
1432 
1433   // We did not find a match for any blocks of data within the file, but if
1434   // there are any files for which we did not have a verification packet
1435   // we can try a simple match of the hash for the whole file.
1436 
1437   // Are there any files that cannot be verified at the block level
1438   if (!unverifiablesourcefiles.empty())
1439   {
1440     // Would we have already computed the file hashes
1441     if (!blockverifiable)
1442     {
1443       u64 filesize = diskfile->FileSize();
1444 
1445       size_t buffersize = 1024*1024;
1446       if (buffersize > min(blocksize, filesize))
1447         buffersize = (size_t)min(blocksize, filesize);
1448 
1449       char *buffer = new char[buffersize];
1450 
1451       u64 offset = 0;
1452 
1453       MD5Context context;
1454 
1455       while (offset < filesize)
1456       {
1457         size_t want = (size_t)min((u64)buffersize, filesize-offset);
1458 
1459         if (!diskfile->Read(offset, buffer, want))
1460         {
1461           delete [] buffer;
1462           return false;
1463         }
1464 
1465         // Will the newly read data reach the 16k boundary
1466         if (offset < 16384 && offset + want >= 16384)
1467         {
1468           context.Update(buffer, (size_t)(16384-offset));
1469 
1470           // Compute the 16k hash
1471           MD5Context temp = context;
1472           temp.Final(hash16k);
1473 
1474           // Is there more data
1475           if (offset + want > 16384)
1476           {
1477             context.Update(&buffer[16384-offset], (size_t)(offset+want)-16384);
1478           }
1479         }
1480         else
1481         {
1482           context.Update(buffer, want);
1483         }
1484 
1485         offset += want;
1486       }
1487 
1488       // Compute the file hash
1489       MD5Hash hashfull;
1490       context.Final(hashfull);
1491 
1492       // If we did not have 16k of data, then the 16k hash
1493       // is the same as the full hash
1494       if (filesize < 16384)
1495       {
1496         hash16k = hashfull;
1497       }
1498     }
1499 
1500     list<Par2RepairerSourceFile*>::iterator sf = unverifiablesourcefiles.begin();
1501 
1502     // Compare the hash values of each source file for a match
1503     while (sf != unverifiablesourcefiles.end())
1504     {
1505       sourcefile = *sf;
1506 
1507       // Does the file match
1508       if (sourcefile->GetCompleteFile() == 0 &&
1509           diskfile->FileSize() == sourcefile->GetDescriptionPacket()->FileSize() &&
1510           hash16k == sourcefile->GetDescriptionPacket()->Hash16k() &&
1511           hashfull == sourcefile->GetDescriptionPacket()->HashFull())
1512       {
1513         if (noiselevel > nlSilent)
1514         {
1515           #pragma omp critical
1516           sout << diskfile->FileName() << " is a perfect match for " << sourcefile->GetDescriptionPacket()->FileName() << endl;
1517         }
1518         // Record that we have a perfect match for this source file
1519         sourcefile->SetCompleteFile(diskfile);
1520 
1521         if (blocksallocated)
1522         {
1523           // Allocate all of the DataBlocks for the source file to the DiskFile
1524 
1525           u64 offset = 0;
1526           u64 filesize = sourcefile->GetDescriptionPacket()->FileSize();
1527 
1528           vector<DataBlock>::iterator sb = sourcefile->SourceBlocks();
1529 
1530           while (offset < filesize)
1531           {
1532             DataBlock &datablock = *sb;
1533 
1534             datablock.SetLocation(diskfile, offset);
1535             datablock.SetLength(min(blocksize, filesize-offset));
1536 
1537             offset += blocksize;
1538             ++sb;
1539           }
1540         }
1541 
1542         // Return the match
1543         return true;
1544       }
1545 
1546       ++sf;
1547     }
1548   }
1549 
1550   return true;
1551 }
1552 
1553 // Perform a sliding window scan of the DiskFile looking for blocks of data that
1554 // might belong to any of the source files (for which a verification packet was
1555 // available). If a block of data might be from more than one source file, prefer
1556 // the one specified by the "sourcefile" parameter. If the first data block
1557 // found is for a different source file then "sourcefile" is changed accordingly.
ScanDataFile(DiskFile * diskfile,string basepath,Par2RepairerSourceFile * & sourcefile,MatchType & matchtype,MD5Hash & hashfull,MD5Hash & hash16k,u32 & count)1558 bool Par2Repairer::ScanDataFile(DiskFile                *diskfile,    // [in]
1559                                 string                  basepath,     // [in]
1560                                 Par2RepairerSourceFile* &sourcefile,  // [in/out]
1561                                 MatchType               &matchtype,   // [out]
1562                                 MD5Hash                 &hashfull,    // [out]
1563                                 MD5Hash                 &hash16k,     // [out]
1564                                 u32                     &count)       // [out]
1565 {
1566   // Remember which file we wanted to match
1567   Par2RepairerSourceFile *originalsourcefile = sourcefile;
1568 
1569   matchtype = eNoMatch;
1570 
1571   string name;
1572   DiskFile::SplitRelativeFilename(diskfile->FileName(), basepath, name);
1573 
1574   // Is the file empty
1575   if (diskfile->FileSize() == 0)
1576   {
1577     // If the file is empty, then just return
1578     if (noiselevel > nlSilent)
1579     {
1580       if (originalsourcefile != 0)
1581       {
1582         #pragma omp critical
1583         sout << "Target: \"" << name << "\" - empty." << endl;
1584       }
1585       else
1586       {
1587         #pragma omp critical
1588         sout << "File: \"" << name << "\" - empty." << endl;
1589       }
1590     }
1591     return true;
1592   }
1593 
1594   string shortname;
1595   if (name.size() > 56)
1596   {
1597     shortname = name.substr(0, 28) + "..." + name.substr(name.size()-28);
1598   }
1599   else
1600   {
1601     shortname = name;
1602   }
1603 
1604   // Create the checksummer for the file and start reading from it
1605   FileCheckSummer filechecksummer(diskfile, blocksize, windowtable, windowmask);
1606   if (!filechecksummer.Start())
1607     return false;
1608 
1609   // Assume we will make a perfect match for the file
1610   matchtype = eFullMatch;
1611 
1612   // How many matches have we had
1613   count = 0;
1614 
1615   // How many blocks have already been found
1616   u32 duplicatecount = 0;
1617 
1618   // Have we found data blocks in this file that belong to more than one target file
1619   bool multipletargets = false;
1620 
1621   // Which block do we expect to find first
1622   const VerificationHashEntry *nextentry = 0;
1623 
1624   // How far will we scan the file (1 byte at a time)
1625   // before skipping ahead looking for the next block
1626   u64 scandistance = min(skipleaway<<1, blocksize);
1627 
1628   // Distance to skip forward if we don't find a block
1629   u64 scanskip = skipdata ? blocksize - scandistance : 0;
1630 
1631   // Assume with are half way through scanning
1632   u64 scanoffset = scandistance >> 1;
1633 
1634   // Total number of bytes that were skipped whilst scanning
1635   u64 skippeddata = 0;
1636 
1637   // Offset of last data that was found
1638   u64 lastmatchoffset = 0;
1639 
1640   bool progressline = false;
1641 
1642   u64 oldoffset = 0;
1643   u64 printprogress = 0;
1644 
1645 #ifdef _OPENMP
1646   if (noiselevel > nlQuiet)
1647   {
1648     #pragma omp critical
1649     sout << "Opening: \"" << shortname << "\"" << endl;
1650   }
1651 #endif
1652 
1653   // Whilst we have not reached the end of the file
1654   while (filechecksummer.Offset() < diskfile->FileSize())
1655   {
1656 // OPENMP progress line printing
1657 #ifdef _OPENMP
1658     if (noiselevel > nlQuiet)
1659     {
1660       // Are we processing extrafiles? Use correct total size
1661       u64 ts = mtprocessingextrafiles ? mttotalextrasize : mttotalsize;
1662 
1663       // Update progress indicator
1664       printprogress += filechecksummer.Offset() - oldoffset;
1665       if (printprogress == blocksize || filechecksummer.ShortBlock())
1666       {
1667         u32 oldfraction;
1668         u32 newfraction;
1669         #pragma omp critical
1670         {
1671         oldfraction = (u32)(1000 * mttotalprogress / ts);
1672         mttotalprogress += printprogress;
1673         newfraction = (u32)(1000 * mttotalprogress / ts);
1674         }
1675 
1676         printprogress = 0;
1677 
1678         if (oldfraction != newfraction)
1679         {
1680           #pragma omp critical
1681           sout << "Scanning: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
1682 
1683           progressline = true;
1684         }
1685       }
1686       oldoffset = filechecksummer.Offset();
1687 
1688     }
1689 // NON-OPENMP progress line printing
1690 #else
1691     if (noiselevel > nlQuiet)
1692     {
1693       // Update progress indicator
1694       printprogress += filechecksummer.Offset() - oldoffset;
1695       if (printprogress == blocksize || filechecksummer.ShortBlock())
1696       {
1697         u32 oldfraction = (u32)(1000 * (filechecksummer.Offset() - printprogress) / diskfile->FileSize());
1698         u32 newfraction = (u32)(1000 * filechecksummer.Offset() / diskfile->FileSize());
1699         printprogress = 0;
1700 
1701         if (oldfraction != newfraction)
1702         {
1703           sout << "Scanning: \"" << shortname << "\": " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
1704 
1705           progressline = true;
1706         }
1707       }
1708       oldoffset = filechecksummer.Offset();
1709     }
1710 #endif
1711 
1712     // If we fail to find a match, it might be because it was a duplicate of a block
1713     // that we have already found.
1714     bool duplicate;
1715 
1716     // Look for a match
1717     const VerificationHashEntry *currententry = verificationhashtable.FindMatch(nextentry, sourcefile, filechecksummer, duplicate);
1718 
1719     // Did we find a match
1720     if (currententry != 0)
1721     {
1722       if (lastmatchoffset < filechecksummer.Offset() && noiselevel > nlNormal)
1723       {
1724         if (progressline)
1725         {
1726           #pragma omp critical
1727           sout << endl;
1728           progressline = false;
1729         }
1730         #pragma omp critical
1731         sout << "No data found between offset " << lastmatchoffset
1732           << " and " << filechecksummer.Offset() << endl;
1733       }
1734 
1735       // Is this the first match
1736       if (count == 0)
1737       {
1738         // Which source file was it
1739         sourcefile = currententry->SourceFile();
1740 
1741         // If the first match found was not actually the first block
1742         // for the source file, or it was not at the start of the
1743         // data file: then this is a partial match.
1744         if (!currententry->FirstBlock() || filechecksummer.Offset() != 0)
1745         {
1746           matchtype = ePartialMatch;
1747         }
1748       }
1749       else
1750       {
1751         // If the match found is not the one which was expected
1752         // then this is a partial match
1753 
1754         if (currententry != nextentry)
1755         {
1756           matchtype = ePartialMatch;
1757         }
1758 
1759         // Is the match from a different source file
1760         if (sourcefile != currententry->SourceFile())
1761         {
1762           multipletargets = true;
1763         }
1764       }
1765 
1766       if (blocksallocated)
1767       {
1768         // Record the match
1769         currententry->SetBlock(diskfile, filechecksummer.Offset());
1770       }
1771 
1772       // Update the number of matches found
1773       count++;
1774 
1775       // What entry do we expect next
1776       nextentry = currententry->Next();
1777 
1778       // Advance to the next block
1779       if (!filechecksummer.Jump(currententry->GetDataBlock()->GetLength()))
1780         return false;
1781 
1782       // If the next match fails, assume we hare half way through scanning for the next block
1783       scanoffset = scandistance >> 1;
1784 
1785       // Update offset of last match
1786       lastmatchoffset = filechecksummer.Offset();
1787     }
1788     else
1789     {
1790       // This cannot be a perfect match
1791       matchtype = ePartialMatch;
1792 
1793       // Was this a duplicate match
1794       if (duplicate && false) // ignore duplicates
1795       {
1796         duplicatecount++;
1797 
1798         // What entry would we expect next
1799         nextentry = 0;
1800 
1801         // Advance one whole block
1802         if (!filechecksummer.Jump(blocksize))
1803           return false;
1804       }
1805       else
1806       {
1807         // What entry do we expect next
1808         nextentry = 0;
1809 
1810         if (!filechecksummer.Step())
1811           return false;
1812 
1813         u64 skipfrom = filechecksummer.Offset();
1814 
1815         // Have we scanned too far without finding a block?
1816         if (scanskip > 0
1817             && ++scanoffset >= scandistance
1818             && skipfrom < diskfile->FileSize())
1819         {
1820           // Skip forwards to where we think we might find more data
1821           if (!filechecksummer.Jump(scanskip))
1822             return false;
1823 
1824           // Update the count of skipped data
1825           skippeddata += filechecksummer.Offset() - skipfrom;
1826 
1827           // Reset scan offset to 0
1828           scanoffset = 0;
1829         }
1830       }
1831     }
1832   }
1833 
1834 #ifdef _OPENMP
1835   if (noiselevel > nlQuiet)
1836   {
1837     if (filechecksummer.Offset() == diskfile->FileSize()) {
1838       #pragma omp atomic
1839       mttotalprogress += filechecksummer.Offset() - oldoffset;
1840     }
1841   }
1842 #endif
1843 
1844   if (lastmatchoffset < filechecksummer.Offset() && noiselevel > nlNormal)
1845   {
1846     if (progressline)
1847     {
1848       #pragma omp critical
1849       sout << endl;
1850       progressline = false;
1851     }
1852 
1853     #pragma omp critical
1854     sout << "No data found between offset " << lastmatchoffset
1855       << " and " << filechecksummer.Offset() << endl;
1856   }
1857 
1858   // Get the Full and 16k hash values of the file
1859   filechecksummer.GetFileHashes(hashfull, hash16k);
1860 
1861   if (noiselevel >= nlDebug)
1862   {
1863     #pragma omp critical
1864     {
1865     // Clear out old scanning line
1866     sout << std::setw(shortname.size()+19) << std::setfill(' ') << "";
1867 
1868     if (duplicatecount > 0)
1869       sout << "\r[DEBUG] duplicates: " << duplicatecount << endl;
1870     sout << "\r[DEBUG] matchcount: " << count << endl;
1871     sout << "[DEBUG] ----------------------" << endl;
1872     }
1873   }
1874 
1875   // Did we make any matches at all
1876   if (count > 0)
1877   {
1878     // If this still might be a perfect match, check the
1879     // hashes, file size, and number of blocks to confirm.
1880     if (matchtype            != eFullMatch ||
1881         count                != sourcefile->GetVerificationPacket()->BlockCount() ||
1882         diskfile->FileSize() != sourcefile->GetDescriptionPacket()->FileSize() ||
1883         hashfull             != sourcefile->GetDescriptionPacket()->HashFull() ||
1884         hash16k              != sourcefile->GetDescriptionPacket()->Hash16k())
1885     {
1886       matchtype = ePartialMatch;
1887 
1888       if (noiselevel > nlSilent)
1889       {
1890         // Did we find data from multiple target files
1891         if (multipletargets)
1892         {
1893           // Were we scanning the target file or an extra file
1894           if (originalsourcefile != 0)
1895           {
1896             #pragma omp critical
1897             sout << "Target: \""
1898               << name
1899               << "\" - damaged, found "
1900               << count
1901               << " data blocks from several target files."
1902               << endl;
1903           }
1904           else
1905           {
1906             #pragma omp critical
1907             sout << "File: \""
1908               << name
1909               << "\" - found "
1910               << count
1911               << " data blocks from several target files."
1912               << endl;
1913           }
1914         }
1915         else
1916         {
1917           // Did we find data blocks that belong to the target file
1918           if (originalsourcefile == sourcefile)
1919           {
1920             #pragma omp critical
1921             sout << "Target: \""
1922               << name
1923               << "\" - damaged. Found "
1924               << count
1925               << " of "
1926               << sourcefile->GetVerificationPacket()->BlockCount()
1927               << " data blocks."
1928               << endl;
1929           }
1930           // Were we scanning the target file or an extra file
1931           else if (originalsourcefile != 0)
1932           {
1933             string targetname;
1934             DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);
1935 
1936             #pragma omp critical
1937             sout << "Target: \""
1938               << name
1939               << "\" - damaged. Found "
1940               << count
1941               << " of "
1942               << sourcefile->GetVerificationPacket()->BlockCount()
1943               << " data blocks from \""
1944               << targetname
1945               << "\"."
1946               << endl;
1947           }
1948           else
1949           {
1950             string targetname;
1951             DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);
1952 
1953             #pragma omp critical
1954             sout << "File: \""
1955               << name
1956               << "\" - found "
1957               << count
1958               << " of "
1959               << sourcefile->GetVerificationPacket()->BlockCount()
1960               << " data blocks from \""
1961               << targetname
1962               << "\"."
1963               << endl;
1964           }
1965         }
1966 
1967         if (skippeddata > 0)
1968         {
1969           #pragma omp critical
1970           sout << skippeddata << " bytes of data were skipped whilst scanning." << endl
1971             << "If there are not enough blocks found to repair: try again "
1972             << "with the -N option." << endl;
1973         }
1974       }
1975     }
1976     else
1977     {
1978       if (noiselevel > nlSilent)
1979       {
1980         // Did we match the target file
1981         if (originalsourcefile == sourcefile)
1982         {
1983           #pragma omp critical
1984           sout << "Target: \"" << name << "\" - found." << endl;
1985         }
1986         // Were we scanning the target file or an extra file
1987         else if (originalsourcefile != 0)
1988         {
1989           string targetname;
1990           DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);
1991 
1992           #pragma omp critical
1993           sout << "Target: \""
1994             << name
1995             << "\" - is a match for \""
1996             << targetname
1997             << "\"."
1998             << endl;
1999         }
2000         else
2001         {
2002           string targetname;
2003           DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);
2004 
2005           #pragma omp critical
2006           sout << "File: \""
2007             << name
2008             << "\" - is a match for \""
2009             << targetname
2010             << "\"."
2011             << endl;
2012         }
2013       }
2014     }
2015   }
2016   else
2017   {
2018     matchtype = eNoMatch;
2019 
2020     if (noiselevel > nlSilent)
2021     {
2022       // We found not data, but did the file actually contain blocks we
2023       // had already found in other files.
2024       if (duplicatecount > 0)
2025       {
2026         #pragma omp critical
2027         sout << "File: \""
2028           << name
2029           << "\" - found "
2030           << duplicatecount
2031           << " duplicate data blocks."
2032           << endl;
2033       }
2034       else
2035       {
2036         #pragma omp critical
2037         sout << "File: \""
2038           << name
2039           << "\" - no data found."
2040           << endl;
2041       }
2042 
2043       if (skippeddata > 0)
2044       {
2045         #pragma omp critical
2046         sout << skippeddata << " bytes of data were skipped whilst scanning." << endl
2047           << "If there are not enough blocks found to repair: try again "
2048           << "with the -N option." << endl;
2049       }
2050     }
2051   }
2052 
2053   return true;
2054 }
2055 
2056 // Find out how much data we have found
UpdateVerificationResults(void)2057 void Par2Repairer::UpdateVerificationResults(void)
2058 {
2059   availableblockcount = 0;
2060   missingblockcount = 0;
2061 
2062   completefilecount = 0;
2063   renamedfilecount = 0;
2064   damagedfilecount = 0;
2065   missingfilecount = 0;
2066 
2067   u32 filenumber = 0;
2068   vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
2069 
2070   // Check the recoverable files
2071   while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2072   {
2073     Par2RepairerSourceFile *sourcefile = *sf;
2074 
2075     if (sourcefile)
2076     {
2077       // Was a perfect match for the file found
2078       if (sourcefile->GetCompleteFile() != 0)
2079       {
2080         // Is it the target file or a different one
2081         if (sourcefile->GetCompleteFile() == sourcefile->GetTargetFile())
2082         {
2083           completefilecount++;
2084         }
2085         else
2086         {
2087           renamedfilecount++;
2088         }
2089 
2090         availableblockcount += sourcefile->BlockCount();
2091       }
2092       else
2093       {
2094         // Count the number of blocks that have been found
2095         vector<DataBlock>::iterator sb = sourcefile->SourceBlocks();
2096         for (u32 blocknumber=0; blocknumber<sourcefile->BlockCount(); ++blocknumber, ++sb)
2097         {
2098           DataBlock &datablock = *sb;
2099 
2100           if (datablock.IsSet())
2101             availableblockcount++;
2102         }
2103 
2104         // Does the target file exist
2105         if (sourcefile->GetTargetExists())
2106         {
2107           damagedfilecount++;
2108         }
2109         else
2110         {
2111           missingfilecount++;
2112         }
2113       }
2114     }
2115     else
2116     {
2117       missingfilecount++;
2118     }
2119 
2120     ++filenumber;
2121     ++sf;
2122   }
2123 
2124   missingblockcount = sourceblockcount - availableblockcount;
2125 }
2126 
2127 // Check the verification results and report the results
CheckVerificationResults(void)2128 bool Par2Repairer::CheckVerificationResults(void)
2129 {
2130   // Is repair needed
2131   if (completefilecount < mainpacket->RecoverableFileCount() ||
2132       renamedfilecount > 0 ||
2133       damagedfilecount > 0 ||
2134       missingfilecount > 0)
2135   {
2136     if (noiselevel > nlSilent)
2137       sout << "Repair is required." << endl;
2138     if (noiselevel > nlQuiet)
2139     {
2140       if (renamedfilecount > 0) sout << renamedfilecount << " file(s) have the wrong name." << endl;
2141       if (missingfilecount > 0) sout << missingfilecount << " file(s) are missing." << endl;
2142       if (damagedfilecount > 0) sout << damagedfilecount << " file(s) exist but are damaged." << endl;
2143       if (completefilecount > 0) sout << completefilecount << " file(s) are ok." << endl;
2144 
2145       sout << "You have " << availableblockcount
2146         << " out of " << sourceblockcount
2147         << " data blocks available." << endl;
2148       if (recoverypacketmap.size() > 0)
2149         sout << "You have " << (u32)recoverypacketmap.size()
2150           << " recovery blocks available." << endl;
2151     }
2152 
2153     // Is repair possible
2154     if (recoverypacketmap.size() >= missingblockcount)
2155     {
2156       if (noiselevel > nlSilent)
2157         sout << "Repair is possible." << endl;
2158 
2159       if (noiselevel > nlQuiet)
2160       {
2161         if (recoverypacketmap.size() > missingblockcount)
2162           sout << "You have an excess of "
2163             << (u32)recoverypacketmap.size() - missingblockcount
2164             << " recovery blocks." << endl;
2165 
2166         if (missingblockcount > 0)
2167           sout << missingblockcount
2168             << " recovery blocks will be used to repair." << endl;
2169         else if (recoverypacketmap.size())
2170           sout << "None of the recovery blocks will be used for the repair." << endl;
2171       }
2172 
2173       return true;
2174     }
2175     else
2176     {
2177       if (noiselevel > nlSilent)
2178       {
2179         sout << "Repair is not possible." << endl;
2180         sout << "You need " << missingblockcount - recoverypacketmap.size()
2181           << " more recovery blocks to be able to repair." << endl;
2182       }
2183 
2184       return false;
2185     }
2186   }
2187   else
2188   {
2189     if (noiselevel > nlSilent)
2190       sout << "All files are correct, repair is not required." << endl;
2191 
2192     return true;
2193   }
2194 
2195   return true;
2196 }
2197 
2198 // Rename any damaged or missnamed target files.
RenameTargetFiles(void)2199 bool Par2Repairer::RenameTargetFiles(void)
2200 {
2201   u32 filenumber = 0;
2202   vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
2203 
2204   // Rename any damaged target files
2205   while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2206   {
2207     Par2RepairerSourceFile *sourcefile = *sf;
2208 
2209     // If the target file exists but is not a complete version of the file
2210     if (sourcefile->GetTargetExists() &&
2211         sourcefile->GetTargetFile() != sourcefile->GetCompleteFile())
2212     {
2213       DiskFile *targetfile = sourcefile->GetTargetFile();
2214 
2215       // Rename it
2216       diskFileMap.Remove(targetfile);
2217 
2218       if (!targetfile->Rename())
2219         return false;
2220 
2221       backuplist.push_back(targetfile);
2222 
2223       bool success = diskFileMap.Insert(targetfile);
2224       assert(success);
2225 
2226       // We no longer have a target file
2227       sourcefile->SetTargetExists(false);
2228       sourcefile->SetTargetFile(0);
2229     }
2230 
2231     ++sf;
2232     ++filenumber;
2233   }
2234 
2235   filenumber = 0;
2236   sf = sourcefiles.begin();
2237 
2238   // Rename any missnamed but complete versions of the files
2239   while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2240   {
2241     Par2RepairerSourceFile *sourcefile = *sf;
2242 
2243     // If there is no targetfile and there is a complete version
2244     if (sourcefile->GetTargetFile() == 0 &&
2245         sourcefile->GetCompleteFile() != 0)
2246     {
2247       DiskFile *targetfile = sourcefile->GetCompleteFile();
2248 
2249       // Rename it
2250       diskFileMap.Remove(targetfile);
2251 
2252       if (!targetfile->Rename(sourcefile->TargetFileName()))
2253         return false;
2254 
2255       bool success = diskFileMap.Insert(targetfile);
2256       assert(success);
2257 
2258       // This file is now the target file
2259       sourcefile->SetTargetExists(true);
2260       sourcefile->SetTargetFile(targetfile);
2261 
2262       // We have one more complete file
2263       completefilecount++;
2264     }
2265 
2266     ++sf;
2267     ++filenumber;
2268   }
2269 
2270   return true;
2271 }
2272 
2273 // Work out which files are being repaired, create them, and allocate
2274 // target DataBlocks to them, and remember them for later verification.
CreateTargetFiles(void)2275 bool Par2Repairer::CreateTargetFiles(void)
2276 {
2277   u32 filenumber = 0;
2278   vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
2279 
2280   // Create any missing target files
2281   while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2282   {
2283     Par2RepairerSourceFile *sourcefile = *sf;
2284 
2285     // If the file does not exist
2286     if (!sourcefile->GetTargetExists())
2287     {
2288       DiskFile *targetfile = new DiskFile(sout, serr);
2289       string filename = sourcefile->TargetFileName();
2290       u64 filesize = sourcefile->GetDescriptionPacket()->FileSize();
2291 
2292       // Create the target file
2293       if (!targetfile->Create(filename, filesize))
2294       {
2295         delete targetfile;
2296         return false;
2297       }
2298 
2299       // This file is now the target file
2300       sourcefile->SetTargetExists(true);
2301       sourcefile->SetTargetFile(targetfile);
2302 
2303       // Remember this file
2304       bool success = diskFileMap.Insert(targetfile);
2305       assert(success);
2306 
2307       u64 offset = 0;
2308       vector<DataBlock>::iterator tb = sourcefile->TargetBlocks();
2309 
2310       // Allocate all of the target data blocks
2311       while (offset < filesize)
2312       {
2313         DataBlock &datablock = *tb;
2314 
2315         datablock.SetLocation(targetfile, offset);
2316         datablock.SetLength(min(blocksize, filesize-offset));
2317 
2318         offset += blocksize;
2319         ++tb;
2320       }
2321 
2322       // Add the file to the list of those that will need to be verified
2323       // once the repair has completed.
2324       verifylist.push_back(sourcefile);
2325     }
2326 
2327     ++sf;
2328     ++filenumber;
2329   }
2330 
2331   return true;
2332 }
2333 
2334 // Work out which data blocks are available, which need to be copied
2335 // directly to the output, and which need to be recreated, and compute
2336 // the appropriate Reed Solomon matrix.
ComputeRSmatrix(void)2337 bool Par2Repairer::ComputeRSmatrix(void)
2338 {
2339   inputblocks.resize(sourceblockcount);   // The DataBlocks that will read from disk
2340   copyblocks.resize(availableblockcount); // Those DataBlocks which need to be copied
2341   outputblocks.resize(missingblockcount); // Those DataBlocks that will re recalculated
2342 
2343   vector<DataBlock*>::iterator inputblock  = inputblocks.begin();
2344   vector<DataBlock*>::iterator copyblock   = copyblocks.begin();
2345   vector<DataBlock*>::iterator outputblock = outputblocks.begin();
2346 
2347   // Build an array listing which source data blocks are present and which are missing
2348   vector<bool> present;
2349   present.resize(sourceblockcount);
2350 
2351   vector<DataBlock>::iterator sourceblock  = sourceblocks.begin();
2352   vector<DataBlock>::iterator targetblock  = targetblocks.begin();
2353   vector<bool>::iterator              pres = present.begin();
2354 
2355   // Iterate through all source blocks for all files
2356   while (sourceblock != sourceblocks.end())
2357   {
2358     // Was this block found
2359     if (sourceblock->IsSet())
2360     {
2361       //// Open the file the block was found in.
2362       //if (!sourceblock->Open())
2363       //  return false;
2364 
2365       // Record that the block was found
2366       *pres = true;
2367 
2368       // Add the block to the list of those which will be read
2369       // as input (and which might also need to be copied).
2370       *inputblock = &*sourceblock;
2371       *copyblock = &*targetblock;
2372 
2373       ++inputblock;
2374       ++copyblock;
2375     }
2376     else
2377     {
2378       // Record that the block was missing
2379       *pres = false;
2380 
2381       // Add the block to the list of those to be written
2382       *outputblock = &*targetblock;
2383       ++outputblock;
2384     }
2385 
2386     ++sourceblock;
2387     ++targetblock;
2388     ++pres;
2389   }
2390 
2391   // Set the number of source blocks and which of them are present
2392   if (!rs.SetInput(present, sout, serr))
2393     return false;
2394 
2395   // Start iterating through the available recovery packets
2396   map<u32,RecoveryPacket*>::iterator rp = recoverypacketmap.begin();
2397 
2398   // Continue to fill the remaining list of data blocks to be read
2399   while (inputblock != inputblocks.end())
2400   {
2401     // Get the next available recovery packet
2402     u32 exponent = rp->first;
2403     RecoveryPacket* recoverypacket = rp->second;
2404 
2405     // Get the DataBlock from the recovery packet
2406     DataBlock *recoveryblock = recoverypacket->GetDataBlock();
2407 
2408     //// Make sure the file is open
2409     //if (!recoveryblock->Open())
2410     //  return false;
2411 
2412     // Add the recovery block to the list of blocks that will be read
2413     *inputblock = recoveryblock;
2414 
2415     // Record that the corresponding exponent value is the next one
2416     // to use in the RS matrix
2417     if (!rs.SetOutput(true, (u16)exponent))
2418       return false;
2419 
2420     ++inputblock;
2421     ++rp;
2422   }
2423 
2424   // If we need to, compute and solve the RS matrix
2425   if (missingblockcount == 0)
2426     return true;
2427 
2428   bool success = rs.Compute(noiselevel, sout, serr);
2429 
2430   return success;
2431 }
2432 
2433 // Allocate memory buffers for reading and writing data to disk.
AllocateBuffers(size_t memorylimit)2434 bool Par2Repairer::AllocateBuffers(size_t memorylimit)
2435 {
2436   // Would single pass processing use too much memory
2437   if (blocksize * missingblockcount > memorylimit)
2438   {
2439     // Pick a size that is small enough
2440     chunksize = ~3 & (memorylimit / missingblockcount);
2441   }
2442   else
2443   {
2444     chunksize = (size_t)blocksize;
2445   }
2446 
2447   // Allocate the two buffers
2448   inputbuffer = new u8[(size_t)chunksize];
2449   outputbuffer = new u8[(size_t)chunksize * missingblockcount];
2450 
2451   if (inputbuffer == NULL || outputbuffer == NULL)
2452   {
2453     serr << "Could not allocate buffer memory." << endl;
2454     return false;
2455   }
2456 
2457   return true;
2458 }
2459 
// Read source data, process it through the RS matrix and write it to disk.
//
// blockoffset and blocklength are passed unchanged to every
// DataBlock::ReadData / WriteData call and blocklength to rs.Process;
// presumably they select one chunk (offset and byte count) within each
// block - confirm against the caller.
//
// Two modes:
//  - missingblockcount > 0: every input block is read, copied to its paired
//    copy block when that block is set, and fed through the RS matrix into
//    outputbuffer; the recomputed output blocks are written at the end.
//  - missingblockcount == 0: blocks are only copied from input to copy block.
//
// Returns false as soon as a file cannot be opened or a read or write fails
// (NOTE(review): lastopenfile is not closed on those early returns);
// returns true otherwise.
bool Par2Repairer::ProcessData(u64 blockoffset, size_t blocklength)
{
  // Running total of the byte counts reported by WriteData
  u64 totalwritten = 0;

  // Clear the output buffer
  memset(outputbuffer, 0, (size_t)chunksize * missingblockcount);

  vector<DataBlock*>::iterator inputblock = inputblocks.begin();
  vector<DataBlock*>::iterator copyblock  = copyblocks.begin();
  u32                          inputindex = 0;

  // The file opened for the most recent read; only one is kept open at a time
  DiskFile *lastopenfile = NULL;

  // Are there any blocks which need to be reconstructed
  if (missingblockcount > 0)
  {
    // For each input block
    while (inputblock != inputblocks.end())
    {
      // Are we reading from a new file?
      if (lastopenfile != (*inputblock)->GetDiskFile())
      {
        // Close the last file
        if (lastopenfile != NULL)
        {
          lastopenfile->Close();
        }

        // Open the new file
        lastopenfile = (*inputblock)->GetDiskFile();
        if (!lastopenfile->Open())
        {
          return false;
        }
      }

      // Read data from the current input block
      if (!(*inputblock)->ReadData(blockoffset, blocklength, inputbuffer))
        return false;

      // Copy blocks pair up with the leading input blocks (ComputeRSmatrix
      // fills both lists together for found source blocks); once they run
      // out, the remaining input blocks have no copy block.
      if (copyblock != copyblocks.end())
      {
        // Does this block need to be copied to the target file
        if ((*copyblock)->IsSet())
        {
          size_t wrote;

          // Write the block back to disk in the new target file
          if (!(*copyblock)->WriteData(blockoffset, blocklength, inputbuffer, wrote))
            return false;

          totalwritten += wrote;
        }
        ++copyblock;
      }

      // For each output block (iterations run in parallel under OpenMP;
      // each one works on its own chunksize slice of outputbuffer)
      #pragma omp parallel for
      for (i64 outputindex=0; outputindex<missingblockcount; outputindex++)
      {
        u32 internalOutputindex = (u32) outputindex;
        // Select the appropriate part of the output buffer
        void *outbuf = &((u8*)outputbuffer)[chunksize * internalOutputindex];

        // Process the data
        rs.Process(blocklength, inputindex, inputbuffer, internalOutputindex, outbuf);

        if (noiselevel > nlQuiet)
        {
          // Update a progress indicator (in tenths of a percent).
          // NOTE(review): only the increment is atomic; the two reads of
          // progress around it are not, so the fractions may reflect updates
          // from other threads. This appears to affect the display only.
          u32 oldfraction = (u32)(1000 * progress / totaldata);
          #pragma omp atomic
          progress += blocklength;
          u32 newfraction = (u32)(1000 * progress / totaldata);

          if (oldfraction != newfraction)
          {
            // Serialise output so lines from different threads do not mix
            #pragma omp critical
            sout << "Repairing: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
          }
        }
      }

      ++inputblock;
      ++inputindex;
    }
  }
  else
  {
    // Reconstruction is not required, we are just copying blocks between files

    // For each block that might need to be copied
    // (inputblock is advanced in step with copyblock)
    while (copyblock != copyblocks.end())
    {
      // Does this block need to be copied
      if ((*copyblock)->IsSet())
      {
        // Are we reading from a new file?
        if (lastopenfile != (*inputblock)->GetDiskFile())
        {
          // Close the last file
          if (lastopenfile != NULL)
          {
            lastopenfile->Close();
          }

          // Open the new file
          lastopenfile = (*inputblock)->GetDiskFile();
          if (!lastopenfile->Open())
          {
            return false;
          }
        }

        // Read data from the current input block
        if (!(*inputblock)->ReadData(blockoffset, blocklength, inputbuffer))
          return false;

        // Write the same data to the paired copy block
        size_t wrote;
        if (!(*copyblock)->WriteData(blockoffset, blocklength, inputbuffer, wrote))
          return false;
        totalwritten += wrote;
      }

      if (noiselevel > nlQuiet)
      {
        // Update a progress indicator (advanced for every block, whether or
        // not it was copied)
        u32 oldfraction = (u32)(1000 * progress / totaldata);
        progress += blocklength;
        u32 newfraction = (u32)(1000 * progress / totaldata);

        if (oldfraction != newfraction)
        {
          sout << "Processing: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
        }
      }

      ++copyblock;
      ++inputblock;
    }
  }

  // Close the last file
  if (lastopenfile != NULL)
  {
    lastopenfile->Close();
  }

  if (noiselevel > nlQuiet)
    sout << "Writing recovered data\r";

  // For each output block that has been recomputed
  // (the loop body does not run when missingblockcount is zero)
  vector<DataBlock*>::iterator outputblock = outputblocks.begin();
  for (u32 outputindex=0; outputindex<missingblockcount;outputindex++)
  {
    // Select the appropriate part of the output buffer
    char *outbuf = &((char*)outputbuffer)[chunksize * outputindex];

    // Write the data to the target file
    size_t wrote;
    if (!(*outputblock)->WriteData(blockoffset, blocklength, outbuf, wrote))
      return false;
    totalwritten += wrote;

    ++outputblock;
  }

  if (noiselevel > nlQuiet)
    sout << "Wrote " << totalwritten << " bytes to disk" << endl;

  return true;
}
2634 
2635 // Verify that all of the reconstructed target files are now correct
VerifyTargetFiles(const string & basepath)2636 bool Par2Repairer::VerifyTargetFiles(const string &basepath)
2637 {
2638   bool finalresult = true;
2639 
2640   // Verify the target files in alphabetical order
2641   sort(verifylist.begin(), verifylist.end(), SortSourceFilesByFileName);
2642 
2643 #ifdef _OPENMP
2644   mttotalsize = 0;
2645   mttotalprogress = 0;
2646 
2647   for (size_t i=0; i<verifylist.size(); ++i)
2648   {
2649     if (verifylist[i])
2650       mttotalsize += verifylist[i]->GetDescriptionPacket()->FileSize();
2651   }
2652 #endif
2653 
2654   // Iterate through each file in the verification list
2655   #pragma omp parallel for schedule(dynamic) num_threads(Par2Repairer::GetFileThreads())
2656   for (int i=0; i< static_cast<int>(verifylist.size()); ++i)
2657   {
2658     Par2RepairerSourceFile *sourcefile = verifylist[i];
2659     DiskFile *targetfile = sourcefile->GetTargetFile();
2660 
2661     // Close the file
2662     if (targetfile->IsOpen())
2663       targetfile->Close();
2664 
2665     // Mark all data blocks for the file as unknown
2666     vector<DataBlock>::iterator sb = sourcefile->SourceBlocks();
2667     for (u32 blocknumber=0; blocknumber<sourcefile->BlockCount(); blocknumber++)
2668     {
2669       sb->ClearLocation();
2670       ++sb;
2671     }
2672 
2673     // Say we don't have a complete version of the file
2674     sourcefile->SetCompleteFile(0);
2675 
2676     // Re-open the target file
2677     if (!targetfile->Open())
2678     {
2679       finalresult = false;
2680       continue;
2681     }
2682 
2683     // Verify the file again
2684     if (!VerifyDataFile(targetfile, sourcefile, basepath))
2685       finalresult = false;
2686 
2687     // Close the file again
2688     targetfile->Close();
2689   }
2690 
2691   // Find out how much data we have found
2692   UpdateVerificationResults();
2693 
2694   return finalresult;
2695 }
2696 
2697 // Delete all of the partly reconstructed files
DeleteIncompleteTargetFiles(void)2698 bool Par2Repairer::DeleteIncompleteTargetFiles(void)
2699 {
2700   vector<Par2RepairerSourceFile*>::iterator sf = verifylist.begin();
2701 
2702   // Iterate through each file in the verification list
2703   while (sf != verifylist.end())
2704   {
2705     Par2RepairerSourceFile *sourcefile = *sf;
2706     if (sourcefile->GetTargetExists())
2707     {
2708       DiskFile *targetfile = sourcefile->GetTargetFile();
2709 
2710       // Close and delete the file
2711       if (targetfile->IsOpen())
2712         targetfile->Close();
2713       targetfile->Delete();
2714 
2715       // Forget the file
2716       diskFileMap.Remove(targetfile);
2717       delete targetfile;
2718 
2719       // There is no target file
2720       sourcefile->SetTargetExists(false);
2721       sourcefile->SetTargetFile(0);
2722     }
2723 
2724     ++sf;
2725   }
2726 
2727   return true;
2728 }
2729 
RemoveBackupFiles(void)2730 bool Par2Repairer::RemoveBackupFiles(void)
2731 {
2732   vector<DiskFile*>::iterator bf = backuplist.begin();
2733 
2734   if (noiselevel > nlSilent
2735       && bf != backuplist.end())
2736   {
2737     sout << endl << "Purge backup files." << endl;
2738   }
2739 
2740   // Iterate through each file in the backuplist
2741   while (bf != backuplist.end())
2742   {
2743     if (noiselevel > nlSilent)
2744     {
2745       string name;
2746       string path;
2747       DiskFile::SplitFilename((*bf)->FileName(), path, name);
2748       sout << "Remove \"" << name << "\"." << endl;
2749     }
2750 
2751     if ((*bf)->IsOpen())
2752       (*bf)->Close();
2753     (*bf)->Delete();
2754 
2755     ++bf;
2756   }
2757 
2758   return true;
2759 }
2760 
RemoveParFiles(void)2761 bool Par2Repairer::RemoveParFiles(void)
2762 {
2763   if (noiselevel > nlSilent
2764       && !par2list.empty())
2765   {
2766     sout << endl << "Purge par files." << endl;
2767   }
2768 
2769   for (list<string>::const_iterator s=par2list.begin(); s!=par2list.end(); ++s)
2770   {
2771     DiskFile *diskfile = new DiskFile(sout, serr);
2772 
2773     if (diskfile->Open(*s))
2774     {
2775       if (noiselevel > nlSilent)
2776       {
2777         string name;
2778         string path;
2779         DiskFile::SplitFilename((*s), path, name);
2780         sout << "Remove \"" << name << "\"." << endl;
2781       }
2782 
2783       if (diskfile->IsOpen())
2784         diskfile->Close();
2785       diskfile->Delete();
2786     }
2787 
2788     delete diskfile;
2789   }
2790 
2791   return true;
2792 }
2793