1 // This file is part of par2cmdline (a PAR 2.0 compatible file verification and
2 // repair tool). See http://parchive.sourceforge.net for details of PAR 2.0.
3 //
4 // Copyright (c) 2003 Peter Brian Clements
5 // Copyright (c) 2019 Michael D. Nahas
6 //
7 // par2cmdline is free software; you can redistribute it and/or modify
8 // it under the terms of the GNU General Public License as published by
9 // the Free Software Foundation; either version 2 of the License, or
10 // (at your option) any later version.
11 //
12 // par2cmdline is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 // GNU General Public License for more details.
16 //
17 // You should have received a copy of the GNU General Public License
18 // along with this program; if not, write to the Free Software
19 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20
21 #include "libpar2internal.h"
22
23 #ifdef _MSC_VER
24 #ifdef _DEBUG
25 #undef THIS_FILE
26 static char THIS_FILE[]=__FILE__;
27 #define new DEBUG_NEW
28 #endif
29 #endif
30
31
32 // static variable
33 #ifdef _OPENMP
34 u32 Par2Repairer::filethreads = _FILE_THREADS;
35 #endif
36
37
// Construct a repairer.
// sout/serr: streams for normal and error output (held by reference for the
// lifetime of this object). noiselevel: how verbose reporting should be.
// All containers default-construct empty; scalar state is zeroed below and
// filled in properly by Process().
Par2Repairer::Par2Repairer(std::ostream &sout, std::ostream &serr, const NoiseLevel noiselevel)
: sout(sout)
, serr(serr)
, noiselevel(noiselevel)
, searchpath()
, basepath()
, setid()
, recoverypacketmap()
, diskFileMap()
, sourcefilemap()
, sourcefiles()
, verifylist()
, backuplist()
, par2list()
, sourceblocks()
, targetblocks()
, windowmask(0)
, blockverifiable(false)
, verificationhashtable()
, unverifiablesourcefiles()
, inputblocks()
, copyblocks()
, outputblocks()
, rs()
{
  // File-scanning options; set from the caller's arguments in Process().
  skipdata = false;
  skipleaway = 0;

  // No packets seen yet; the set id is captured from the first valid packet.
  firstpacket = true;
  mainpacket = 0;
  creatorpacket = 0;

  blocksize = 0;
  chunksize = 0;

  // Block statistics, filled in once packets are loaded and files verified.
  sourceblockcount = 0;
  availableblockcount = 0;
  missingblockcount = 0;

  // Table for the sliding CRC computation, generated in ComputeWindowTable().
  memset(windowtable, 0, sizeof(windowtable));

  blocksallocated = false;

  // Per-file verification outcome counters.
  completefilecount = 0;
  renamedfilecount = 0;
  damagedfilecount = 0;
  missingfilecount = 0;

  // Raw I/O buffers, allocated in AllocateBuffers() and freed in the destructor.
  inputbuffer = 0;
  outputbuffer = 0;

  // Progress reporting for the repair phase.
  progress = 0;
  totaldata = 0;

#ifdef _OPENMP
  // Multithreaded file-verification progress accounting.
  mttotalsize = 0;
  mttotalextrasize = 0;
  mttotalprogress = 0;
  mtprocessingextrafiles = false;
#endif
}
99
~Par2Repairer(void)100 Par2Repairer::~Par2Repairer(void)
101 {
102 delete [] (u8*)inputbuffer;
103 delete [] (u8*)outputbuffer;
104
105 map<u32,RecoveryPacket*>::iterator rp = recoverypacketmap.begin();
106 while (rp != recoverypacketmap.end())
107 {
108 delete (*rp).second;
109
110 ++rp;
111 }
112
113 map<MD5Hash,Par2RepairerSourceFile*>::iterator sf = sourcefilemap.begin();
114 while (sf != sourcefilemap.end())
115 {
116 Par2RepairerSourceFile *sourcefile = (*sf).second;
117 delete sourcefile;
118
119 ++sf;
120 }
121
122 delete mainpacket;
123 delete creatorpacket;
124 }
125
// Run the complete verify/repair operation for one PAR2 set.
//
// memorylimit  - cap on memory used for the repair buffers
// _basepath    - directory against which source-file paths are resolved
// nthreads     - OpenMP thread count for repair processing (0 = leave default)
// _filethreads - thread count used for file verification
// parfilename  - the main PAR2 file named by the caller
// _extrafiles  - additional files named by the caller
// dorepair     - true to actually repair; false to only report repairability
// purgefiles   - true to remove backup and PAR2 files after a clean finish
// _skipdata    - whether to skip data whilst scanning files
// _skipleaway  - how much leaway to allow when scanning files
//
// Returns eSuccess or eRepairPossible on the good paths, otherwise an error
// code describing the first stage that failed.
Result Par2Repairer::Process(
  const size_t memorylimit,
  const string &_basepath,
#ifdef _OPENMP
  const u32 nthreads,
  const u32 _filethreads,
#endif
  string parfilename,
  const vector<string> &_extrafiles,
  const bool dorepair,   // derived from operation
  const bool purgefiles,
  const bool _skipdata,
  const u64 _skipleaway
  )
{
#ifdef _OPENMP
  filethreads = _filethreads;
#endif

  // Should we skip data whilst scanning files
  skipdata = _skipdata;

  // How much leaway should we allow when scanning files
  skipleaway = _skipleaway;

  // Get filenames from the command line
  basepath = _basepath;
  std::vector<string> extrafiles = _extrafiles;

#ifdef _OPENMP
  // Set the number of threads
  if (nthreads != 0)
    omp_set_num_threads(nthreads);
#endif

  // Determine the searchpath from the location of the main PAR2 file
  string name;
  DiskFile::SplitFilename(parfilename, searchpath, name);

  par2list.push_back(parfilename);

  // Load packets from the main PAR2 file
  if (!LoadPacketsFromFile(searchpath + name))
    return eLogicError;

  // Load packets from other PAR2 files with names based on the original PAR2 file
  if (!LoadPacketsFromOtherFiles(parfilename))
    return eLogicError;

  // Load packets from any other PAR2 files whose names are given on the command line
  if (!LoadPacketsFromExtraFiles(extrafiles))
    return eLogicError;

  if (noiselevel > nlQuiet)
    sout << endl;

  // Check that the packets are consistent and discard any that are not
  if (!CheckPacketConsistency())
    return eInsufficientCriticalData;

  // Use the information in the main packet to get the source files
  // into the correct order and determine their filenames
  if (!CreateSourceFileList())
    return eLogicError;

  // Determine the total number of DataBlocks for the recoverable source files.
  // Then allocate the DataBlocks and assign them to each source file
  if (!AllocateSourceBlocks())
    return eLogicError;

  // Create a verification hash table for all files for which we have not
  // found a complete version of the file and for which we have
  // a verification packet
  if (!PrepareVerificationHashTable())
    return eLogicError;

  // Compute the table for the sliding CRC computation
  if (!ComputeWindowTable())
    return eLogicError;

  // Attempt to verify all of the source files
  if (!VerifySourceFiles(basepath, extrafiles))
    return eFileIOError;

  if (completefilecount < mainpacket->RecoverableFileCount())
  {
    // Scan any extra files specified on the command line
    if (!VerifyExtraFiles(extrafiles, basepath))
      return eLogicError;
  }

  // Find out how much data we have found
  UpdateVerificationResults();

  if (noiselevel > nlSilent)
    sout << endl;

  // Check the verification results and report the results
  if (!CheckVerificationResults())
    return eRepairNotPossible;

  // Are any of the files incomplete
  if (completefilecount < mainpacket->RecoverableFileCount())
  {
    // Do we want to carry out a repair
    if (dorepair)
    {
      if (noiselevel > nlSilent)
        sout << endl;

      // Rename any damaged or misnamed target files.
      if (!RenameTargetFiles())
        return eFileIOError;

      // Are we still missing any files
      if (completefilecount < mainpacket->RecoverableFileCount())
      {
        // Work out which files are being repaired, create them, and allocate
        // target DataBlocks to them, and remember them for later verification.
        if (!CreateTargetFiles())
          return eFileIOError;

        // Work out which data blocks are available, which need to be copied
        // directly to the output, and which need to be recreated, and compute
        // the appropriate Reed Solomon matrix.
        if (!ComputeRSmatrix())
        {
          // Delete all of the partly reconstructed files
          DeleteIncompleteTargetFiles();
          return eFileIOError;
        }

        if (noiselevel > nlSilent)
          sout << endl;

        // Allocate memory buffers for reading and writing data to disk.
        if (!AllocateBuffers(memorylimit))
        {
          // Delete all of the partly reconstructed files
          DeleteIncompleteTargetFiles();
          return eMemoryError;
        }

        // Set the total amount of data to be processed.
        progress = 0;
        totaldata = blocksize * sourceblockcount * (missingblockcount > 0 ? missingblockcount : 1);

        // Start at an offset of 0 within a block.
        u64 blockoffset = 0;
        while (blockoffset < blocksize) // Continue until the end of the block.
        {
          // Work out how much data to process this time.
          size_t blocklength = (size_t)min((u64)chunksize, blocksize-blockoffset);

          // Read source data, process it through the RS matrix and write it to disk.
          if (!ProcessData(blockoffset, blocklength))
          {
            // Delete all of the partly reconstructed files
            DeleteIncompleteTargetFiles();
            return eFileIOError;
          }

          // Advance to the next offset within each block
          blockoffset += blocklength;
        }

        if (noiselevel > nlSilent)
          sout << endl << "Verifying repaired files:" << endl << endl;

        // Verify that all of the reconstructed target files are now correct
        if (!VerifyTargetFiles(basepath))
        {
          // Delete all of the partly reconstructed files
          DeleteIncompleteTargetFiles();
          return eFileIOError;
        }
      }

      // Are all of the target files now complete?
      if (completefilecount<mainpacket->RecoverableFileCount())
      {
        serr << "Repair Failed." << endl;
        return eRepairFailed;
      }
      else
      {
        if (noiselevel > nlSilent)
          sout << endl << "Repair complete." << endl;
      }
    }
    else
    {
      // Verification-only mode: report that a repair would have worked.
      return eRepairPossible;
    }
  }

  if (purgefiles == true)
  {
    RemoveBackupFiles();
    RemoveParFiles();
  }

  return eSuccess;
}
330
// Load the packets from the specified file.
//
// Scans the whole file for valid PAR2 packets: finds the packet magic
// (resynchronising byte-by-byte after any junk), validates each candidate's
// length and MD5 hash, and dispatches it to the appropriate Load*Packet
// handler if it belongs to the current set. The file is remembered in
// diskFileMap only if at least one useful packet was found.
//
// Always returns true: an unopenable or packet-free file is not an error.
bool Par2Repairer::LoadPacketsFromFile(string filename)
{
  // Skip the file if it has already been processed
  if (diskFileMap.Find(filename) != 0)
  {
    return true;
  }

  DiskFile *diskfile = new DiskFile(sout, serr);

  // Open the file
  if (!diskfile->Open(filename))
  {
    // If we could not open the file, ignore the error and
    // proceed to the next file
    delete diskfile;
    return true;
  }

  if (noiselevel > nlSilent)
  {
    string path;
    string name;
    DiskFile::SplitFilename(filename, path, name);
    sout << "Loading \"" << name << "\"." << endl;
  }

  // How many useable packets have we found
  u32 packets = 0;

  // How many recovery packets were there
  u32 recoverypackets = 0;

  // How big is the file
  u64 filesize = diskfile->FileSize();
  if (filesize > 0)
  {
    // Allocate a buffer to read data into
    // The buffer should be large enough to hold a whole
    // critical packet (i.e. file verification, file description, main,
    // and creator), but not necessarily a whole recovery packet.
    size_t buffersize = (size_t)min((u64)1048576, filesize);
    u8 *buffer = new u8[buffersize];

    // Progress indicator
    u64 progress = 0;

    // Start at the beginning of the file
    u64 offset = 0;

    // Continue as long as there is at least enough for the packet header
    while (offset + sizeof(PACKET_HEADER) <= filesize)
    {
      if (noiselevel > nlQuiet)
      {
        // Update a progress indicator (in tenths of a percent)
        u32 oldfraction = (u32)(1000 * progress / filesize);
        u32 newfraction = (u32)(1000 * offset / filesize);
        if (oldfraction != newfraction)
        {
          sout << "Loading: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
          progress = offset;
        }
      }

      // Attempt to read the next packet header
      PACKET_HEADER header;
      if (!diskfile->Read(offset, &header, sizeof(header)))
        break;

      // Does this look like it might be a packet
      if (packet_magic != header.magic)
      {
        // Not at a packet boundary: resynchronise by scanning forward
        // for the next occurrence of the magic value.
        offset++;

        // Is there still enough for at least a whole packet header
        while (offset + sizeof(PACKET_HEADER) <= filesize)
        {
          // How much can we read into the buffer
          size_t want = (size_t)min((u64)buffersize, filesize-offset);

          // Fill the buffer
          if (!diskfile->Read(offset, buffer, want))
          {
            offset = filesize;
            break;
          }

          // Scan the buffer for the magic value
          u8 *current = buffer;
          u8 *limit = &buffer[want-sizeof(PACKET_HEADER)];
          while (current <= limit && packet_magic != ((PACKET_HEADER*)current)->magic)
          {
            current++;
          }

          // What file offset did we reach
          offset += current-buffer;

          // Did we find the magic
          if (current <= limit)
          {
            memcpy(&header, current, sizeof(header));
            break;
          }
        }

        // Did we reach the end of the file
        if (offset + sizeof(PACKET_HEADER) > filesize)
        {
          break;
        }
      }

      // We have found the magic

      // Check the packet length
      if (sizeof(PACKET_HEADER) > header.length || // packet length is too small
          0 != (header.length & 3) ||              // packet length is not a multiple of 4
          filesize < offset + header.length)       // packet would extend beyond the end of the file
      {
        offset++;
        continue;
      }

      // Compute the MD5 Hash of the packet
      // (the hash covers everything from the setid field onwards)
      MD5Context context;
      context.Update(&header.setid, sizeof(header)-offsetof(PACKET_HEADER, setid));

      // How much more do I need to read to get the whole packet
      u64 current = offset+sizeof(PACKET_HEADER);
      u64 limit = offset+header.length;
      while (current < limit)
      {
        size_t want = (size_t)min((u64)buffersize, limit-current);

        if (!diskfile->Read(current, buffer, want))
          break;

        context.Update(buffer, want);

        current += want;
      }

      // Did the whole packet get processed
      if (current<limit)
      {
        offset++;
        continue;
      }

      // Check the calculated packet hash against the value in the header
      MD5Hash hash;
      context.Final(hash);
      if (hash != header.hash)
      {
        offset++;
        continue;
      }

      // If this is the first packet that we have found then record the setid
      if (firstpacket)
      {
        setid = header.setid;
        firstpacket = false;
      }

      // Is the packet from the correct set
      if (setid == header.setid)
      {
        // Is it a packet type that we are interested in
        if (recoveryblockpacket_type == header.type)
        {
          if (LoadRecoveryPacket(diskfile, offset, header))
          {
            recoverypackets++;
            packets++;
          }
        }
        else if (fileverificationpacket_type == header.type)
        {
          if (LoadVerificationPacket(diskfile, offset, header))
          {
            packets++;
          }
        }
        else if (filedescriptionpacket_type == header.type)
        {
          if (LoadDescriptionPacket(diskfile, offset, header))
          {
            packets++;
          }
        }
        else if (mainpacket_type == header.type)
        {
          if (LoadMainPacket(diskfile, offset, header))
          {
            packets++;
          }
        }
        else if (creatorpacket_type == header.type)
        {
          if (LoadCreatorPacket(diskfile, offset, header))
          {
            packets++;
          }
        }
      }

      // Advance to the next packet
      offset += header.length;
    }

    delete [] buffer;
  }

  // We have finished with the file for now
  diskfile->Close();

  // Did we actually find any interesting packets
  if (packets > 0)
  {
    if (noiselevel > nlQuiet)
    {
      sout << "Loaded " << packets << " new packets";
      if (recoverypackets > 0) sout << " including " << recoverypackets << " recovery blocks";
      sout << endl;
    }

    // Remember that the file was processed
    bool success = diskFileMap.Insert(diskfile);
    assert(success);
  }
  else
  {
    if (noiselevel > nlQuiet)
      sout << "No new packets found" << endl;
    delete diskfile;
  }

  return true;
}
574
575 // Finish loading a recovery packet
LoadRecoveryPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)576 bool Par2Repairer::LoadRecoveryPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
577 {
578 RecoveryPacket *packet = new RecoveryPacket;
579
580 // Load the packet from disk
581 if (!packet->Load(diskfile, offset, header))
582 {
583 delete packet;
584 return false;
585 }
586
587 // What is the exponent value of this recovery packet
588 u32 exponent = packet->Exponent();
589
590 // Try to insert the new packet into the recovery packet map
591 pair<map<u32,RecoveryPacket*>::const_iterator, bool> location = recoverypacketmap.insert(pair<u32,RecoveryPacket*>(exponent, packet));
592
593 // Did the insert fail
594 if (!location.second)
595 {
596 // The packet must be a duplicate of one we already have
597 delete packet;
598 return false;
599 }
600
601 return true;
602 }
603
604 // Finish loading a file description packet
LoadDescriptionPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)605 bool Par2Repairer::LoadDescriptionPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
606 {
607 DescriptionPacket *packet = new DescriptionPacket;
608
609 // Load the packet from disk
610 if (!packet->Load(diskfile, offset, header))
611 {
612 delete packet;
613 return false;
614 }
615
616 // What is the fileid
617 const MD5Hash &fileid = packet->FileId();
618
619 // Look up the fileid in the source file map for an existing source file entry
620 map<MD5Hash, Par2RepairerSourceFile*>::iterator sfmi = sourcefilemap.find(fileid);
621 Par2RepairerSourceFile *sourcefile = (sfmi == sourcefilemap.end()) ? 0 :sfmi->second;
622
623 // Was there an existing source file
624 if (sourcefile)
625 {
626 // Does the source file already have a description packet
627 if (sourcefile->GetDescriptionPacket())
628 {
629 // Yes. We don't need another copy
630 delete packet;
631 return false;
632 }
633 else
634 {
635 // No. Store the packet in the source file
636 sourcefile->SetDescriptionPacket(packet);
637 return true;
638 }
639 }
640 else
641 {
642 // Create a new source file for the packet
643 sourcefile = new Par2RepairerSourceFile(packet, NULL);
644
645 // Record the source file in the source file map
646 sourcefilemap.insert(pair<MD5Hash, Par2RepairerSourceFile*>(fileid, sourcefile));
647
648 return true;
649 }
650 }
651
652 // Finish loading a file verification packet
LoadVerificationPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)653 bool Par2Repairer::LoadVerificationPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
654 {
655 VerificationPacket *packet = new VerificationPacket;
656
657 // Load the packet from disk
658 if (!packet->Load(diskfile, offset, header))
659 {
660 delete packet;
661 return false;
662 }
663
664 // What is the fileid
665 const MD5Hash &fileid = packet->FileId();
666
667 // Look up the fileid in the source file map for an existing source file entry
668 map<MD5Hash, Par2RepairerSourceFile*>::iterator sfmi = sourcefilemap.find(fileid);
669 Par2RepairerSourceFile *sourcefile = (sfmi == sourcefilemap.end()) ? 0 :sfmi->second;
670
671 // Was there an existing source file
672 if (sourcefile)
673 {
674 // Does the source file already have a verification packet
675 if (sourcefile->GetVerificationPacket())
676 {
677 // Yes. We don't need another copy.
678 delete packet;
679 return false;
680 }
681 else
682 {
683 // No. Store the packet in the source file
684 sourcefile->SetVerificationPacket(packet);
685
686 return true;
687 }
688 }
689 else
690 {
691 // Create a new source file for the packet
692 sourcefile = new Par2RepairerSourceFile(NULL, packet);
693
694 // Record the source file in the source file map
695 sourcefilemap.insert(pair<MD5Hash, Par2RepairerSourceFile*>(fileid, sourcefile));
696
697 return true;
698 }
699 }
700
701 // Finish loading the main packet
LoadMainPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)702 bool Par2Repairer::LoadMainPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
703 {
704 // Do we already have a main packet
705 if (0 != mainpacket)
706 return false;
707
708 MainPacket *packet = new MainPacket;
709
710 // Load the packet from disk;
711 if (!packet->Load(diskfile, offset, header))
712 {
713 delete packet;
714 return false;
715 }
716
717 mainpacket = packet;
718
719 return true;
720 }
721
722 // Finish loading the creator packet
LoadCreatorPacket(DiskFile * diskfile,u64 offset,PACKET_HEADER & header)723 bool Par2Repairer::LoadCreatorPacket(DiskFile *diskfile, u64 offset, PACKET_HEADER &header)
724 {
725 // Do we already have a creator packet
726 if (0 != creatorpacket)
727 return false;
728
729 CreatorPacket *packet = new CreatorPacket;
730
731 // Load the packet from disk;
732 if (!packet->Load(diskfile, offset, header))
733 {
734 delete packet;
735 return false;
736 }
737
738 creatorpacket = packet;
739
740 return true;
741 }
742
743 // Load packets from other PAR2 files with names based on the original PAR2 file
LoadPacketsFromOtherFiles(string filename)744 bool Par2Repairer::LoadPacketsFromOtherFiles(string filename)
745 {
746 // Split the original PAR2 filename into path and name parts
747 string path;
748 string name;
749 DiskFile::SplitFilename(filename, path, name);
750
751 string::size_type where;
752
753 // Trim ".par2" off of the end original name
754
755 // Look for the last "." in the filename
756 while (string::npos != (where = name.find_last_of('.')))
757 {
758 // Trim what follows the last .
759 string tail = name.substr(where+1);
760 name = name.substr(0,where);
761
762 // Was what followed the last "." "par2"
763 if (0 == stricmp(tail.c_str(), "par2"))
764 break;
765 }
766
767 // If what is left ends in ".volNNN-NNN" or ".volNNN+NNN" strip that as well
768
769 // Is there another "."
770 if (string::npos != (where = name.find_last_of('.')))
771 {
772 // What follows the "."
773 string tail = name.substr(where+1);
774
775 // Scan what follows the last "." to see of it matches vol123-456 or vol123+456
776 int n = 0;
777 string::const_iterator p;
778 for (p=tail.begin(); p!=tail.end(); ++p)
779 {
780 char ch = *p;
781
782 if (0 == n)
783 {
784 if (tolower(ch) == 'v') { n++; } else { break; }
785 }
786 else if (1 == n)
787 {
788 if (tolower(ch) == 'o') { n++; } else { break; }
789 }
790 else if (2 == n)
791 {
792 if (tolower(ch) == 'l') { n++; } else { break; }
793 }
794 else if (3 == n)
795 {
796 if (isdigit(ch)) {} else if (ch == '-' || ch == '+') { n++; } else { break; }
797 }
798 else if (4 == n)
799 {
800 if (isdigit(ch)) {} else { break; }
801 }
802 }
803
804 // If we matched then retain only what precedes the "."
805 if (p == tail.end())
806 {
807 name = name.substr(0,where);
808 }
809 }
810
811 // Find files called "*.par2" or "name.*.par2"
812
813 {
814 string wildcard = name.empty() ? "*.par2" : name + ".*.par2";
815 std::unique_ptr< list<string> > files(
816 DiskFile::FindFiles(path, wildcard, false)
817 );
818 par2list.merge(*files);
819
820 string wildcardu = name.empty() ? "*.PAR2" : name + ".*.PAR2";
821 std::unique_ptr< list<string> > filesu(
822 DiskFile::FindFiles(path, wildcardu, false)
823 );
824 par2list.merge(*filesu);
825
826 // Load packets from each file that was found
827 for (list<string>::const_iterator s=par2list.begin(); s!=par2list.end(); ++s)
828 {
829 LoadPacketsFromFile(*s);
830 }
831
832 // delete files; Taken care of by unique_ptr<>
833 // delete filesu;
834 }
835
836 return true;
837 }
838
839 // Load packets from any other PAR2 files whose names are given on the command line
LoadPacketsFromExtraFiles(const vector<string> & extrafiles)840 bool Par2Repairer::LoadPacketsFromExtraFiles(const vector<string> &extrafiles)
841 {
842 for (vector<string>::const_iterator i=extrafiles.begin(); i!=extrafiles.end(); i++)
843 {
844 string filename = *i;
845
846 // If the filename contains ".par2" anywhere
847 if (string::npos != filename.find(".par2") ||
848 string::npos != filename.find(".PAR2"))
849 {
850 LoadPacketsFromFile(filename);
851 }
852 }
853
854 return true;
855 }
856
857 // Check that the packets are consistent and discard any that are not
CheckPacketConsistency(void)858 bool Par2Repairer::CheckPacketConsistency(void)
859 {
860 // Do we have a main packet
861 if (0 == mainpacket)
862 {
863 // If we don't have a main packet, then there is nothing more that we can do.
864 // We cannot verify or repair any files.
865
866 serr << "Main packet not found." << endl;
867 return false;
868 }
869
870 // Remember the block size from the main packet
871 blocksize = mainpacket->BlockSize();
872
873 // Check that the recovery blocks have the correct amount of data
874 // and discard any that don't
875 {
876 map<u32,RecoveryPacket*>::iterator rp = recoverypacketmap.begin();
877 while (rp != recoverypacketmap.end())
878 {
879 if (rp->second->BlockSize() == blocksize)
880 {
881 ++rp;
882 }
883 else
884 {
885 serr << "Incorrect sized recovery block for exponent " << rp->second->Exponent() << " discarded" << endl;
886
887 delete rp->second;
888 map<u32,RecoveryPacket*>::iterator x = rp++;
889 recoverypacketmap.erase(x);
890 }
891 }
892 }
893
894 // Check for source files that have no description packet or where the
895 // verification packet has the wrong number of entries and discard them.
896 {
897 map<MD5Hash, Par2RepairerSourceFile*>::iterator sf = sourcefilemap.begin();
898 while (sf != sourcefilemap.end())
899 {
900 // Do we have a description packet
901 DescriptionPacket *descriptionpacket = sf->second->GetDescriptionPacket();
902 if (descriptionpacket == 0)
903 {
904 // No description packet
905
906 // Discard the source file
907 delete sf->second;
908 map<MD5Hash, Par2RepairerSourceFile*>::iterator x = sf++;
909 sourcefilemap.erase(x);
910
911 continue;
912 }
913
914 // Compute and store the block count from the filesize and blocksize
915 sf->second->SetBlockCount(blocksize);
916
917 // Do we have a verification packet
918 VerificationPacket *verificationpacket = sf->second->GetVerificationPacket();
919 if (verificationpacket == 0)
920 {
921 // No verification packet
922
923 // That is ok, but we won't be able to use block verification.
924
925 // Proceed to the next file.
926 ++sf;
927
928 continue;
929 }
930
931 // Work out the block count for the file from the file size
932 // and compare that with the verification packet
933 u64 filesize = descriptionpacket->FileSize();
934 u32 blockcount = verificationpacket->BlockCount();
935
936 if ((filesize + blocksize-1) / blocksize != (u64)blockcount)
937 {
938 // The block counts are different!
939
940 serr << "Incorrectly sized verification packet for \"" << descriptionpacket->FileName() << "\" discarded" << endl;
941
942 // Discard the source file
943
944 delete sf->second;
945 map<MD5Hash, Par2RepairerSourceFile*>::iterator x = sf++;
946 sourcefilemap.erase(x);
947
948 continue;
949 }
950
951 // Everything is ok.
952
953 // Proceed to the next file
954 ++sf;
955 }
956 }
957
958 if (noiselevel > nlQuiet)
959 {
960 sout << "There are "
961 << mainpacket->RecoverableFileCount()
962 << " recoverable files and "
963 << mainpacket->TotalFileCount() - mainpacket->RecoverableFileCount()
964 << " other files."
965 << endl;
966
967 sout << "The block size used was "
968 << blocksize
969 << " bytes."
970 << endl;
971 }
972
973 return true;
974 }
975
976 // Use the information in the main packet to get the source files
977 // into the correct order and determine their filenames
CreateSourceFileList(void)978 bool Par2Repairer::CreateSourceFileList(void)
979 {
980 // For each FileId entry in the main packet
981 for (u32 filenumber=0; filenumber<mainpacket->TotalFileCount(); filenumber++)
982 {
983 const MD5Hash &fileid = mainpacket->FileId(filenumber);
984
985 // Look up the fileid in the source file map
986 map<MD5Hash, Par2RepairerSourceFile*>::iterator sfmi = sourcefilemap.find(fileid);
987 Par2RepairerSourceFile *sourcefile = (sfmi == sourcefilemap.end()) ? 0 :sfmi->second;
988
989 if (sourcefile)
990 {
991 sourcefile->ComputeTargetFileName(sout, serr, noiselevel, basepath);
992
993 #ifdef _OPENMP
994 // Need actual filesize on disk for mt-progress line
995 sourcefile->SetDiskFileSize();
996 #endif
997 }
998
999 sourcefiles.push_back(sourcefile);
1000 }
1001
1002 return true;
1003 }
1004
// Determine the total number of DataBlocks for the recoverable source files.
// Then allocate the DataBlocks and assign them to each source file.
//
// Pass 1 sums the block counts of the known recoverable files; pass 2
// resizes sourceblocks/targetblocks once and hands each file its slice of
// both vectors via SetBlocks(). Files with no details (null entries in
// sourcefiles) are skipped in both passes. Always returns true.
bool Par2Repairer::AllocateSourceBlocks(void)
{
  sourceblockcount = 0;

  u32 filenumber = 0;
  vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();

  // For each recoverable source file
  while (filenumber < mainpacket->RecoverableFileCount() && sf != sourcefiles.end())
  {
    // Do we have a source file
    Par2RepairerSourceFile *sourcefile = *sf;
    if (sourcefile)
    {
      sourceblockcount += sourcefile->BlockCount();
    }
    else
    {
      // No details for this source file so we don't know what the
      // total number of source blocks is
      // sourceblockcount = 0;
      // break;
    }

    ++sf;
    ++filenumber;
  }

  // Did we determine the total number of source blocks
  if (sourceblockcount > 0)
  {
    // Yes.

    // Allocate all of the Source and Target DataBlocks (which will be used
    // to read and write data to disk).

    sourceblocks.resize(sourceblockcount);
    targetblocks.resize(sourceblockcount);

    // Which DataBlocks will be allocated first
    // (each file receives a contiguous run from both vectors)
    vector<DataBlock>::iterator sourceblock = sourceblocks.begin();
    vector<DataBlock>::iterator targetblock = targetblocks.begin();

    u64 totalsize = 0;
    u32 blocknumber = 0;

    filenumber = 0;
    sf = sourcefiles.begin();

    while (filenumber < mainpacket->RecoverableFileCount() && sf != sourcefiles.end())
    {
      Par2RepairerSourceFile *sourcefile = *sf;

      if (sourcefile)
      {
        totalsize += sourcefile->GetDescriptionPacket()->FileSize();
        u32 blockcount = sourcefile->BlockCount();

        // Allocate the source and target DataBlocks to the sourcefile
        sourcefile->SetBlocks(blocknumber, blockcount, sourceblock, targetblock, blocksize);

        blocknumber++;

        // Advance both iterators past this file's slice
        sourceblock += blockcount;
        targetblock += blockcount;
      }

      ++sf;
      ++filenumber;
    }

    blocksallocated = true;

    if (noiselevel > nlQuiet)
    {
      sout << "There are a total of "
           << sourceblockcount
           << " data blocks."
           << endl;

      sout << "The total size of the data files is "
           << totalsize
           << " bytes."
           << endl;
    }
  }

  return true;
}
1096
1097 // Create a verification hash table for all files for which we have not
1098 // found a complete version of the file and for which we have
1099 // a verification packet
PrepareVerificationHashTable(void)1100 bool Par2Repairer::PrepareVerificationHashTable(void)
1101 {
1102 if (noiselevel >= nlDebug)
1103 sout << "[DEBUG] Prepare verification hashtable" << endl;
1104
1105 // Choose a size for the hash table
1106 verificationhashtable.SetLimit(sourceblockcount);
1107
1108 // Will any files be block verifiable
1109 blockverifiable = false;
1110
1111 // For each source file
1112 vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1113 while (sf != sourcefiles.end())
1114 {
1115 // Get the source file
1116 Par2RepairerSourceFile *sourcefile = *sf;
1117
1118 if (sourcefile)
1119 {
1120 // Do we have a verification packet
1121 if (0 != sourcefile->GetVerificationPacket())
1122 {
1123 // Yes. Load the verification entries into the hash table
1124 verificationhashtable.Load(sourcefile, blocksize);
1125
1126 blockverifiable = true;
1127 }
1128 else
1129 {
1130 // No. We can only check the whole file
1131 unverifiablesourcefiles.push_back(sourcefile);
1132 }
1133 }
1134
1135 ++sf;
1136 }
1137
1138 return true;
1139 }
1140
1141 // Compute the table for the sliding CRC computation
ComputeWindowTable(void)1142 bool Par2Repairer::ComputeWindowTable(void)
1143 {
1144 if (noiselevel >= nlDebug)
1145 sout << "[DEBUG] compute window table" << endl;
1146
1147 if (blockverifiable)
1148 {
1149 GenerateWindowTable(blocksize, windowtable);
1150 windowmask = ComputeWindowMask(blocksize);
1151 }
1152
1153 return true;
1154 }
1155
SortSourceFilesByFileName(Par2RepairerSourceFile * low,Par2RepairerSourceFile * high)1156 static bool SortSourceFilesByFileName(Par2RepairerSourceFile *low,
1157 Par2RepairerSourceFile *high)
1158 {
1159 return low->TargetFileName() < high->TargetFileName();
1160 }
1161
1162 // Attempt to verify all of the source files
VerifySourceFiles(const std::string & basepath,std::vector<string> & extrafiles)1163 bool Par2Repairer::VerifySourceFiles(const std::string& basepath, std::vector<string>& extrafiles)
1164 {
1165 if (noiselevel > nlQuiet)
1166 sout << endl << "Verifying source files:" << endl << endl;
1167
1168 bool finalresult = true;
1169
1170 // Created a sorted list of the source files and verify them in that
1171 // order rather than the order they are in the main packet.
1172 vector<Par2RepairerSourceFile*> sortedfiles;
1173
1174 u32 filenumber = 0;
1175 vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
1176
1177 #ifdef _OPENMP
1178 mttotalsize = 0;
1179 mttotalprogress = 0;
1180 #endif
1181
1182 while (sf != sourcefiles.end())
1183 {
1184 // Do we have a source file
1185 Par2RepairerSourceFile *sourcefile = *sf;
1186 if (sourcefile)
1187 {
1188 sortedfiles.push_back(sourcefile);
1189 #ifdef _OPENMP
1190 // Total filesizes for mt-progress line
1191 mttotalsize += sourcefile->DiskFileSize();
1192 #endif
1193 }
1194 else
1195 {
1196 // Was this one of the recoverable files
1197 if (filenumber < mainpacket->RecoverableFileCount())
1198 {
1199 serr << "No details available for recoverable file number " << filenumber+1 << "." << endl << "Recovery will not be possible." << endl;
1200
1201 // Set error but let verification of other files continue
1202 finalresult = false;
1203 }
1204 else
1205 {
1206 serr << "No details available for non-recoverable file number " << filenumber - mainpacket->RecoverableFileCount() + 1 << endl;
1207 }
1208 }
1209
1210 ++sf;
1211 }
1212
1213 sort(sortedfiles.begin(), sortedfiles.end(), SortSourceFilesByFileName);
1214
1215 // Start verifying the files
1216 #pragma omp parallel for schedule(dynamic) num_threads(Par2Repairer::GetFileThreads())
1217 for (int i=0; i< static_cast<int>(sortedfiles.size()); ++i)
1218 {
1219 // Do we have a source file
1220 Par2RepairerSourceFile *sourcefile = sortedfiles[i];
1221
1222 // What filename does the file use
1223 const std::string& file = sourcefile->TargetFileName();
1224 const std::string& name = DiskFile::SplitRelativeFilename(file, basepath);
1225 const std::string& target_pathname = DiskFile::GetCanonicalPathname(file);
1226
1227 if (noiselevel >= nlDebug)
1228 {
1229 #pragma omp critical
1230 {
1231 sout << "[DEBUG] VerifySourceFiles ----" << endl;
1232 sout << "[DEBUG] file: " << file << endl;
1233 sout << "[DEBUG] name: " << name << endl;
1234 sout << "[DEBUG] targ: " << target_pathname << endl;
1235 }
1236 }
1237
1238 // if the target file is in the list of extra files, we remove it
1239 // from the extra files.
1240 #pragma omp critical
1241 {
1242 vector<string>::iterator it = extrafiles.begin();
1243 for (; it != extrafiles.end(); ++it)
1244 {
1245 const string& e = *it;
1246 const std::string& extra_pathname = e;
1247 if (!extra_pathname.compare(target_pathname))
1248 {
1249 extrafiles.erase(it);
1250 break;
1251 }
1252 }
1253 }
1254
1255 // Check to see if we have already used this file
1256 bool b;
1257 #pragma omp critical
1258 b = diskFileMap.Find(file) != 0;
1259 if (b)
1260 {
1261 // The file has already been used!
1262 #pragma omp critical
1263 serr << "Source file " << name << " is a duplicate." << endl;
1264
1265 finalresult = false;
1266 }
1267 else
1268 {
1269 DiskFile *diskfile = new DiskFile(sout, serr);
1270
1271 // Does the target file exist
1272 if (diskfile->Open(file))
1273 {
1274 // Yes. Record that fact.
1275 sourcefile->SetTargetExists(true);
1276
1277 // Remember that the DiskFile is the target file
1278 sourcefile->SetTargetFile(diskfile);
1279
1280 // Remember that we have processed this file
1281 bool success;
1282 #pragma omp critical
1283 success = diskFileMap.Insert(diskfile);
1284 assert(success);
1285 // Do the actual verification
1286 if (!VerifyDataFile(diskfile, sourcefile, basepath))
1287 finalresult = false;
1288
1289 // We have finished with the file for now
1290 diskfile->Close();
1291 }
1292 else
1293 {
1294 // The file does not exist.
1295 delete diskfile;
1296
1297 if (noiselevel > nlSilent)
1298 {
1299 #pragma omp critical
1300 sout << "Target: \"" << name << "\" - missing." << endl;
1301 }
1302 }
1303 }
1304 }
1305
1306 // Find out how much data we have found
1307 UpdateVerificationResults();
1308
1309 return finalresult;
1310 }
1311
1312 // Scan any extra files specified on the command line
VerifyExtraFiles(const vector<string> & extrafiles,const string & basepath)1313 bool Par2Repairer::VerifyExtraFiles(const vector<string> &extrafiles, const string &basepath)
1314 {
1315 if (noiselevel > nlQuiet)
1316 sout << endl << "Scanning extra files:" << endl << endl;
1317
1318 if (completefilecount < mainpacket->RecoverableFileCount())
1319 {
1320 #ifdef _OPENMP
1321 // Total size of extra files for mt-progress line
1322 mtprocessingextrafiles = true;
1323 mttotalprogress = 0;
1324 mttotalextrasize = 0;
1325
1326 for (size_t i=0; i<extrafiles.size(); ++i)
1327 mttotalextrasize += DiskFile::GetFileSize(extrafiles[i]);
1328 #endif
1329
1330 #pragma omp parallel for schedule(dynamic) num_threads(Par2Repairer::GetFileThreads())
1331 for (int i=0; i< static_cast<int>(extrafiles.size()); ++i)
1332 {
1333 string filename = extrafiles[i];
1334
1335 // If the filename does not include ".par2" we are interested in it.
1336 if (string::npos == filename.find(".par2") &&
1337 string::npos == filename.find(".PAR2"))
1338 {
1339 filename = DiskFile::GetCanonicalPathname(filename);
1340
1341 // Has this file already been dealt with
1342 bool b;
1343 #pragma omp critical
1344 b = diskFileMap.Find(filename) == 0;
1345 if (b)
1346 {
1347 DiskFile *diskfile = new DiskFile(sout, serr);
1348
1349 // Does the file exist
1350 if (!diskfile->Open(filename))
1351 {
1352 delete diskfile;
1353 continue;
1354 }
1355
1356 // Remember that we have processed this file
1357 bool success;
1358 #pragma omp critical
1359 success = diskFileMap.Insert(diskfile);
1360 assert(success);
1361
1362 // Do the actual verification
1363 VerifyDataFile(diskfile, 0, basepath);
1364 // Ignore errors
1365
1366 // We have finished with the file for now
1367 diskfile->Close();
1368 }
1369 }
1370 }
1371 }
1372 // Find out how much data we have found
1373 UpdateVerificationResults();
1374
1375 #if _OPENMP
1376 mtprocessingextrafiles = false;
1377 #endif
1378
1379 return true;
1380 }
1381
1382 // Attempt to match the data in the DiskFile with the source file
VerifyDataFile(DiskFile * diskfile,Par2RepairerSourceFile * sourcefile,const string & basepath)1383 bool Par2Repairer::VerifyDataFile(DiskFile *diskfile, Par2RepairerSourceFile *sourcefile, const string &basepath)
1384 {
1385 MatchType matchtype; // What type of match was made
1386 MD5Hash hashfull; // The MD5 Hash of the whole file
1387 MD5Hash hash16k; // The MD5 Hash of the files 16k of the file
1388
1389 // Are there any files that can be verified at the block level
1390 if (blockverifiable)
1391 {
1392 u32 count;
1393
1394 // Scan the file at the block level.
1395
1396 if (!ScanDataFile(diskfile, // [in] The file to scan
1397 basepath,
1398 sourcefile, // [in/out] Modified in the match is for another source file
1399 matchtype, // [out]
1400 hashfull, // [out]
1401 hash16k, // [out]
1402 count)) // [out]
1403 return false;
1404
1405 switch (matchtype)
1406 {
1407 case eNoMatch:
1408 // No data was found at all.
1409
1410 // Continue to next test.
1411 break;
1412 case ePartialMatch:
1413 {
1414 // We found some data.
1415
1416 // Return them.
1417 return true;
1418 }
1419 break;
1420 case eFullMatch:
1421 {
1422 // We found a perfect match.
1423
1424 sourcefile->SetCompleteFile(diskfile);
1425
1426 // Return the match
1427 return true;
1428 }
1429 break;
1430 }
1431 }
1432
1433 // We did not find a match for any blocks of data within the file, but if
1434 // there are any files for which we did not have a verification packet
1435 // we can try a simple match of the hash for the whole file.
1436
1437 // Are there any files that cannot be verified at the block level
1438 if (!unverifiablesourcefiles.empty())
1439 {
1440 // Would we have already computed the file hashes
1441 if (!blockverifiable)
1442 {
1443 u64 filesize = diskfile->FileSize();
1444
1445 size_t buffersize = 1024*1024;
1446 if (buffersize > min(blocksize, filesize))
1447 buffersize = (size_t)min(blocksize, filesize);
1448
1449 char *buffer = new char[buffersize];
1450
1451 u64 offset = 0;
1452
1453 MD5Context context;
1454
1455 while (offset < filesize)
1456 {
1457 size_t want = (size_t)min((u64)buffersize, filesize-offset);
1458
1459 if (!diskfile->Read(offset, buffer, want))
1460 {
1461 delete [] buffer;
1462 return false;
1463 }
1464
1465 // Will the newly read data reach the 16k boundary
1466 if (offset < 16384 && offset + want >= 16384)
1467 {
1468 context.Update(buffer, (size_t)(16384-offset));
1469
1470 // Compute the 16k hash
1471 MD5Context temp = context;
1472 temp.Final(hash16k);
1473
1474 // Is there more data
1475 if (offset + want > 16384)
1476 {
1477 context.Update(&buffer[16384-offset], (size_t)(offset+want)-16384);
1478 }
1479 }
1480 else
1481 {
1482 context.Update(buffer, want);
1483 }
1484
1485 offset += want;
1486 }
1487
1488 // Compute the file hash
1489 MD5Hash hashfull;
1490 context.Final(hashfull);
1491
1492 // If we did not have 16k of data, then the 16k hash
1493 // is the same as the full hash
1494 if (filesize < 16384)
1495 {
1496 hash16k = hashfull;
1497 }
1498 }
1499
1500 list<Par2RepairerSourceFile*>::iterator sf = unverifiablesourcefiles.begin();
1501
1502 // Compare the hash values of each source file for a match
1503 while (sf != unverifiablesourcefiles.end())
1504 {
1505 sourcefile = *sf;
1506
1507 // Does the file match
1508 if (sourcefile->GetCompleteFile() == 0 &&
1509 diskfile->FileSize() == sourcefile->GetDescriptionPacket()->FileSize() &&
1510 hash16k == sourcefile->GetDescriptionPacket()->Hash16k() &&
1511 hashfull == sourcefile->GetDescriptionPacket()->HashFull())
1512 {
1513 if (noiselevel > nlSilent)
1514 {
1515 #pragma omp critical
1516 sout << diskfile->FileName() << " is a perfect match for " << sourcefile->GetDescriptionPacket()->FileName() << endl;
1517 }
1518 // Record that we have a perfect match for this source file
1519 sourcefile->SetCompleteFile(diskfile);
1520
1521 if (blocksallocated)
1522 {
1523 // Allocate all of the DataBlocks for the source file to the DiskFile
1524
1525 u64 offset = 0;
1526 u64 filesize = sourcefile->GetDescriptionPacket()->FileSize();
1527
1528 vector<DataBlock>::iterator sb = sourcefile->SourceBlocks();
1529
1530 while (offset < filesize)
1531 {
1532 DataBlock &datablock = *sb;
1533
1534 datablock.SetLocation(diskfile, offset);
1535 datablock.SetLength(min(blocksize, filesize-offset));
1536
1537 offset += blocksize;
1538 ++sb;
1539 }
1540 }
1541
1542 // Return the match
1543 return true;
1544 }
1545
1546 ++sf;
1547 }
1548 }
1549
1550 return true;
1551 }
1552
1553 // Perform a sliding window scan of the DiskFile looking for blocks of data that
1554 // might belong to any of the source files (for which a verification packet was
1555 // available). If a block of data might be from more than one source file, prefer
1556 // the one specified by the "sourcefile" parameter. If the first data block
1557 // found is for a different source file then "sourcefile" is changed accordingly.
bool Par2Repairer::ScanDataFile(DiskFile *diskfile, // [in]
                                string basepath, // [in]
                                Par2RepairerSourceFile* &sourcefile, // [in/out]
                                MatchType &matchtype, // [out]
                                MD5Hash &hashfull, // [out]
                                MD5Hash &hash16k, // [out]
                                u32 &count) // [out]
{
  // Remember which file we wanted to match
  Par2RepairerSourceFile *originalsourcefile = sourcefile;

  matchtype = eNoMatch;

  // Name of the file relative to the base path, used in all output below.
  string name;
  DiskFile::SplitRelativeFilename(diskfile->FileName(), basepath, name);

  // Is the file empty
  if (diskfile->FileSize() == 0)
  {
    // If the file is empty, then just return
    if (noiselevel > nlSilent)
    {
      if (originalsourcefile != 0)
      {
#pragma omp critical
        sout << "Target: \"" << name << "\" - empty." << endl;
      }
      else
      {
#pragma omp critical
        sout << "File: \"" << name << "\" - empty." << endl;
      }
    }
    return true;
  }

  // Abbreviate very long names (keep first and last 28 chars) so that
  // progress lines fit on one screen row.
  string shortname;
  if (name.size() > 56)
  {
    shortname = name.substr(0, 28) + "..." + name.substr(name.size()-28);
  }
  else
  {
    shortname = name;
  }

  // Create the checksummer for the file and start reading from it
  FileCheckSummer filechecksummer(diskfile, blocksize, windowtable, windowmask);
  if (!filechecksummer.Start())
    return false;

  // Assume we will make a perfect match for the file
  matchtype = eFullMatch;

  // How many matches have we had
  count = 0;

  // How many blocks have already been found
  u32 duplicatecount = 0;

  // Have we found data blocks in this file that belong to more than one target file
  bool multipletargets = false;

  // Which block do we expect to find first
  const VerificationHashEntry *nextentry = 0;

  // How far will we scan the file (1 byte at a time)
  // before skipping ahead looking for the next block
  u64 scandistance = min(skipleaway<<1, blocksize);

  // Distance to skip forward if we don't find a block
  u64 scanskip = skipdata ? blocksize - scandistance : 0;

  // Assume we are half way through scanning
  u64 scanoffset = scandistance >> 1;

  // Total number of bytes that were skipped whilst scanning
  u64 skippeddata = 0;

  // Offset of last data that was found
  u64 lastmatchoffset = 0;

  // True whilst an unfinished "\r"-terminated progress line is on screen.
  bool progressline = false;

  // Offset at the previous loop iteration, and bytes of progress not yet
  // reported, for the progress-line computations below.
  u64 oldoffset = 0;
  u64 printprogress = 0;

#ifdef _OPENMP
  if (noiselevel > nlQuiet)
  {
#pragma omp critical
    sout << "Opening: \"" << shortname << "\"" << endl;
  }
#endif

  // Whilst we have not reached the end of the file
  while (filechecksummer.Offset() < diskfile->FileSize())
  {
    // OPENMP progress line printing
#ifdef _OPENMP
    if (noiselevel > nlQuiet)
    {
      // Are we processing extrafiles? Use correct total size
      u64 ts = mtprocessingextrafiles ? mttotalextrasize : mttotalsize;

      // Update progress indicator
      printprogress += filechecksummer.Offset() - oldoffset;
      if (printprogress == blocksize || filechecksummer.ShortBlock())
      {
        u32 oldfraction;
        u32 newfraction;
        // mttotalprogress is shared between worker threads, so the
        // read-modify-write must be serialized.
#pragma omp critical
        {
          oldfraction = (u32)(1000 * mttotalprogress / ts);
          mttotalprogress += printprogress;
          newfraction = (u32)(1000 * mttotalprogress / ts);
        }

        printprogress = 0;

        // Only print when the permille value actually changed.
        if (oldfraction != newfraction)
        {
#pragma omp critical
          sout << "Scanning: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;

          progressline = true;
        }
      }
      oldoffset = filechecksummer.Offset();

    }
    // NON-OPENMP progress line printing
#else
    if (noiselevel > nlQuiet)
    {
      // Update progress indicator
      printprogress += filechecksummer.Offset() - oldoffset;
      if (printprogress == blocksize || filechecksummer.ShortBlock())
      {
        u32 oldfraction = (u32)(1000 * (filechecksummer.Offset() - printprogress) / diskfile->FileSize());
        u32 newfraction = (u32)(1000 * filechecksummer.Offset() / diskfile->FileSize());
        printprogress = 0;

        if (oldfraction != newfraction)
        {
          sout << "Scanning: \"" << shortname << "\": " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;

          progressline = true;
        }
      }
      oldoffset = filechecksummer.Offset();
    }
#endif

    // If we fail to find a match, it might be because it was a duplicate of a block
    // that we have already found.
    bool duplicate;

    // Look for a match
    const VerificationHashEntry *currententry = verificationhashtable.FindMatch(nextentry, sourcefile, filechecksummer, duplicate);

    // Did we find a match
    if (currententry != 0)
    {
      // Report any unmatched gap between this match and the previous one.
      if (lastmatchoffset < filechecksummer.Offset() && noiselevel > nlNormal)
      {
        if (progressline)
        {
#pragma omp critical
          sout << endl;
          progressline = false;
        }
#pragma omp critical
        sout << "No data found between offset " << lastmatchoffset
             << " and " << filechecksummer.Offset() << endl;
      }

      // Is this the first match
      if (count == 0)
      {
        // Which source file was it
        sourcefile = currententry->SourceFile();

        // If the first match found was not actually the first block
        // for the source file, or it was not at the start of the
        // data file: then this is a partial match.
        if (!currententry->FirstBlock() || filechecksummer.Offset() != 0)
        {
          matchtype = ePartialMatch;
        }
      }
      else
      {
        // If the match found is not the one which was expected
        // then this is a partial match

        if (currententry != nextentry)
        {
          matchtype = ePartialMatch;
        }

        // Is the match from a different source file
        if (sourcefile != currententry->SourceFile())
        {
          multipletargets = true;
        }
      }

      if (blocksallocated)
      {
        // Record the match
        currententry->SetBlock(diskfile, filechecksummer.Offset());
      }

      // Update the number of matches found
      count++;

      // What entry do we expect next
      nextentry = currententry->Next();

      // Advance to the next block
      if (!filechecksummer.Jump(currententry->GetDataBlock()->GetLength()))
        return false;

      // If the next match fails, assume we are half way through scanning for the next block
      scanoffset = scandistance >> 1;

      // Update offset of last match
      lastmatchoffset = filechecksummer.Offset();
    }
    else
    {
      // This cannot be a perfect match
      matchtype = ePartialMatch;

      // Was this a duplicate match
      // NOTE(review): "&& false" makes this branch dead code, so
      // duplicate blocks are rescanned byte-by-byte instead of being
      // skipped (and duplicatecount stays 0) - presumably a deliberate
      // choice; confirm before re-enabling.
      if (duplicate && false) // ignore duplicates
      {
        duplicatecount++;

        // What entry would we expect next
        nextentry = 0;

        // Advance one whole block
        if (!filechecksummer.Jump(blocksize))
          return false;
      }
      else
      {
        // What entry do we expect next
        nextentry = 0;

        // Slide the checksum window forward by a single byte.
        if (!filechecksummer.Step())
          return false;

        u64 skipfrom = filechecksummer.Offset();

        // Have we scanned too far without finding a block?
        if (scanskip > 0
            && ++scanoffset >= scandistance
            && skipfrom < diskfile->FileSize())
        {
          // Skip forwards to where we think we might find more data
          if (!filechecksummer.Jump(scanskip))
            return false;

          // Update the count of skipped data
          skippeddata += filechecksummer.Offset() - skipfrom;

          // Reset scan offset to 0
          scanoffset = 0;
        }
      }
    }
  }

#ifdef _OPENMP
  if (noiselevel > nlQuiet)
  {
    // Fold any remaining unreported progress into the shared total.
    if (filechecksummer.Offset() == diskfile->FileSize()) {
#pragma omp atomic
      mttotalprogress += filechecksummer.Offset() - oldoffset;
    }
  }
#endif

  // Report any unmatched data at the end of the file.
  if (lastmatchoffset < filechecksummer.Offset() && noiselevel > nlNormal)
  {
    if (progressline)
    {
#pragma omp critical
      sout << endl;
      progressline = false;
    }

#pragma omp critical
    sout << "No data found between offset " << lastmatchoffset
         << " and " << filechecksummer.Offset() << endl;
  }

  // Get the Full and 16k hash values of the file
  filechecksummer.GetFileHashes(hashfull, hash16k);

  if (noiselevel >= nlDebug)
  {
#pragma omp critical
    {
      // Clear out old scanning line
      sout << std::setw(shortname.size()+19) << std::setfill(' ') << "";

      if (duplicatecount > 0)
        sout << "\r[DEBUG] duplicates: " << duplicatecount << endl;
      sout << "\r[DEBUG] matchcount: " << count << endl;
      sout << "[DEBUG] ----------------------" << endl;
    }
  }

  // Did we make any matches at all
  if (count > 0)
  {
    // If this still might be a perfect match, check the
    // hashes, file size, and number of blocks to confirm.
    if (matchtype != eFullMatch ||
        count != sourcefile->GetVerificationPacket()->BlockCount() ||
        diskfile->FileSize() != sourcefile->GetDescriptionPacket()->FileSize() ||
        hashfull != sourcefile->GetDescriptionPacket()->HashFull() ||
        hash16k != sourcefile->GetDescriptionPacket()->Hash16k())
    {
      matchtype = ePartialMatch;

      if (noiselevel > nlSilent)
      {
        // Did we find data from multiple target files
        if (multipletargets)
        {
          // Were we scanning the target file or an extra file
          if (originalsourcefile != 0)
          {
#pragma omp critical
            sout << "Target: \""
                 << name
                 << "\" - damaged, found "
                 << count
                 << " data blocks from several target files."
                 << endl;
          }
          else
          {
#pragma omp critical
            sout << "File: \""
                 << name
                 << "\" - found "
                 << count
                 << " data blocks from several target files."
                 << endl;
          }
        }
        else
        {
          // Did we find data blocks that belong to the target file
          if (originalsourcefile == sourcefile)
          {
#pragma omp critical
            sout << "Target: \""
                 << name
                 << "\" - damaged. Found "
                 << count
                 << " of "
                 << sourcefile->GetVerificationPacket()->BlockCount()
                 << " data blocks."
                 << endl;
          }
          // Were we scanning the target file or an extra file
          else if (originalsourcefile != 0)
          {
            string targetname;
            DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);

#pragma omp critical
            sout << "Target: \""
                 << name
                 << "\" - damaged. Found "
                 << count
                 << " of "
                 << sourcefile->GetVerificationPacket()->BlockCount()
                 << " data blocks from \""
                 << targetname
                 << "\"."
                 << endl;
          }
          else
          {
            string targetname;
            DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);

#pragma omp critical
            sout << "File: \""
                 << name
                 << "\" - found "
                 << count
                 << " of "
                 << sourcefile->GetVerificationPacket()->BlockCount()
                 << " data blocks from \""
                 << targetname
                 << "\"."
                 << endl;
          }
        }

        if (skippeddata > 0)
        {
#pragma omp critical
          sout << skippeddata << " bytes of data were skipped whilst scanning." << endl
               << "If there are not enough blocks found to repair: try again "
               << "with the -N option." << endl;
        }
      }
    }
    else
    {
      if (noiselevel > nlSilent)
      {
        // Did we match the target file
        if (originalsourcefile == sourcefile)
        {
#pragma omp critical
          sout << "Target: \"" << name << "\" - found." << endl;
        }
        // Were we scanning the target file or an extra file
        else if (originalsourcefile != 0)
        {
          string targetname;
          DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);

#pragma omp critical
          sout << "Target: \""
               << name
               << "\" - is a match for \""
               << targetname
               << "\"."
               << endl;
        }
        else
        {
          string targetname;
          DiskFile::SplitRelativeFilename(sourcefile->TargetFileName(), basepath, targetname);

#pragma omp critical
          sout << "File: \""
               << name
               << "\" - is a match for \""
               << targetname
               << "\"."
               << endl;
        }
      }
    }
  }
  else
  {
    matchtype = eNoMatch;

    if (noiselevel > nlSilent)
    {
      // We found no data, but did the file actually contain blocks we
      // had already found in other files.
      if (duplicatecount > 0)
      {
#pragma omp critical
        sout << "File: \""
             << name
             << "\" - found "
             << duplicatecount
             << " duplicate data blocks."
             << endl;
      }
      else
      {
#pragma omp critical
        sout << "File: \""
             << name
             << "\" - no data found."
             << endl;
      }

      if (skippeddata > 0)
      {
#pragma omp critical
        sout << skippeddata << " bytes of data were skipped whilst scanning." << endl
             << "If there are not enough blocks found to repair: try again "
             << "with the -N option." << endl;
      }
    }
  }

  return true;
}
2055
2056 // Find out how much data we have found
UpdateVerificationResults(void)2057 void Par2Repairer::UpdateVerificationResults(void)
2058 {
2059 availableblockcount = 0;
2060 missingblockcount = 0;
2061
2062 completefilecount = 0;
2063 renamedfilecount = 0;
2064 damagedfilecount = 0;
2065 missingfilecount = 0;
2066
2067 u32 filenumber = 0;
2068 vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
2069
2070 // Check the recoverable files
2071 while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2072 {
2073 Par2RepairerSourceFile *sourcefile = *sf;
2074
2075 if (sourcefile)
2076 {
2077 // Was a perfect match for the file found
2078 if (sourcefile->GetCompleteFile() != 0)
2079 {
2080 // Is it the target file or a different one
2081 if (sourcefile->GetCompleteFile() == sourcefile->GetTargetFile())
2082 {
2083 completefilecount++;
2084 }
2085 else
2086 {
2087 renamedfilecount++;
2088 }
2089
2090 availableblockcount += sourcefile->BlockCount();
2091 }
2092 else
2093 {
2094 // Count the number of blocks that have been found
2095 vector<DataBlock>::iterator sb = sourcefile->SourceBlocks();
2096 for (u32 blocknumber=0; blocknumber<sourcefile->BlockCount(); ++blocknumber, ++sb)
2097 {
2098 DataBlock &datablock = *sb;
2099
2100 if (datablock.IsSet())
2101 availableblockcount++;
2102 }
2103
2104 // Does the target file exist
2105 if (sourcefile->GetTargetExists())
2106 {
2107 damagedfilecount++;
2108 }
2109 else
2110 {
2111 missingfilecount++;
2112 }
2113 }
2114 }
2115 else
2116 {
2117 missingfilecount++;
2118 }
2119
2120 ++filenumber;
2121 ++sf;
2122 }
2123
2124 missingblockcount = sourceblockcount - availableblockcount;
2125 }
2126
2127 // Check the verification results and report the results
CheckVerificationResults(void)2128 bool Par2Repairer::CheckVerificationResults(void)
2129 {
2130 // Is repair needed
2131 if (completefilecount < mainpacket->RecoverableFileCount() ||
2132 renamedfilecount > 0 ||
2133 damagedfilecount > 0 ||
2134 missingfilecount > 0)
2135 {
2136 if (noiselevel > nlSilent)
2137 sout << "Repair is required." << endl;
2138 if (noiselevel > nlQuiet)
2139 {
2140 if (renamedfilecount > 0) sout << renamedfilecount << " file(s) have the wrong name." << endl;
2141 if (missingfilecount > 0) sout << missingfilecount << " file(s) are missing." << endl;
2142 if (damagedfilecount > 0) sout << damagedfilecount << " file(s) exist but are damaged." << endl;
2143 if (completefilecount > 0) sout << completefilecount << " file(s) are ok." << endl;
2144
2145 sout << "You have " << availableblockcount
2146 << " out of " << sourceblockcount
2147 << " data blocks available." << endl;
2148 if (recoverypacketmap.size() > 0)
2149 sout << "You have " << (u32)recoverypacketmap.size()
2150 << " recovery blocks available." << endl;
2151 }
2152
2153 // Is repair possible
2154 if (recoverypacketmap.size() >= missingblockcount)
2155 {
2156 if (noiselevel > nlSilent)
2157 sout << "Repair is possible." << endl;
2158
2159 if (noiselevel > nlQuiet)
2160 {
2161 if (recoverypacketmap.size() > missingblockcount)
2162 sout << "You have an excess of "
2163 << (u32)recoverypacketmap.size() - missingblockcount
2164 << " recovery blocks." << endl;
2165
2166 if (missingblockcount > 0)
2167 sout << missingblockcount
2168 << " recovery blocks will be used to repair." << endl;
2169 else if (recoverypacketmap.size())
2170 sout << "None of the recovery blocks will be used for the repair." << endl;
2171 }
2172
2173 return true;
2174 }
2175 else
2176 {
2177 if (noiselevel > nlSilent)
2178 {
2179 sout << "Repair is not possible." << endl;
2180 sout << "You need " << missingblockcount - recoverypacketmap.size()
2181 << " more recovery blocks to be able to repair." << endl;
2182 }
2183
2184 return false;
2185 }
2186 }
2187 else
2188 {
2189 if (noiselevel > nlSilent)
2190 sout << "All files are correct, repair is not required." << endl;
2191
2192 return true;
2193 }
2194
2195 return true;
2196 }
2197
2198 // Rename any damaged or missnamed target files.
RenameTargetFiles(void)2199 bool Par2Repairer::RenameTargetFiles(void)
2200 {
2201 u32 filenumber = 0;
2202 vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
2203
2204 // Rename any damaged target files
2205 while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2206 {
2207 Par2RepairerSourceFile *sourcefile = *sf;
2208
2209 // If the target file exists but is not a complete version of the file
2210 if (sourcefile->GetTargetExists() &&
2211 sourcefile->GetTargetFile() != sourcefile->GetCompleteFile())
2212 {
2213 DiskFile *targetfile = sourcefile->GetTargetFile();
2214
2215 // Rename it
2216 diskFileMap.Remove(targetfile);
2217
2218 if (!targetfile->Rename())
2219 return false;
2220
2221 backuplist.push_back(targetfile);
2222
2223 bool success = diskFileMap.Insert(targetfile);
2224 assert(success);
2225
2226 // We no longer have a target file
2227 sourcefile->SetTargetExists(false);
2228 sourcefile->SetTargetFile(0);
2229 }
2230
2231 ++sf;
2232 ++filenumber;
2233 }
2234
2235 filenumber = 0;
2236 sf = sourcefiles.begin();
2237
2238 // Rename any missnamed but complete versions of the files
2239 while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2240 {
2241 Par2RepairerSourceFile *sourcefile = *sf;
2242
2243 // If there is no targetfile and there is a complete version
2244 if (sourcefile->GetTargetFile() == 0 &&
2245 sourcefile->GetCompleteFile() != 0)
2246 {
2247 DiskFile *targetfile = sourcefile->GetCompleteFile();
2248
2249 // Rename it
2250 diskFileMap.Remove(targetfile);
2251
2252 if (!targetfile->Rename(sourcefile->TargetFileName()))
2253 return false;
2254
2255 bool success = diskFileMap.Insert(targetfile);
2256 assert(success);
2257
2258 // This file is now the target file
2259 sourcefile->SetTargetExists(true);
2260 sourcefile->SetTargetFile(targetfile);
2261
2262 // We have one more complete file
2263 completefilecount++;
2264 }
2265
2266 ++sf;
2267 ++filenumber;
2268 }
2269
2270 return true;
2271 }
2272
2273 // Work out which files are being repaired, create them, and allocate
2274 // target DataBlocks to them, and remember them for later verification.
CreateTargetFiles(void)2275 bool Par2Repairer::CreateTargetFiles(void)
2276 {
2277 u32 filenumber = 0;
2278 vector<Par2RepairerSourceFile*>::iterator sf = sourcefiles.begin();
2279
2280 // Create any missing target files
2281 while (sf != sourcefiles.end() && filenumber < mainpacket->TotalFileCount())
2282 {
2283 Par2RepairerSourceFile *sourcefile = *sf;
2284
2285 // If the file does not exist
2286 if (!sourcefile->GetTargetExists())
2287 {
2288 DiskFile *targetfile = new DiskFile(sout, serr);
2289 string filename = sourcefile->TargetFileName();
2290 u64 filesize = sourcefile->GetDescriptionPacket()->FileSize();
2291
2292 // Create the target file
2293 if (!targetfile->Create(filename, filesize))
2294 {
2295 delete targetfile;
2296 return false;
2297 }
2298
2299 // This file is now the target file
2300 sourcefile->SetTargetExists(true);
2301 sourcefile->SetTargetFile(targetfile);
2302
2303 // Remember this file
2304 bool success = diskFileMap.Insert(targetfile);
2305 assert(success);
2306
2307 u64 offset = 0;
2308 vector<DataBlock>::iterator tb = sourcefile->TargetBlocks();
2309
2310 // Allocate all of the target data blocks
2311 while (offset < filesize)
2312 {
2313 DataBlock &datablock = *tb;
2314
2315 datablock.SetLocation(targetfile, offset);
2316 datablock.SetLength(min(blocksize, filesize-offset));
2317
2318 offset += blocksize;
2319 ++tb;
2320 }
2321
2322 // Add the file to the list of those that will need to be verified
2323 // once the repair has completed.
2324 verifylist.push_back(sourcefile);
2325 }
2326
2327 ++sf;
2328 ++filenumber;
2329 }
2330
2331 return true;
2332 }
2333
2334 // Work out which data blocks are available, which need to be copied
2335 // directly to the output, and which need to be recreated, and compute
2336 // the appropriate Reed Solomon matrix.
// Work out which data blocks are available, which need to be copied
// directly to the output, and which need to be recreated, and compute
// the appropriate Reed Solomon matrix.
bool Par2Repairer::ComputeRSmatrix(void)
{
  // Pre-size the three work lists; they are populated below.  The
  // counts (sourceblockcount, availableblockcount, missingblockcount)
  // were established during verification.
  inputblocks.resize(sourceblockcount);   // The DataBlocks that will be read from disk
  copyblocks.resize(availableblockcount); // Those DataBlocks which need to be copied
  outputblocks.resize(missingblockcount); // Those DataBlocks that will be recalculated

  vector<DataBlock*>::iterator inputblock = inputblocks.begin();
  vector<DataBlock*>::iterator copyblock = copyblocks.begin();
  vector<DataBlock*>::iterator outputblock = outputblocks.begin();

  // Build an array listing which source data blocks are present and which are missing
  vector<bool> present;
  present.resize(sourceblockcount);

  // sourceblocks / targetblocks are parallel arrays: entry i of each
  // describes the same logical block (found location vs. target location).
  vector<DataBlock>::iterator sourceblock = sourceblocks.begin();
  vector<DataBlock>::iterator targetblock = targetblocks.begin();
  vector<bool>::iterator pres = present.begin();

  // Iterate through all source blocks for all files
  while (sourceblock != sourceblocks.end())
  {
    // Was this block found (i.e. does it have a known disk location)?
    if (sourceblock->IsSet())
    {
      //// Open the file the block was found in.
      //if (!sourceblock->Open())
      //  return false;

      // Record that the block was found
      *pres = true;

      // Add the block to the list of those which will be read
      // as input (and which might also need to be copied).
      *inputblock = &*sourceblock;
      *copyblock = &*targetblock;

      ++inputblock;
      ++copyblock;
    }
    else
    {
      // Record that the block was missing
      *pres = false;

      // Add the block to the list of those to be written
      *outputblock = &*targetblock;
      ++outputblock;
    }

    ++sourceblock;
    ++targetblock;
    ++pres;
  }

  // Set the number of source blocks and which of them are present
  if (!rs.SetInput(present, sout, serr))
    return false;

  // Start iterating through the available recovery packets
  map<u32,RecoveryPacket*>::iterator rp = recoverypacketmap.begin();

  // Continue to fill the remaining list of data blocks to be read.
  // NOTE(review): rp is never compared against recoverypacketmap.end()
  // here — presumably the caller guarantees there are at least as many
  // recovery packets as missing blocks before this is called; confirm.
  while (inputblock != inputblocks.end())
  {
    // Get the next available recovery packet
    u32 exponent = rp->first;
    RecoveryPacket* recoverypacket = rp->second;

    // Get the DataBlock from the recovery packet
    DataBlock *recoveryblock = recoverypacket->GetDataBlock();

    //// Make sure the file is open
    //if (!recoveryblock->Open())
    //  return false;

    // Add the recovery block to the list of blocks that will be read
    *inputblock = recoveryblock;

    // Record that the corresponding exponent value is the next one
    // to use in the RS matrix
    if (!rs.SetOutput(true, (u16)exponent))
      return false;

    ++inputblock;
    ++rp;
  }

  // If nothing is missing there is no matrix to compute or solve.
  if (missingblockcount == 0)
    return true;

  // Compute and solve the Reed Solomon matrix.
  bool success = rs.Compute(noiselevel, sout, serr);

  return success;
}
2432
2433 // Allocate memory buffers for reading and writing data to disk.
AllocateBuffers(size_t memorylimit)2434 bool Par2Repairer::AllocateBuffers(size_t memorylimit)
2435 {
2436 // Would single pass processing use too much memory
2437 if (blocksize * missingblockcount > memorylimit)
2438 {
2439 // Pick a size that is small enough
2440 chunksize = ~3 & (memorylimit / missingblockcount);
2441 }
2442 else
2443 {
2444 chunksize = (size_t)blocksize;
2445 }
2446
2447 // Allocate the two buffers
2448 inputbuffer = new u8[(size_t)chunksize];
2449 outputbuffer = new u8[(size_t)chunksize * missingblockcount];
2450
2451 if (inputbuffer == NULL || outputbuffer == NULL)
2452 {
2453 serr << "Could not allocate buffer memory." << endl;
2454 return false;
2455 }
2456
2457 return true;
2458 }
2459
2460 // Read source data, process it through the RS matrix and write it to disk.
// Read source data, process it through the RS matrix and write it to disk.
//
// One call processes the byte range [blockoffset, blockoffset+blocklength)
// of every block: each available input block is read, optionally copied to
// its target location, and fed through the RS matrix to accumulate the
// missing blocks in outputbuffer; the recomputed chunks are then written
// to the target files.  Returns false on any read/write/open failure.
bool Par2Repairer::ProcessData(u64 blockoffset, size_t blocklength)
{
  // Total number of bytes written to target files during this call.
  u64 totalwritten = 0;

  // Clear the output buffer
  memset(outputbuffer, 0, (size_t)chunksize * missingblockcount);

  vector<DataBlock*>::iterator inputblock = inputblocks.begin();
  vector<DataBlock*>::iterator copyblock = copyblocks.begin();
  u32 inputindex = 0;

  // Files are opened lazily and kept open while consecutive input
  // blocks come from the same file.
  DiskFile *lastopenfile = NULL;

  // Are there any blocks which need to be reconstructed
  if (missingblockcount > 0)
  {
    // For each input block
    while (inputblock != inputblocks.end())
    {
      // Are we reading from a new file?
      if (lastopenfile != (*inputblock)->GetDiskFile())
      {
        // Close the last file
        if (lastopenfile != NULL)
        {
          lastopenfile->Close();
        }

        // Open the new file
        lastopenfile = (*inputblock)->GetDiskFile();
        if (!lastopenfile->Open())
        {
          return false;
        }
      }

      // Read data from the current input block
      if (!(*inputblock)->ReadData(blockoffset, blocklength, inputbuffer))
        return false;

      // Have we reached the last source data block?  (Recovery blocks
      // follow the source blocks in inputblocks and are never copied.)
      if (copyblock != copyblocks.end())
      {
        // Does this block need to be copied to the target file
        if ((*copyblock)->IsSet())
        {
          size_t wrote;

          // Write the block back to disk in the new target file
          if (!(*copyblock)->WriteData(blockoffset, blocklength, inputbuffer, wrote))
            return false;

          totalwritten += wrote;
        }
        ++copyblock;
      }

      // For each output block: accumulate this input block's
      // contribution into every missing block's chunk in parallel.
      // The loop index is a signed i64 (OpenMP historically requires a
      // signed integral loop variable).
      // NOTE(review): oldfraction/newfraction read 'progress' without
      // synchronization while other threads update it via the atomic
      // below, so the printed percentage can be slightly stale or
      // repeated — presumably acceptable for a progress display; confirm.
      #pragma omp parallel for
      for (i64 outputindex=0; outputindex<missingblockcount; outputindex++)
      {
        u32 internalOutputindex = (u32) outputindex;
        // Select the appropriate part of the output buffer
        void *outbuf = &((u8*)outputbuffer)[chunksize * internalOutputindex];

        // Process the data
        rs.Process(blocklength, inputindex, inputbuffer, internalOutputindex, outbuf);

        if (noiselevel > nlQuiet)
        {
          // Update a progress indicator
          u32 oldfraction = (u32)(1000 * progress / totaldata);
          #pragma omp atomic
          progress += blocklength;
          u32 newfraction = (u32)(1000 * progress / totaldata);

          if (oldfraction != newfraction)
          {
            #pragma omp critical
            sout << "Repairing: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
          }
        }
      }

      ++inputblock;
      ++inputindex;
    }
  }
  else
  {
    // Reconstruction is not required, we are just copying blocks between files

    // For each block that might need to be copied
    while (copyblock != copyblocks.end())
    {
      // Does this block need to be copied
      if ((*copyblock)->IsSet())
      {
        // Are we reading from a new file?
        if (lastopenfile != (*inputblock)->GetDiskFile())
        {
          // Close the last file
          if (lastopenfile != NULL)
          {
            lastopenfile->Close();
          }

          // Open the new file
          lastopenfile = (*inputblock)->GetDiskFile();
          if (!lastopenfile->Open())
          {
            return false;
          }
        }

        // Read data from the current input block
        if (!(*inputblock)->ReadData(blockoffset, blocklength, inputbuffer))
          return false;

        // Copy it straight to the target location.
        size_t wrote;
        if (!(*copyblock)->WriteData(blockoffset, blocklength, inputbuffer, wrote))
          return false;
        totalwritten += wrote;
      }

      if (noiselevel > nlQuiet)
      {
        // Update a progress indicator (single-threaded here, no race).
        u32 oldfraction = (u32)(1000 * progress / totaldata);
        progress += blocklength;
        u32 newfraction = (u32)(1000 * progress / totaldata);

        if (oldfraction != newfraction)
        {
          sout << "Processing: " << newfraction/10 << '.' << newfraction%10 << "%\r" << flush;
        }
      }

      ++copyblock;
      ++inputblock;
    }
  }

  // Close the last file
  if (lastopenfile != NULL)
  {
    lastopenfile->Close();
  }

  if (noiselevel > nlQuiet)
    sout << "Writing recovered data\r";

  // For each output block that has been recomputed, flush its chunk
  // from the output buffer to the target file.
  vector<DataBlock*>::iterator outputblock = outputblocks.begin();
  for (u32 outputindex=0; outputindex<missingblockcount;outputindex++)
  {
    // Select the appropriate part of the output buffer
    char *outbuf = &((char*)outputbuffer)[chunksize * outputindex];

    // Write the data to the target file
    size_t wrote;
    if (!(*outputblock)->WriteData(blockoffset, blocklength, outbuf, wrote))
      return false;
    totalwritten += wrote;

    ++outputblock;
  }

  if (noiselevel > nlQuiet)
    sout << "Wrote " << totalwritten << " bytes to disk" << endl;

  return true;
}
2634
2635 // Verify that all of the reconstructed target files are now correct
// Verify that all of the reconstructed target files are now correct.
//
// Re-verifies every file in verifylist against its source blocks and
// refreshes the overall verification results.  Returns false if any
// file could not be opened or failed verification.
bool Par2Repairer::VerifyTargetFiles(const string &basepath)
{
  bool finalresult = true;

  // Verify the target files in alphabetical order
  sort(verifylist.begin(), verifylist.end(), SortSourceFilesByFileName);

#ifdef _OPENMP
  // Pre-compute the total size so the multithreaded verification can
  // report overall progress.
  mttotalsize = 0;
  mttotalprogress = 0;

  for (size_t i=0; i<verifylist.size(); ++i)
  {
    if (verifylist[i])
      mttotalsize += verifylist[i]->GetDescriptionPacket()->FileSize();
  }
#endif

  // Iterate through each file in the verification list, verifying the
  // files in parallel (dynamic scheduling: file sizes vary widely).
  // NOTE(review): the sizing loop above guards against null entries but
  // this loop dereferences verifylist[i] unconditionally — presumably
  // entries are never null by this point; confirm.
  // NOTE(review): finalresult may be written (always to false) from
  // several threads without synchronization — confirm this is benign on
  // the supported platforms.
  #pragma omp parallel for schedule(dynamic) num_threads(Par2Repairer::GetFileThreads())
  for (int i=0; i< static_cast<int>(verifylist.size()); ++i)
  {
    Par2RepairerSourceFile *sourcefile = verifylist[i];
    DiskFile *targetfile = sourcefile->GetTargetFile();

    // Close the file
    if (targetfile->IsOpen())
      targetfile->Close();

    // Mark all data blocks for the file as unknown
    vector<DataBlock>::iterator sb = sourcefile->SourceBlocks();
    for (u32 blocknumber=0; blocknumber<sourcefile->BlockCount(); blocknumber++)
    {
      sb->ClearLocation();
      ++sb;
    }

    // Say we don't have a complete version of the file
    sourcefile->SetCompleteFile(0);

    // Re-open the target file
    if (!targetfile->Open())
    {
      finalresult = false;
      continue;
    }

    // Verify the file again
    if (!VerifyDataFile(targetfile, sourcefile, basepath))
      finalresult = false;

    // Close the file again
    targetfile->Close();
  }

  // Find out how much data we have found
  UpdateVerificationResults();

  return finalresult;
}
2696
2697 // Delete all of the partly reconstructed files
DeleteIncompleteTargetFiles(void)2698 bool Par2Repairer::DeleteIncompleteTargetFiles(void)
2699 {
2700 vector<Par2RepairerSourceFile*>::iterator sf = verifylist.begin();
2701
2702 // Iterate through each file in the verification list
2703 while (sf != verifylist.end())
2704 {
2705 Par2RepairerSourceFile *sourcefile = *sf;
2706 if (sourcefile->GetTargetExists())
2707 {
2708 DiskFile *targetfile = sourcefile->GetTargetFile();
2709
2710 // Close and delete the file
2711 if (targetfile->IsOpen())
2712 targetfile->Close();
2713 targetfile->Delete();
2714
2715 // Forget the file
2716 diskFileMap.Remove(targetfile);
2717 delete targetfile;
2718
2719 // There is no target file
2720 sourcefile->SetTargetExists(false);
2721 sourcefile->SetTargetFile(0);
2722 }
2723
2724 ++sf;
2725 }
2726
2727 return true;
2728 }
2729
RemoveBackupFiles(void)2730 bool Par2Repairer::RemoveBackupFiles(void)
2731 {
2732 vector<DiskFile*>::iterator bf = backuplist.begin();
2733
2734 if (noiselevel > nlSilent
2735 && bf != backuplist.end())
2736 {
2737 sout << endl << "Purge backup files." << endl;
2738 }
2739
2740 // Iterate through each file in the backuplist
2741 while (bf != backuplist.end())
2742 {
2743 if (noiselevel > nlSilent)
2744 {
2745 string name;
2746 string path;
2747 DiskFile::SplitFilename((*bf)->FileName(), path, name);
2748 sout << "Remove \"" << name << "\"." << endl;
2749 }
2750
2751 if ((*bf)->IsOpen())
2752 (*bf)->Close();
2753 (*bf)->Delete();
2754
2755 ++bf;
2756 }
2757
2758 return true;
2759 }
2760
RemoveParFiles(void)2761 bool Par2Repairer::RemoveParFiles(void)
2762 {
2763 if (noiselevel > nlSilent
2764 && !par2list.empty())
2765 {
2766 sout << endl << "Purge par files." << endl;
2767 }
2768
2769 for (list<string>::const_iterator s=par2list.begin(); s!=par2list.end(); ++s)
2770 {
2771 DiskFile *diskfile = new DiskFile(sout, serr);
2772
2773 if (diskfile->Open(*s))
2774 {
2775 if (noiselevel > nlSilent)
2776 {
2777 string name;
2778 string path;
2779 DiskFile::SplitFilename((*s), path, name);
2780 sout << "Remove \"" << name << "\"." << endl;
2781 }
2782
2783 if (diskfile->IsOpen())
2784 diskfile->Close();
2785 diskfile->Delete();
2786 }
2787
2788 delete diskfile;
2789 }
2790
2791 return true;
2792 }
2793