1 /*
2  * s3fs - FUSE-based file system backed by Amazon S3
3  *
4  * Copyright(C) 2007 Takeshi Nakatani <ggtakec.com>
5  *
6  * This program is free software; you can redistribute it and/or
7  * modify it under the terms of the GNU General Public License
8  * as published by the Free Software Foundation; either version 2
9  * of the License, or (at your option) any later version.
10  *
11  * This program is distributed in the hope that it will be useful,
12  * but WITHOUT ANY WARRANTY; without even the implied warranty of
13  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14  * GNU General Public License for more details.
15  *
16  * You should have received a copy of the GNU General Public License
17  * along with this program; if not, write to the Free Software
18  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
19  */
20 
#include <cstdio>
#include <cstdlib>
#include <cerrno>
#include <unistd.h>
#include <algorithm>
#include <sstream>
#include <vector>

#include "common.h"
#include "s3fs.h"
#include "fdcache_page.h"
#include "string_util.h"
31 
32 //------------------------------------------------
33 // Symbols
34 //------------------------------------------------
// Size in bytes (16 KiB) of the read buffer used by PageList::CheckZeroAreaInFile()
static const int CHECK_CACHEFILE_PART_SIZE = 16 * 1024;
36 
37 //------------------------------------------------
38 // fdpage_list_t utility
39 //------------------------------------------------
40 // Inline function for repeated processing
raw_add_compress_fdpage_list(fdpage_list_t & pagelist,fdpage & page,bool ignore_load,bool ignore_modify,bool default_load,bool default_modify)41 inline void raw_add_compress_fdpage_list(fdpage_list_t& pagelist, fdpage& page, bool ignore_load, bool ignore_modify, bool default_load, bool default_modify)
42 {
43     if(0 < page.bytes){
44         // [NOTE]
45         // The page variable is subject to change here.
46         //
47         if(ignore_load){
48             page.loaded   = default_load;
49         }
50         if(ignore_modify){
51             page.modified = default_modify;
52         }
53         pagelist.push_back(page);
54     }
55 }
56 
57 // Compress the page list
58 //
59 // ignore_load:     Ignore the flag of loaded member and compress
60 // ignore_modify:   Ignore the flag of modified member and compress
61 // default_load:    loaded flag value in the list after compression when ignore_load=true
62 // default_modify:  modified flag value in the list after compression when default_modify=true
63 //
64 // NOTE: ignore_modify and ignore_load cannot both be true.
65 //
raw_compress_fdpage_list(const fdpage_list_t & pages,bool ignore_load,bool ignore_modify,bool default_load,bool default_modify)66 static fdpage_list_t raw_compress_fdpage_list(const fdpage_list_t& pages, bool ignore_load, bool ignore_modify, bool default_load, bool default_modify)
67 {
68     fdpage_list_t compressed_pages;
69     fdpage        tmppage;
70     bool          is_first = true;
71     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
72         if(!is_first){
73             if( (!ignore_load   && (tmppage.loaded   != iter->loaded  )) ||
74                 (!ignore_modify && (tmppage.modified != iter->modified)) )
75             {
76                 // Different from the previous area, add it to list
77                 raw_add_compress_fdpage_list(compressed_pages, tmppage, ignore_load, ignore_modify, default_load, default_modify);
78 
79                 // keep current area
80                 tmppage = fdpage(iter->offset, iter->bytes, (ignore_load ? default_load : iter->loaded), (ignore_modify ? default_modify : iter->modified));
81             }else{
82                 // Same as the previous area
83                 if(tmppage.next() != iter->offset){
84                     // These are not contiguous areas, add it to list
85                     raw_add_compress_fdpage_list(compressed_pages, tmppage, ignore_load, ignore_modify, default_load, default_modify);
86 
87                     // keep current area
88                     tmppage = fdpage(iter->offset, iter->bytes, (ignore_load ? default_load : iter->loaded), (ignore_modify ? default_modify : iter->modified));
89                 }else{
90                     // These are contiguous areas
91 
92                     // add current area
93                     tmppage.bytes += iter->bytes;
94                 }
95             }
96         }else{
97             // first erea
98             is_first = false;
99 
100             // keep current area
101             tmppage = fdpage(iter->offset, iter->bytes, (ignore_load ? default_load : iter->loaded), (ignore_modify ? default_modify : iter->modified));
102         }
103     }
104     // add last area
105     if(!is_first){
106         raw_add_compress_fdpage_list(compressed_pages, tmppage, ignore_load, ignore_modify, default_load, default_modify);
107     }
108     return compressed_pages;
109 }
110 
// Compress the page list by the loaded flag only.
// The modified flag is ignored while merging and is set to default_modify
// in the returned list.
static fdpage_list_t compress_fdpage_list_ignore_modify(const fdpage_list_t& pages, bool default_modify)
{
    const bool ignore_load   = false;
    const bool ignore_modify = true;
    const bool default_load  = false;      // not used because ignore_load is false

    return raw_compress_fdpage_list(pages, ignore_load, ignore_modify, default_load, default_modify);
}
115 
// Compress the page list by the modified flag only.
// The loaded flag is ignored while merging and is set to default_load
// in the returned list.
static fdpage_list_t compress_fdpage_list_ignore_load(const fdpage_list_t& pages, bool default_load)
{
    const bool ignore_load    = true;
    const bool ignore_modify  = false;
    const bool default_modify = false;     // not used because ignore_modify is false

    return raw_compress_fdpage_list(pages, ignore_load, ignore_modify, default_load, default_modify);
}
120 
// Compress the page list taking both the loaded and the modified flag
// into account (only pages with identical flags are merged).
static fdpage_list_t compress_fdpage_list(const fdpage_list_t& pages)
{
    const bool ignore_load    = false;
    const bool ignore_modify  = false;
    const bool default_load   = false;     // not used because ignore_load is false
    const bool default_modify = false;     // not used because ignore_modify is false

    return raw_compress_fdpage_list(pages, ignore_load, ignore_modify, default_load, default_modify);
}
125 
// Split the modified pages of a page list into parts of max_partsize bytes.
//
// pages:         The page list to split (not changed)
// max_partsize:  The size of one part. Must be greater than 0 to split.
//
// Returns a new list. Pages that are not modified are copied as they are.
// A modified page is cut into parts of exactly max_partsize bytes as long
// as more than twice max_partsize remains. The remainder is kept as one
// last part, so no part is smaller than max_partsize unless the page
// itself was.
//
// [NOTE]
// If max_partsize is 0 or less, no page can be split (cutting zero sized
// parts would never finish), so all pages are copied without splitting.
//
static fdpage_list_t parse_partsize_fdpage_list(const fdpage_list_t& pages, off_t max_partsize)
{
    fdpage_list_t parsed_pages;
    for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
        if(!iter->modified || max_partsize <= 0){
            // not modified page (or invalid part size) is not parsed
            parsed_pages.push_back(*iter);
            continue;
        }

        // modified page
        fdpage tmppage    = *iter;
        off_t  start      = iter->offset;
        off_t  rest_bytes = iter->bytes;
        while(0 < rest_bytes){
            // Same meaning as "(max_partsize * 2) < rest_bytes", but written
            // so that the multiplication cannot overflow.
            //
            // When the remaining bytes are not more than twice max_partsize,
            // one of the divided areas would be smaller than max_partsize.
            // Therefore, the area at the end is not divided.
            const off_t part_bytes = (max_partsize < (rest_bytes - max_partsize)) ? max_partsize : rest_bytes;

            tmppage.offset = start;
            tmppage.bytes  = part_bytes;
            parsed_pages.push_back(tmppage);

            start      += part_bytes;
            rest_bytes -= part_bytes;
        }
    }
    return parsed_pages;
}
161 
162 //------------------------------------------------
163 // PageList class methods
164 //------------------------------------------------
165 //
166 // Examine and return the status of each block in the file.
167 //
168 // Assuming the file is a sparse file, check the HOLE and DATA areas
169 // and return it in fdpage_list_t. The loaded flag of each fdpage is
170 // set to false for HOLE blocks and true for DATA blocks.
171 //
GetSparseFilePages(int fd,size_t file_size,fdpage_list_t & sparse_list)172 bool PageList::GetSparseFilePages(int fd, size_t file_size, fdpage_list_t& sparse_list)
173 {
174     // [NOTE]
175     // Express the status of the cache file using fdpage_list_t.
176     // There is a hole in the cache file(sparse file), and the
177     // state of this hole is expressed by the "loaded" member of
178     // struct fdpage. (the "modified" member is not used)
179     //
180     if(0 == file_size){
181         // file is empty
182         return true;
183     }
184 
185     bool is_hole   = false;
186     off_t hole_pos = lseek(fd, 0, SEEK_HOLE);
187     off_t data_pos = lseek(fd, 0, SEEK_DATA);
188     if(-1 == hole_pos && -1 == data_pos){
189         S3FS_PRN_ERR("Could not find the first position both HOLE and DATA in the file(physical_fd=%d).", fd);
190         return false;
191     }else if(-1 == hole_pos){
192         is_hole   = false;
193     }else if(-1 == data_pos){
194         is_hole   = true;
195     }else if(hole_pos < data_pos){
196         is_hole   = true;
197     }else{
198         is_hole   = false;
199     }
200 
201     for(off_t cur_pos = 0, next_pos = 0; 0 <= cur_pos; cur_pos = next_pos, is_hole = !is_hole){
202         fdpage page;
203         page.offset   = cur_pos;
204         page.loaded   = !is_hole;
205         page.modified = false;
206 
207         next_pos = lseek(fd, cur_pos, (is_hole ? SEEK_DATA : SEEK_HOLE));
208         if(-1 == next_pos){
209             page.bytes = static_cast<off_t>(file_size - cur_pos);
210         }else{
211             page.bytes = next_pos - cur_pos;
212         }
213         sparse_list.push_back(page);
214     }
215     return true;
216 }
217 
218 //
219 // Confirm that the specified area is ZERO
220 //
CheckZeroAreaInFile(int fd,off_t start,size_t bytes)221 bool PageList::CheckZeroAreaInFile(int fd, off_t start, size_t bytes)
222 {
223     char* readbuff = new char[CHECK_CACHEFILE_PART_SIZE];
224 
225     for(size_t comp_bytes = 0, check_bytes = 0; comp_bytes < bytes; comp_bytes += check_bytes){
226         if(CHECK_CACHEFILE_PART_SIZE < (bytes - comp_bytes)){
227             check_bytes = CHECK_CACHEFILE_PART_SIZE;
228         }else{
229             check_bytes = bytes - comp_bytes;
230         }
231         bool    found_bad_data = false;
232         ssize_t read_bytes;
233         if(-1 == (read_bytes = pread(fd, readbuff, check_bytes, (start + comp_bytes)))){
234             S3FS_PRN_ERR("Something error is occurred in reading %zu bytes at %lld from file(physical_fd=%d).", check_bytes, static_cast<long long int>(start + comp_bytes), fd);
235             found_bad_data = true;
236         }else{
237             check_bytes = static_cast<size_t>(read_bytes);
238             for(size_t tmppos = 0; tmppos < check_bytes; ++tmppos){
239                 if('\0' != readbuff[tmppos]){
240                     // found not ZERO data.
241                     found_bad_data = true;
242                     break;
243                 }
244             }
245         }
246         if(found_bad_data){
247             delete[] readbuff;
248             return false;
249         }
250     }
251     delete[] readbuff;
252     return true;
253 }
254 
255 //
256 // Checks that the specified area matches the state of the sparse file.
257 //
258 // [Parameters]
259 // checkpage:    This is one state of the cache file, it is loaded from the stats file.
260 // sparse_list:  This is a list of the results of directly checking the cache file status(HOLE/DATA).
261 //               In the HOLE area, the "loaded" flag of fdpage is false. The DATA area has it set to true.
262 // fd:           opened file discriptor to target cache file.
263 //
CheckAreaInSparseFile(const struct fdpage & checkpage,const fdpage_list_t & sparse_list,int fd,fdpage_list_t & err_area_list,fdpage_list_t & warn_area_list)264 bool PageList::CheckAreaInSparseFile(const struct fdpage& checkpage, const fdpage_list_t& sparse_list, int fd, fdpage_list_t& err_area_list, fdpage_list_t& warn_area_list)
265 {
266     // Check the block status of a part(Check Area: checkpage) of the target file.
267     // The elements of sparse_list have 5 patterns that overlap this block area.
268     //
269     // File           |<---...--------------------------------------...--->|
270     // Check Area              (offset)<-------------------->(offset + bytes - 1)
271     // Area case(0)       <------->
272     // Area case(1)                                            <------->
273     // Area case(2)              <-------->
274     // Area case(3)                                 <---------->
275     // Area case(4)                      <----------->
276     // Area case(5)              <----------------------------->
277     //
278     bool result = true;
279 
280     for(fdpage_list_t::const_iterator iter = sparse_list.begin(); iter != sparse_list.end(); ++iter){
281         off_t check_start = 0;
282         off_t check_bytes = 0;
283         if((iter->offset + iter->bytes) <= checkpage.offset){
284             // case 0
285             continue;    // next
286 
287         }else if((checkpage.offset + checkpage.bytes) <= iter->offset){
288             // case 1
289             break;       // finish
290 
291         }else if(iter->offset < checkpage.offset && (iter->offset + iter->bytes) < (checkpage.offset + checkpage.bytes)){
292             // case 2
293             check_start = checkpage.offset;
294             check_bytes = iter->bytes - (checkpage.offset - iter->offset);
295 
296         }else if(iter->offset < (checkpage.offset + checkpage.bytes) && (checkpage.offset + checkpage.bytes) < (iter->offset + iter->bytes)){
297             // case 3
298             check_start = iter->offset;
299             check_bytes = checkpage.bytes - (iter->offset - checkpage.offset);
300 
301         }else if(checkpage.offset < iter->offset && (iter->offset + iter->bytes) < (checkpage.offset + checkpage.bytes)){
302             // case 4
303             check_start = iter->offset;
304             check_bytes = iter->bytes;
305 
306         }else{  // (iter->offset <= checkpage.offset && (checkpage.offset + checkpage.bytes) <= (iter->offset + iter->bytes))
307             // case 5
308             check_start = checkpage.offset;
309             check_bytes = checkpage.bytes;
310         }
311 
312         // check target area type
313         if(checkpage.loaded || checkpage.modified){
314             // target area must be not HOLE(DATA) area.
315             if(!iter->loaded){
316                 // Found bad area, it is HOLE area.
317                 fdpage page(check_start, check_bytes, false, false);
318                 err_area_list.push_back(page);
319                 result = false;
320             }
321         }else{
322             // target area should be HOLE area.(If it is not a block boundary, it may be a DATA area.)
323             if(iter->loaded){
324                 // need to check this area's each data, it should be ZERO.
325                 if(!PageList::CheckZeroAreaInFile(fd, check_start, static_cast<size_t>(check_bytes))){
326                     // Discovered an area that has un-initial status data but it probably does not effect bad.
327                     fdpage page(check_start, check_bytes, true, false);
328                     warn_area_list.push_back(page);
329                     result = false;
330                 }
331             }
332         }
333     }
334     return result;
335 }
336 
337 //------------------------------------------------
338 // PageList methods
339 //------------------------------------------------
FreeList(fdpage_list_t & list)340 void PageList::FreeList(fdpage_list_t& list)
341 {
342     list.clear();
343 }
344 
PageList(off_t size,bool is_loaded,bool is_modified)345 PageList::PageList(off_t size, bool is_loaded, bool is_modified)
346 {
347     Init(size, is_loaded, is_modified);
348 }
349 
// Copy constructor: the new object gets a copy of every page of "other".
PageList::PageList(const PageList& other) : pages(other.pages)
{
}
356 
~PageList()357 PageList::~PageList()
358 {
359     Clear();
360 }
361 
Clear()362 void PageList::Clear()
363 {
364     PageList::FreeList(pages);
365 }
366 
Init(off_t size,bool is_loaded,bool is_modified)367 bool PageList::Init(off_t size, bool is_loaded, bool is_modified)
368 {
369     Clear();
370     if(0 < size){
371         fdpage page(0, size, is_loaded, is_modified);
372         pages.push_back(page);
373     }
374     return true;
375 }
376 
Size() const377 off_t PageList::Size() const
378 {
379     if(pages.empty()){
380         return 0;
381     }
382     fdpage_list_t::const_reverse_iterator riter = pages.rbegin();
383     return riter->next();
384 }
385 
Compress()386 bool PageList::Compress()
387 {
388     pages = compress_fdpage_list(pages);
389     return true;
390 }
391 
Parse(off_t new_pos)392 bool PageList::Parse(off_t new_pos)
393 {
394     for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){
395         if(new_pos == iter->offset){
396             // nothing to do
397             return true;
398         }else if(iter->offset < new_pos && new_pos < iter->next()){
399             fdpage page(iter->offset, new_pos - iter->offset, iter->loaded, iter->modified);
400             iter->bytes -= (new_pos - iter->offset);
401             iter->offset = new_pos;
402             pages.insert(iter, page);
403             return true;
404         }
405     }
406     return false;
407 }
408 
Resize(off_t size,bool is_loaded,bool is_modified)409 bool PageList::Resize(off_t size, bool is_loaded, bool is_modified)
410 {
411     off_t total = Size();
412 
413     if(0 == total){
414         Init(size, is_loaded, is_modified);
415 
416     }else if(total < size){
417         // add new area
418         fdpage page(total, (size - total), is_loaded, is_modified);
419         pages.push_back(page);
420 
421     }else if(size < total){
422         // cut area
423         for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ){
424             if(iter->next() <= size){
425                 ++iter;
426             }else{
427                 if(size <= iter->offset){
428                     iter = pages.erase(iter);
429                 }else{
430                     iter->bytes = size - iter->offset;
431                 }
432             }
433         }
434     }else{    // total == size
435         // nothing to do
436     }
437     // compress area
438     return Compress();
439 }
440 
IsPageLoaded(off_t start,off_t size) const441 bool PageList::IsPageLoaded(off_t start, off_t size) const
442 {
443     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
444         if(iter->end() < start){
445             continue;
446         }
447         if(!iter->loaded){
448             return false;
449         }
450         if(0 != size && start + size <= iter->next()){
451             break;
452         }
453     }
454     return true;
455 }
456 
SetPageLoadedStatus(off_t start,off_t size,PageList::page_status pstatus,bool is_compress)457 bool PageList::SetPageLoadedStatus(off_t start, off_t size, PageList::page_status pstatus, bool is_compress)
458 {
459     off_t now_size    = Size();
460     bool  is_loaded   = (PAGE_LOAD_MODIFIED == pstatus || PAGE_LOADED == pstatus);
461     bool  is_modified = (PAGE_LOAD_MODIFIED == pstatus || PAGE_MODIFIED == pstatus);
462 
463     if(now_size <= start){
464         if(now_size < start){
465             // add
466             Resize(start, false, is_modified);   // set modified flag from now end pos to specified start pos.
467         }
468         Resize(start + size, is_loaded, is_modified);
469 
470     }else if(now_size <= start + size){
471         // cut
472         Resize(start, false, false);            // not changed loaded/modified flags in existing area.
473         // add
474         Resize(start + size, is_loaded, is_modified);
475 
476     }else{
477         // start-size are inner pages area
478         // parse "start", and "start + size" position
479         Parse(start);
480         Parse(start + size);
481 
482         // set loaded flag
483         for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){
484             if(iter->end() < start){
485                 continue;
486             }else if(start + size <= iter->offset){
487                 break;
488             }else{
489                 iter->loaded   = is_loaded;
490                 iter->modified = is_modified;
491             }
492         }
493     }
494     // compress area
495     return (is_compress ? Compress() : true);
496 }
497 
FindUnloadedPage(off_t start,off_t & resstart,off_t & ressize) const498 bool PageList::FindUnloadedPage(off_t start, off_t& resstart, off_t& ressize) const
499 {
500     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
501         if(start <= iter->end()){
502             if(!iter->loaded && !iter->modified){     // Do not load unloaded and modified areas
503                 resstart = iter->offset;
504                 ressize  = iter->bytes;
505                 return true;
506             }
507         }
508     }
509     return false;
510 }
511 
512 // [NOTE]
513 // Accumulates the range of unload that is smaller than the Limit size.
514 // If you want to integrate all unload ranges, set the limit size to 0.
515 //
GetTotalUnloadedPageSize(off_t start,off_t size,off_t limit_size) const516 off_t PageList::GetTotalUnloadedPageSize(off_t start, off_t size, off_t limit_size) const
517 {
518     // If size is 0, it means loading to end.
519     if(0 == size){
520         if(start < Size()){
521             size = Size() - start;
522         }
523     }
524     off_t next     = start + size;
525     off_t restsize = 0;
526     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
527         if(iter->next() <= start){
528             continue;
529         }
530         if(next <= iter->offset){
531             break;
532         }
533         if(iter->loaded || iter->modified){
534             continue;
535         }
536         off_t tmpsize;
537         if(iter->offset <= start){
538             if(iter->next() <= next){
539                 tmpsize = (iter->next() - start);
540             }else{
541                 tmpsize = next - start;                  // = size
542             }
543         }else{
544             if(iter->next() <= next){
545                 tmpsize = iter->next() - iter->offset;   // = iter->bytes
546             }else{
547                 tmpsize = next - iter->offset;
548             }
549         }
550         if(0 == limit_size || tmpsize < limit_size){
551             restsize += tmpsize;
552         }
553     }
554     return restsize;
555 }
556 
GetUnloadedPages(fdpage_list_t & unloaded_list,off_t start,off_t size) const557 size_t PageList::GetUnloadedPages(fdpage_list_t& unloaded_list, off_t start, off_t size) const
558 {
559     // If size is 0, it means loading to end.
560     if(0 == size){
561         if(start < Size()){
562             size = Size() - start;
563         }
564     }
565     off_t next = start + size;
566 
567     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
568         if(iter->next() <= start){
569             continue;
570         }
571         if(next <= iter->offset){
572             break;
573         }
574         if(iter->loaded || iter->modified){
575             continue; // already loaded or modified
576         }
577 
578         // page area
579         off_t page_start = std::max(iter->offset, start);
580         off_t page_next  = std::min(iter->next(), next);
581         off_t page_size  = page_next - page_start;
582 
583         // add list
584         fdpage_list_t::reverse_iterator riter = unloaded_list.rbegin();
585         if(riter != unloaded_list.rend() && riter->next() == page_start){
586             // merge to before page
587             riter->bytes += page_size;
588         }else{
589             fdpage page(page_start, page_size, false, false);
590             unloaded_list.push_back(page);
591         }
592     }
593     return unloaded_list.size();
594 }
595 
596 // [NOTE]
597 // This method is called in advance when mixing POST and COPY in multi-part upload.
598 // The minimum size of each part must be 5 MB, and the data area below this must be
599 // downloaded from S3.
600 // This method checks the current PageList status and returns the area that needs
601 // to be downloaded so that each part is at least 5 MB.
602 //
GetPageListsForMultipartUpload(fdpage_list_t & dlpages,fdpage_list_t & mixuppages,off_t max_partsize)603 bool PageList::GetPageListsForMultipartUpload(fdpage_list_t& dlpages, fdpage_list_t& mixuppages, off_t max_partsize)
604 {
605     // compress before this processing
606     if(!Compress()){
607         return false;
608     }
609 
610     // make a list by modified flag
611     fdpage_list_t modified_pages = compress_fdpage_list_ignore_load(pages, false);
612     fdpage_list_t download_pages;         // A non-contiguous page list showing the areas that need to be downloaded
613     fdpage_list_t mixupload_pages;        // A continuous page list showing only modified flags for mixupload
614     fdpage        prev_page;
615     for(fdpage_list_t::const_iterator iter = modified_pages.begin(); iter != modified_pages.end(); ++iter){
616         if(iter->modified){
617             // current is modified area
618             if(!prev_page.modified){
619                 // previous is not modified area
620                 if(prev_page.bytes < MIN_MULTIPART_SIZE){
621                     // previous(not modified) area is too small for one multipart size,
622                     // then all of previous area is needed to download.
623                     download_pages.push_back(prev_page);
624 
625                     // previous(not modified) area is set upload area.
626                     prev_page.modified = true;
627                     mixupload_pages.push_back(prev_page);
628                 }else{
629                     // previous(not modified) area is set copy area.
630                     prev_page.modified = false;
631                     mixupload_pages.push_back(prev_page);
632                 }
633                 // set current to previous
634                 prev_page = *iter;
635             }else{
636                 // previous is modified area, too
637                 prev_page.bytes += iter->bytes;
638             }
639 
640         }else{
641             // current is not modified area
642             if(!prev_page.modified){
643                 // previous is not modified area, too
644                 prev_page.bytes += iter->bytes;
645 
646             }else{
647                 // previous is modified area
648                 if(prev_page.bytes < MIN_MULTIPART_SIZE){
649                     // previous(modified) area is too small for one multipart size,
650                     // then part or all of current area is needed to download.
651                     off_t  missing_bytes = MIN_MULTIPART_SIZE - prev_page.bytes;
652 
653                     if((missing_bytes + MIN_MULTIPART_SIZE) < iter-> bytes){
654                         // The current size is larger than the missing size, and the remainder
655                         // after deducting the missing size is larger than the minimum size.
656 
657                         fdpage missing_page(iter->offset, missing_bytes, false, false);
658                         download_pages.push_back(missing_page);
659 
660                         // previous(not modified) area is set upload area.
661                         prev_page.bytes = MIN_MULTIPART_SIZE;
662                         mixupload_pages.push_back(prev_page);
663 
664                         // set current to previous
665                         prev_page = *iter;
666                         prev_page.offset += missing_bytes;
667                         prev_page.bytes  -= missing_bytes;
668 
669                     }else{
670                         // The current size is less than the missing size, or the remaining
671                         // size less the missing size is less than the minimum size.
672                         download_pages.push_back(*iter);
673 
674                         // add current to previous
675                         prev_page.bytes += iter->bytes;
676                     }
677 
678                 }else{
679                     // previous(modified) area is enough size for one multipart size.
680                     mixupload_pages.push_back(prev_page);
681 
682                     // set current to previous
683                     prev_page = *iter;
684                 }
685             }
686         }
687     }
688     // last area
689     if(0 < prev_page.bytes){
690         mixupload_pages.push_back(prev_page);
691     }
692 
693     // compress
694     dlpages    = compress_fdpage_list_ignore_modify(download_pages, false);
695     mixuppages = compress_fdpage_list_ignore_load(mixupload_pages, false);
696 
697     // parse by max pagesize
698     dlpages    = parse_partsize_fdpage_list(dlpages, max_partsize);
699     mixuppages = parse_partsize_fdpage_list(mixuppages, max_partsize);
700 
701     return true;
702 }
703 
GetNoDataPageLists(fdpage_list_t & nodata_pages,off_t start,size_t size)704 bool PageList::GetNoDataPageLists(fdpage_list_t& nodata_pages, off_t start, size_t size)
705 {
706     // compress before this processing
707     if(!Compress()){
708         return false;
709     }
710 
711     // extract areas without data
712     fdpage_list_t tmp_pagelist;
713     off_t         stop_pos = (0L == size ? -1 : (start + size));
714     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
715         if((iter->offset + iter->bytes) < start){
716             continue;
717         }
718         if(-1 != stop_pos && stop_pos <= iter->offset){
719             break;
720         }
721         if(iter->modified){
722             continue;
723         }
724 
725         fdpage  tmppage;
726         tmppage.offset   = std::max(iter->offset, start);
727         tmppage.bytes    = (-1 != stop_pos ? iter->bytes : std::min(iter->bytes, (stop_pos - tmppage.offset)));
728         tmppage.loaded   = iter->loaded;
729         tmppage.modified = iter->modified;
730 
731         tmp_pagelist.push_back(tmppage);
732     }
733 
734     if(tmp_pagelist.empty()){
735         nodata_pages.clear();
736     }else{
737         // compress
738         nodata_pages = compress_fdpage_list(tmp_pagelist);
739     }
740     return true;
741 }
742 
BytesModified() const743 off_t PageList::BytesModified() const
744 {
745     off_t total = 0;
746     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
747         if(iter->modified){
748             total += iter->bytes;
749         }
750     }
751     return total;
752 }
753 
IsModified() const754 bool PageList::IsModified() const
755 {
756     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
757         if(iter->modified){
758             return true;
759         }
760     }
761     return false;
762 }
763 
ClearAllModified()764 bool PageList::ClearAllModified()
765 {
766     for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){
767         if(iter->modified){
768             iter->modified = false;
769         }
770     }
771     return Compress();
772 }
773 
Serialize(CacheFileStat & file,bool is_output,ino_t inode)774 bool PageList::Serialize(CacheFileStat& file, bool is_output, ino_t inode)
775 {
776     if(!file.Open()){
777         return false;
778     }
779     if(is_output){
780         //
781         // put to file
782         //
783         std::ostringstream ssall;
784         ssall << inode << ":" << Size();
785 
786         for(fdpage_list_t::iterator iter = pages.begin(); iter != pages.end(); ++iter){
787             ssall << "\n" << iter->offset << ":" << iter->bytes << ":" << (iter->loaded ? "1" : "0") << ":" << (iter->modified ? "1" : "0");
788         }
789 
790         if(-1 == ftruncate(file.GetFd(), 0)){
791             S3FS_PRN_ERR("failed to truncate file(to 0) for stats(%d)", errno);
792             return false;
793         }
794         std::string strall = ssall.str();
795         if(0 >= pwrite(file.GetFd(), strall.c_str(), strall.length(), 0)){
796             S3FS_PRN_ERR("failed to write stats(%d)", errno);
797             return false;
798         }
799 
800     }else{
801         //
802         // loading from file
803         //
804         struct stat st;
805         memset(&st, 0, sizeof(struct stat));
806         if(-1 == fstat(file.GetFd(), &st)){
807             S3FS_PRN_ERR("fstat is failed. errno(%d)", errno);
808             return false;
809         }
810         if(0 >= st.st_size){
811           // nothing
812             Init(0, false, false);
813             return true;
814         }
815         char* ptmp = new char[st.st_size + 1];
816         ssize_t result;
817         // read from file
818         if(0 >= (result = pread(file.GetFd(), ptmp, st.st_size, 0))){
819             S3FS_PRN_ERR("failed to read stats(%d)", errno);
820             delete[] ptmp;
821             return false;
822         }
823         ptmp[result] = '\0';
824         std::string        oneline;
825         std::istringstream ssall(ptmp);
826 
827         // loaded
828         Clear();
829 
830         // load head line(for size and inode)
831         off_t total;
832         ino_t cache_inode;                  // if this value is 0, it means old format.
833         if(!getline(ssall, oneline, '\n')){
834             S3FS_PRN_ERR("failed to parse stats.");
835             delete[] ptmp;
836             return false;
837         }else{
838             std::istringstream sshead(oneline);
839             std::string        strhead1;
840             std::string        strhead2;
841 
842             // get first part in head line.
843             if(!getline(sshead, strhead1, ':')){
844                 S3FS_PRN_ERR("failed to parse stats.");
845                 delete[] ptmp;
846                 return false;
847             }
848             // get second part in head line.
849             if(!getline(sshead, strhead2, ':')){
850                 // old head format is "<size>\n"
851                 total       = cvt_strtoofft(strhead1.c_str(), /* base= */10);
852                 cache_inode = 0;
853             }else{
854                 // current head format is "<inode>:<size>\n"
855                 total       = cvt_strtoofft(strhead2.c_str(), /* base= */10);
856                 cache_inode = static_cast<ino_t>(cvt_strtoofft(strhead1.c_str(), /* base= */10));
857                 if(0 == cache_inode){
858                     S3FS_PRN_ERR("wrong inode number in parsed cache stats.");
859                     delete[] ptmp;
860                     return false;
861                 }
862             }
863         }
864         // check inode number
865         if(0 != cache_inode && cache_inode != inode){
866             S3FS_PRN_ERR("differ inode and inode number in parsed cache stats.");
867             delete[] ptmp;
868             return false;
869         }
870 
871         // load each part
872         bool is_err = false;
873         while(getline(ssall, oneline, '\n')){
874             std::string        part;
875             std::istringstream ssparts(oneline);
876             // offset
877             if(!getline(ssparts, part, ':')){
878                 is_err = true;
879                 break;
880             }
881             off_t offset = cvt_strtoofft(part.c_str(), /* base= */10);
882             // size
883             if(!getline(ssparts, part, ':')){
884                 is_err = true;
885                 break;
886             }
887             off_t size = cvt_strtoofft(part.c_str(), /* base= */10);
888             // loaded
889             if(!getline(ssparts, part, ':')){
890                 is_err = true;
891                 break;
892             }
893             bool is_loaded = (1 == cvt_strtoofft(part.c_str(), /* base= */10) ? true : false);
894             bool is_modified;
895             if(!getline(ssparts, part, ':')){
896                 is_modified = false;        // old version does not have this part.
897             }else{
898                 is_modified = (1 == cvt_strtoofft(part.c_str(), /* base= */10) ? true : false);
899             }
900             // add new area
901             PageList::page_status pstatus =
902               ( is_loaded && is_modified  ? PageList::PAGE_LOAD_MODIFIED :
903                 !is_loaded && is_modified ? PageList::PAGE_MODIFIED      :
904                 is_loaded && !is_modified ? PageList::PAGE_LOADED        : PageList::PAGE_NOT_LOAD_MODIFIED );
905 
906             SetPageLoadedStatus(offset, size, pstatus);
907         }
908         delete[] ptmp;
909         if(is_err){
910             S3FS_PRN_ERR("failed to parse stats.");
911             Clear();
912             return false;
913         }
914 
915         // check size
916         if(total != Size()){
917             S3FS_PRN_ERR("different size(%lld - %lld).", static_cast<long long int>(total), static_cast<long long int>(Size()));
918             Clear();
919             return false;
920         }
921     }
922     return true;
923 }
924 
Dump() const925 void PageList::Dump() const
926 {
927     int cnt = 0;
928 
929     S3FS_PRN_DBG("pages = {");
930     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter, ++cnt){
931         S3FS_PRN_DBG("  [%08d] -> {%014lld - %014lld : %s / %s}", cnt, static_cast<long long int>(iter->offset), static_cast<long long int>(iter->bytes), iter->loaded ? "loaded" : "unloaded", iter->modified ? "modified" : "not modified");
932     }
933     S3FS_PRN_DBG("}");
934 }
935 
936 //
937 // Compare the fdpage_list_t pages of the object with the state of the file.
938 //
939 // The loaded=true or modified=true area of pages must be a DATA block
940 // (not a HOLE block) in the file.
941 // The other area is a HOLE block in the file or is a DATA block(but the
942 // data of the target area in that block should be ZERO).
943 // If it is a bad area in the previous case, it will be reported as an error.
944 // If the latter case does not match, it will be reported as a warning.
945 //
CompareSparseFile(int fd,size_t file_size,fdpage_list_t & err_area_list,fdpage_list_t & warn_area_list)946 bool PageList::CompareSparseFile(int fd, size_t file_size, fdpage_list_t& err_area_list, fdpage_list_t& warn_area_list)
947 {
948     err_area_list.clear();
949     warn_area_list.clear();
950 
951     // First, list the block disk allocation area of the cache file.
952     // The cache file has holes(sparse file) and no disk block areas
953     // are assigned to any holes.
954     fdpage_list_t sparse_list;
955     if(!PageList::GetSparseFilePages(fd, file_size, sparse_list)){
956         S3FS_PRN_ERR("Something error is occurred in parsing hole/data of the cache file(physical_fd=%d).", fd);
957 
958         fdpage page(0, static_cast<off_t>(file_size), false, false);
959         err_area_list.push_back(page);
960 
961         return false;
962     }
963 
964     if(sparse_list.empty() && pages.empty()){
965         // both file and stats information are empty, it means cache file size is ZERO.
966         return true;
967     }
968 
969     // Compare each pages and sparse_list
970     bool result = true;
971     for(fdpage_list_t::const_iterator iter = pages.begin(); iter != pages.end(); ++iter){
972         if(!PageList::CheckAreaInSparseFile(*iter, sparse_list, fd, err_area_list, warn_area_list)){
973             result = false;
974         }
975     }
976     return result;
977 }
978 
979 /*
980 * Local variables:
981 * tab-width: 4
982 * c-basic-offset: 4
983 * End:
984 * vim600: expandtab sw=4 ts=4 fdm=marker
985 * vim<600: expandtab sw=4 ts=4
986 */
987