1 /*
2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 47    Store Directory Routines */
10 
11 #include "squid.h"
12 #include "fs_io.h"
13 #include "globals.h"
14 #include "RebuildState.h"
15 #include "SquidConfig.h"
16 #include "SquidTime.h"
17 #include "store/Disks.h"
18 #include "store_key_md5.h"
19 #include "store_rebuild.h"
20 #include "StoreSwapLogData.h"
21 #include "tools.h"
22 #include "UFSSwapLogParser.h"
23 
24 #include <cerrno>
25 #include <cmath>
26 #if HAVE_SYS_STAT_H
27 #include <sys/stat.h>
28 #endif
29 
30 CBDATA_NAMESPACED_CLASS_INIT(Fs::Ufs,RebuildState);
31 
RebuildState(RefCount<UFSSwapDir> aSwapDir)32 Fs::Ufs::RebuildState::RebuildState(RefCount<UFSSwapDir> aSwapDir) :
33     sd(aSwapDir),
34     n_read(0),
35     LogParser(NULL),
36     curlvl1(0),
37     curlvl2(0),
38     in_dir(0),
39     done(0),
40     fn(0),
41     entry(NULL),
42     td(NULL),
43     fromLog(true),
44     _done(false),
45     cbdata(NULL)
46 {
47     *fullpath = 0;
48     *fullfilename = 0;
49 
50     /*
51      * If the swap.state file exists in the cache_dir, then
52      * we'll use commonUfsDirRebuildFromSwapLog(), otherwise we'll
53      * use commonUfsDirRebuildFromDirectory() to open up each file
54      * and suck in the meta data.
55      */
56     int clean = 0; //TODO: change to bool
57     int zeroLengthLog = 0;
58     FILE *fp = sd->openTmpSwapLog(&clean, &zeroLengthLog);
59 
60     if (fp && !zeroLengthLog)
61         LogParser = Fs::Ufs::UFSSwapLogParser::GetUFSSwapLogParser(fp);
62 
63     if (LogParser == NULL ) {
64         fromLog = false;
65 
66         if (fp != NULL)
67             fclose(fp);
68 
69     } else {
70         fromLog = true;
71         flags.clean = (clean != 0);
72     }
73 
74     if (!clean)
75         flags.need_to_validate = true;
76 
77     debugs(47, DBG_IMPORTANT, "Rebuilding storage in " << sd->path << " (" <<
78            (clean ? "clean log" : (LogParser ? "dirty log" : "no log")) << ")");
79 }
80 
~RebuildState()81 Fs::Ufs::RebuildState::~RebuildState()
82 {
83     sd->closeTmpSwapLog();
84 
85     if (LogParser)
86         delete LogParser;
87 }
88 
89 void
RebuildStep(void * data)90 Fs::Ufs::RebuildState::RebuildStep(void *data)
91 {
92     RebuildState *rb = (RebuildState *)data;
93     if (!reconfiguring)
94         rb->rebuildStep();
95 
96     // delay storeRebuildComplete() when reconfiguring to protect storeCleanup()
97     if (!rb->isDone() || reconfiguring)
98         eventAdd("storeRebuild", RebuildStep, rb, 0.01, 1);
99     else {
100         -- StoreController::store_dirs_rebuilding;
101         storeRebuildComplete(&rb->counts);
102         delete rb;
103     }
104 }
105 
106 /// load entries from swap.state or files until we run out of entries or time
107 void
rebuildStep()108 Fs::Ufs::RebuildState::rebuildStep()
109 {
110     // Balance our desire to maximize the number of entries processed at once
111     // (and, hence, minimize overheads and total rebuild time) with a
112     // requirement to also process Coordinator events, disk I/Os, etc.
113     const int maxSpentMsec = 50; // keep small: most RAM I/Os are under 1ms
114     const timeval loopStart = current_time;
115 
116     const int totalEntries = LogParser ? LogParser->SwapLogEntries() : -1;
117 
118     while (!isDone()) {
119         if (fromLog)
120             rebuildFromSwapLog();
121         else
122             rebuildFromDirectory();
123 
124         // TODO: teach storeRebuildProgress to handle totalEntries <= 0
125         if (totalEntries > 0 && (n_read % 4000 == 0))
126             storeRebuildProgress(sd->index, totalEntries, n_read);
127 
128         if (opt_foreground_rebuild)
129             continue; // skip "few entries at a time" check below
130 
131         getCurrentTime();
132         const double elapsedMsec = tvSubMsec(loopStart, current_time);
133         if (elapsedMsec > maxSpentMsec || elapsedMsec < 0) {
134             debugs(47, 5, HERE << "pausing after " << n_read << " entries in " <<
135                    elapsedMsec << "ms; " << (elapsedMsec/n_read) << "ms per entry");
136             break;
137         }
138     }
139 }
140 
141 /// process one cache file
142 void
rebuildFromDirectory()143 Fs::Ufs::RebuildState::rebuildFromDirectory()
144 {
145     cache_key key[SQUID_MD5_DIGEST_LENGTH];
146 
147     struct stat sb;
148     int fd = -1;
149     debugs(47, 3, HERE << "DIR #" << sd->index);
150 
151     assert(fd == -1);
152     sfileno filn = 0;
153     int size;
154     fd = getNextFile(&filn, &size);
155 
156     if (fd == -2) {
157         debugs(47, DBG_IMPORTANT, "Done scanning " << sd->path << " dir (" <<
158                n_read << " entries)");
159         _done = true;
160         return;
161     } else if (fd < 0) {
162         return;
163     }
164 
165     assert(fd > -1);
166     /* lets get file stats here */
167 
168     ++n_read;
169 
170     if (fstat(fd, &sb) < 0) {
171         int xerrno = errno;
172         debugs(47, DBG_IMPORTANT, MYNAME << "fstat(FD " << fd << "): " << xstrerr(xerrno));
173         file_close(fd);
174         --store_open_disk_fd;
175         fd = -1;
176         return;
177     }
178 
179     MemBuf buf;
180     buf.init(SM_PAGE_SIZE, SM_PAGE_SIZE);
181     if (!storeRebuildLoadEntry(fd, sd->index, buf, counts))
182         return;
183 
184     const uint64_t expectedSize = sb.st_size > 0 ?
185                                   static_cast<uint64_t>(sb.st_size) : 0;
186 
187     StoreEntry tmpe;
188     const bool parsed = storeRebuildParseEntry(buf, tmpe, key, counts,
189                         expectedSize);
190 
191     file_close(fd);
192     --store_open_disk_fd;
193     fd = -1;
194 
195     bool accepted = parsed && tmpe.swap_file_sz > 0;
196     if (parsed && !accepted) {
197         debugs(47, DBG_IMPORTANT, "WARNING: Ignoring ufs cache entry with " <<
198                "unknown size: " << tmpe);
199         accepted = false;
200     }
201 
202     if (!accepted) {
203         // XXX: shouldn't this be a call to commonUfsUnlink?
204         sd->unlinkFile(filn); // should we unlink in all failure cases?
205         return;
206     }
207 
208     addIfFresh(key,
209                filn,
210                tmpe.swap_file_sz,
211                tmpe.expires,
212                tmpe.timestamp,
213                tmpe.lastref,
214                tmpe.lastModified(),
215                tmpe.refcount,
216                tmpe.flags);
217 }
218 
219 /// if the loaded entry metadata is still relevant, indexes the entry
220 void
addIfFresh(const cache_key * key,sfileno file_number,uint64_t swap_file_sz,time_t expires,time_t timestamp,time_t lastref,time_t lastmod,uint32_t refcount,uint16_t newFlags)221 Fs::Ufs::RebuildState::addIfFresh(const cache_key *key,
222                                   sfileno file_number,
223                                   uint64_t swap_file_sz,
224                                   time_t expires,
225                                   time_t timestamp,
226                                   time_t lastref,
227                                   time_t lastmod,
228                                   uint32_t refcount,
229                                   uint16_t newFlags)
230 {
231     if (!evictStaleAndContinue(key, lastref, counts.dupcount))
232         return;
233 
234     ++counts.objcount;
235     const auto addedEntry = sd->addDiskRestore(key,
236                             file_number,
237                             swap_file_sz,
238                             expires,
239                             timestamp,
240                             lastref,
241                             lastmod,
242                             refcount,
243                             newFlags,
244                             0 /* XXX: unused */);
245     storeDirSwapLog(addedEntry, SWAP_LOG_ADD);
246 }
247 
248 /// Evicts a matching entry if it was last touched before caller's maxRef.
249 /// \returns false only if the matching entry was touched at or after maxRef,
250 /// indicating that the caller has supplied outdated maxRef.
251 bool
evictStaleAndContinue(const cache_key * candidateKey,const time_t maxRef,int & staleCount)252 Fs::Ufs::RebuildState::evictStaleAndContinue(const cache_key *candidateKey, const time_t maxRef, int &staleCount)
253 {
254     if (auto *indexedEntry = Store::Root().peek(candidateKey)) {
255 
256         if (indexedEntry->lastref >= maxRef) {
257             indexedEntry->abandon("Fs::Ufs::RebuildState::evictStaleAndContinue");
258             ++counts.clashcount;
259             return false;
260         }
261 
262         ++staleCount;
263         indexedEntry->release(true); // evict previously indexedEntry
264     }
265 
266     return true;
267 }
268 
269 /// process one swap log entry
270 void
rebuildFromSwapLog()271 Fs::Ufs::RebuildState::rebuildFromSwapLog()
272 {
273     StoreSwapLogData swapData;
274 
275     if (LogParser->ReadRecord(swapData) != 1) {
276         debugs(47, DBG_IMPORTANT, "Done reading " << sd->path << " swaplog (" << n_read << " entries)");
277         LogParser->Close();
278         delete LogParser;
279         LogParser = NULL;
280         _done = true;
281         return;
282     }
283 
284     ++n_read;
285 
286     if (!swapData.sane()) {
287         ++counts.invalid;
288         return;
289     }
290 
291     /*
292      * BC: during 2.4 development, we changed the way swap file
293      * numbers are assigned and stored.  The high 16 bits used
294      * to encode the SD index number.  There used to be a call
295      * to storeDirProperFileno here that re-assigned the index
296      * bits.  Now, for backwards compatibility, we just need
297      * to mask it off.
298      */
299     swapData.swap_filen &= 0x00FFFFFF;
300 
301     debugs(47, 3, HERE << swap_log_op_str[(int) swapData.op]  << " " <<
302            storeKeyText(swapData.key)  << " "<< std::setfill('0') <<
303            std::hex << std::uppercase << std::setw(8) <<
304            swapData.swap_filen);
305 
306     if (swapData.op == SWAP_LOG_ADD) {
307         (void) 0;
308     } else if (swapData.op == SWAP_LOG_DEL) {
309         // remove any older or same-age entry; +1 covers same-age entries
310         (void)evictStaleAndContinue(swapData.key, swapData.lastref+1, counts.cancelcount);
311         return;
312     } else {
313         const double
314         x = ::log(static_cast<double>(++counts.bad_log_op)) / ::log(10.0);
315 
316         if (0.0 == x - (double) (int) x)
317             debugs(47, DBG_IMPORTANT, "WARNING: " << counts.bad_log_op << " invalid swap log entries found");
318 
319         ++counts.invalid;
320 
321         return;
322     }
323 
324     ++counts.scancount; // XXX: should not this be incremented earlier?
325 
326     if (!sd->validFileno(swapData.swap_filen, 0)) {
327         ++counts.invalid;
328         return;
329     }
330 
331     if (EBIT_TEST(swapData.flags, KEY_PRIVATE)) {
332         ++counts.badflags;
333         return;
334     }
335 
336     if (sd->mapBitTest(swapData.swap_filen)) {
337         // While we were scanning the logs, some (unrelated) entry was added to
338         // our disk using our logged swap_filen. We could change our swap_filen
339         // and move the store file, but there is no Store API to do that (TODO).
340         ++counts.clashcount;
341         return;
342     }
343 
344     addIfFresh(swapData.key,
345                swapData.swap_filen,
346                swapData.swap_file_sz,
347                swapData.expires,
348                swapData.timestamp,
349                swapData.lastref,
350                swapData.lastmod,
351                swapData.refcount,
352                swapData.flags);
353 }
354 
355 int
getNextFile(sfileno * filn_p,int *)356 Fs::Ufs::RebuildState::getNextFile(sfileno * filn_p, int *)
357 {
358     int fd = -1;
359     int dirs_opened = 0;
360     debugs(47, 3, HERE << "flag=" << flags.init  << ", " <<
361            sd->index  << ": /"<< std::setfill('0') << std::hex <<
362            std::uppercase << std::setw(2) << curlvl1  << "/" << std::setw(2) <<
363            curlvl2);
364 
365     if (done)
366         return -2;
367 
368     while (fd < 0 && done == 0) {
369         fd = -1;
370 
371         if (!flags.init) {  /* initialize, open first file */
372             // XXX: 0's should not be needed, constructor inits now
373             done = 0;
374             curlvl1 = 0;
375             curlvl2 = 0;
376             in_dir = 0;
377             flags.init = true;
378             assert(Config.cacheSwap.n_configured > 0);
379         }
380 
381         if (0 == in_dir) {  /* we need to read in a new directory */
382             snprintf(fullpath, sizeof(fullpath), "%s/%02X/%02X",
383                      sd->path,
384                      curlvl1, curlvl2);
385 
386             if (dirs_opened)
387                 return -1;
388 
389             td = opendir(fullpath);
390 
391             ++dirs_opened;
392 
393             if (!td) {
394                 int xerrno = errno;
395                 debugs(47, DBG_IMPORTANT, MYNAME << "error in opendir (" << fullpath << "): " << xstrerr(xerrno));
396             } else {
397                 entry = readdir(td);    /* skip . and .. */
398                 entry = readdir(td);
399 
400                 if (entry == NULL && errno == ENOENT)
401                     debugs(47, DBG_IMPORTANT, HERE << "WARNING: directory does not exist!");
402                 debugs(47, 3, HERE << "Directory " << fullpath);
403             }
404         }
405 
406         if (td != NULL && (entry = readdir(td)) != NULL) {
407             ++in_dir;
408 
409             if (sscanf(entry->d_name, "%x", &fn) != 1) {
410                 debugs(47, 3, HERE << "invalid entry " << entry->d_name);
411                 continue;
412             }
413 
414             if (!UFSSwapDir::FilenoBelongsHere(fn, sd->index, curlvl1, curlvl2)) {
415                 debugs(47, 3, HERE << std::setfill('0') <<
416                        std::hex << std::uppercase << std::setw(8) << fn  <<
417                        " does not belong in " << std::dec << sd->index  << "/" <<
418                        curlvl1  << "/" << curlvl2);
419 
420                 continue;
421             }
422 
423             if (sd->mapBitTest(fn)) {
424                 debugs(47, 3, HERE << "Locked, continuing with next.");
425                 continue;
426             }
427 
428             snprintf(fullfilename, sizeof(fullfilename), "%s/%s",
429                      fullpath, entry->d_name);
430             debugs(47, 3, HERE << "Opening " << fullfilename);
431             fd = file_open(fullfilename, O_RDONLY | O_BINARY);
432 
433             if (fd < 0) {
434                 int xerrno = errno;
435                 debugs(47, DBG_IMPORTANT, MYNAME << "error opening " << fullfilename << ": " << xstrerr(xerrno));
436             } else
437                 ++store_open_disk_fd;
438 
439             continue;
440         }
441 
442         if (td != NULL)
443             closedir(td);
444 
445         td = NULL;
446 
447         in_dir = 0;
448 
449         if (sd->validL2(++curlvl2))
450             continue;
451 
452         curlvl2 = 0;
453 
454         if (sd->validL1(++curlvl1))
455             continue;
456 
457         curlvl1 = 0;
458 
459         done = 1;
460     }
461 
462     *filn_p = fn;
463     return fd;
464 }
465 
466 bool
error() const467 Fs::Ufs::RebuildState::error() const
468 {
469     return false;
470 }
471 
472 bool
isDone() const473 Fs::Ufs::RebuildState::isDone() const
474 {
475     return _done;
476 }
477 
478