1 /*
2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* DEBUG: section 71    Store Digest Manager */
10 
11 /*
12  * TODO: We probably do not track all the cases when
13  *       storeDigestNoteStoreReady() must be called; this may prevent
14  *       storeDigestRebuild/write schedule to be activated
15  */
16 
17 #include "squid.h"
18 #include "Debug.h"
19 #include "event.h"
20 #include "globals.h"
21 #include "mgr/Registration.h"
22 #include "store_digest.h"
23 
24 #if USE_CACHE_DIGESTS
25 #include "CacheDigest.h"
26 #include "HttpReply.h"
27 #include "HttpRequest.h"
28 #include "internal.h"
29 #include "MemObject.h"
30 #include "PeerDigest.h"
31 #include "refresh.h"
32 #include "SquidConfig.h"
33 #include "SquidTime.h"
34 #include "Store.h"
35 #include "StoreSearch.h"
36 #include "util.h"
37 
38 #include <cmath>
39 
40 /*
41  * local types
42  */
43 
44 class StoreDigestState
45 {
46 public:
47     StoreDigestCBlock cblock;
48     int rebuild_lock = 0;                 ///< bucket number
49     StoreEntry * rewrite_lock = nullptr;  ///< points to store entry with the digest
50     StoreSearchPointer theSearch;
51     int rewrite_offset = 0;
52     int rebuild_count = 0;
53     int rewrite_count = 0;
54 };
55 
56 class StoreDigestStats
57 {
58 public:
59     int del_count = 0;          /* #store entries deleted from store_digest */
60     int del_lost_count = 0;     /* #store entries not found in store_digest on delete */
61     int add_count = 0;          /* #store entries accepted to store_digest */
62     int add_coll_count = 0;     /* #accepted entries that collided with existing ones */
63     int rej_count = 0;          /* #store entries not accepted to store_digest */
64     int rej_coll_count = 0;     /* #not accepted entries that collided with existing ones */
65 };
66 
67 /* local vars */
68 static StoreDigestState sd_state;
69 static StoreDigestStats sd_stats;
70 
71 /* local prototypes */
72 static void storeDigestRebuildStart(void *datanotused);
73 static void storeDigestRebuildResume(void);
74 static void storeDigestRebuildFinish(void);
75 static void storeDigestRebuildStep(void *datanotused);
76 static void storeDigestRewriteStart(void *);
77 static void storeDigestRewriteResume(void);
78 static void storeDigestRewriteFinish(StoreEntry * e);
79 static EVH storeDigestSwapOutStep;
80 static void storeDigestCBlockSwapOut(StoreEntry * e);
81 static void storeDigestAdd(const StoreEntry *);
82 
83 /// calculates digest capacity
84 static uint64_t
storeDigestCalcCap()85 storeDigestCalcCap()
86 {
87     /*
88      * To-Do: Bloom proved that the optimal filter utilization is 50% (half of
89      * the bits are off). However, we do not have a formula to calculate the
90      * number of _entries_ we want to pre-allocate for.
91      */
92     const uint64_t hi_cap = Store::Root().maxSize() / Config.Store.avgObjectSize;
93     const uint64_t lo_cap = 1 + Store::Root().currentSize() / Config.Store.avgObjectSize;
94     const uint64_t e_count = StoreEntry::inUseCount();
95     uint64_t cap = e_count ? e_count : hi_cap;
96     debugs(71, 2, "have: " << e_count << ", want " << cap <<
97            " entries; limits: [" << lo_cap << ", " << hi_cap << "]");
98 
99     if (cap < lo_cap)
100         cap = lo_cap;
101 
102     /* do not enforce hi_cap limit, average-based estimation may be wrong
103      *if (cap > hi_cap)
104      *  cap = hi_cap;
105      */
106 
107     // Bug 4534: we still have to set an upper-limit at some reasonable value though.
108     // this matches cacheDigestCalcMaskSize doing (cap*bpe)+7 < INT_MAX
109     const uint64_t absolute_max = (INT_MAX -8) / Config.digest.bits_per_entry;
110     if (cap > absolute_max) {
111         static time_t last_loud = 0;
112         if (last_loud < squid_curtime - 86400) {
113             debugs(71, DBG_IMPORTANT, "WARNING: Cache Digest cannot store " << cap << " entries. Limiting to " << absolute_max);
114             last_loud = squid_curtime;
115         } else {
116             debugs(71, 3, "WARNING: Cache Digest cannot store " << cap << " entries. Limiting to " << absolute_max);
117         }
118         cap = absolute_max;
119     }
120 
121     return cap;
122 }
123 #endif /* USE_CACHE_DIGESTS */
124 
125 void
storeDigestInit(void)126 storeDigestInit(void)
127 {
128     Mgr::RegisterAction("store_digest", "Store Digest", storeDigestReport, 0, 1);
129 
130 #if USE_CACHE_DIGESTS
131     if (!Config.onoff.digest_generation) {
132         store_digest = NULL;
133         debugs(71, 3, "Local cache digest generation disabled");
134         return;
135     }
136 
137     const uint64_t cap = storeDigestCalcCap();
138     store_digest = new CacheDigest(cap, Config.digest.bits_per_entry);
139     debugs(71, DBG_IMPORTANT, "Local cache digest enabled; rebuild/rewrite every " <<
140            (int) Config.digest.rebuild_period << "/" <<
141            (int) Config.digest.rewrite_period << " sec");
142 
143     sd_state = StoreDigestState();
144 #else
145     store_digest = NULL;
146     debugs(71, 3, "Local cache digest is 'off'");
147 #endif
148 }
149 
150 /* called when store_rebuild completes */
151 void
storeDigestNoteStoreReady(void)152 storeDigestNoteStoreReady(void)
153 {
154 #if USE_CACHE_DIGESTS
155 
156     if (Config.onoff.digest_generation) {
157         storeDigestRebuildStart(NULL);
158         storeDigestRewriteStart(NULL);
159     }
160 
161 #endif
162 }
163 
164 //TODO: this seems to be dead code. Is it needed?
165 void
storeDigestDel(const StoreEntry * entry)166 storeDigestDel(const StoreEntry * entry)
167 {
168 #if USE_CACHE_DIGESTS
169 
170     if (!Config.onoff.digest_generation) {
171         return;
172     }
173 
174     assert(entry && store_digest);
175     debugs(71, 6, "storeDigestDel: checking entry, key: " << entry->getMD5Text());
176 
177     if (!EBIT_TEST(entry->flags, KEY_PRIVATE)) {
178         if (!store_digest->contains(static_cast<const cache_key *>(entry->key))) {
179             ++sd_stats.del_lost_count;
180             debugs(71, 6, "storeDigestDel: lost entry, key: " << entry->getMD5Text() << " url: " << entry->url()  );
181         } else {
182             ++sd_stats.del_count;
183             store_digest->remove(static_cast<const cache_key *>(entry->key));
184             debugs(71, 6, "storeDigestDel: deled entry, key: " << entry->getMD5Text());
185         }
186     }
187 #endif //USE_CACHE_DIGESTS
188 }
189 
190 void
storeDigestReport(StoreEntry * e)191 storeDigestReport(StoreEntry * e)
192 {
193 #if USE_CACHE_DIGESTS
194 
195     if (!Config.onoff.digest_generation) {
196         return;
197     }
198 
199     if (store_digest) {
200         cacheDigestReport(store_digest, "store", e);
201         storeAppendPrintf(e, "\t added: %d rejected: %d ( %.2f %%) del-ed: %d\n",
202                           sd_stats.add_count,
203                           sd_stats.rej_count,
204                           xpercent(sd_stats.rej_count, sd_stats.rej_count + sd_stats.add_count),
205                           sd_stats.del_count);
206         storeAppendPrintf(e, "\t collisions: on add: %.2f %% on rej: %.2f %%\n",
207                           xpercent(sd_stats.add_coll_count, sd_stats.add_count),
208                           xpercent(sd_stats.rej_coll_count, sd_stats.rej_count));
209     } else {
210         storeAppendPrintf(e, "store digest: disabled.\n");
211     }
212 
213 #endif //USE_CACHE_DIGESTS
214 }
215 
216 /*
217  * LOCAL FUNCTIONS
218  */
219 
220 #if USE_CACHE_DIGESTS
221 
222 /* should we digest this entry? used by storeDigestAdd() */
223 static int
storeDigestAddable(const StoreEntry * e)224 storeDigestAddable(const StoreEntry * e)
225 {
226     /* add some stats! XXX */
227 
228     debugs(71, 6, "storeDigestAddable: checking entry, key: " << e->getMD5Text());
229 
230     /* check various entry flags (mimics StoreEntry::checkCachable XXX) */
231 
232     if (EBIT_TEST(e->flags, KEY_PRIVATE)) {
233         debugs(71, 6, "storeDigestAddable: NO: private key");
234         return 0;
235     }
236 
237     if (EBIT_TEST(e->flags, ENTRY_NEGCACHED)) {
238         debugs(71, 6, "storeDigestAddable: NO: negative cached");
239         return 0;
240     }
241 
242     if (EBIT_TEST(e->flags, RELEASE_REQUEST)) {
243         debugs(71, 6, "storeDigestAddable: NO: release requested");
244         return 0;
245     }
246 
247     if (e->store_status == STORE_OK && EBIT_TEST(e->flags, ENTRY_BAD_LENGTH)) {
248         debugs(71, 6, "storeDigestAddable: NO: wrong content-length");
249         return 0;
250     }
251 
252     /* do not digest huge objects */
253     if (e->swap_file_sz > (uint64_t )Config.Store.maxObjectSize) {
254         debugs(71, 6, "storeDigestAddable: NO: too big");
255         return 0;
256     }
257 
258     /* still here? check staleness */
259     /* Note: We should use the time of the next rebuild, not (cur_time+period) */
260     if (refreshCheckDigest(e, Config.digest.rebuild_period)) {
261         debugs(71, 6, "storeDigestAdd: entry expires within " << Config.digest.rebuild_period << " secs, ignoring");
262         return 0;
263     }
264 
265     /*
266      * idea: how about also skipping very fresh (thus, potentially
267      * unstable) entries? Should be configurable through
268      * cd_refresh_pattern, of course.
269      */
270     /*
271      * idea: skip objects that are going to be purged before the next
272      * update.
273      */
274     return 1;
275 }
276 
277 static void
storeDigestAdd(const StoreEntry * entry)278 storeDigestAdd(const StoreEntry * entry)
279 {
280     assert(entry && store_digest);
281 
282     if (storeDigestAddable(entry)) {
283         ++sd_stats.add_count;
284 
285         if (store_digest->contains(static_cast<const cache_key *>(entry->key)))
286             ++sd_stats.add_coll_count;
287 
288         store_digest->add(static_cast<const cache_key *>(entry->key));
289 
290         debugs(71, 6, "storeDigestAdd: added entry, key: " << entry->getMD5Text());
291     } else {
292         ++sd_stats.rej_count;
293 
294         if (store_digest->contains(static_cast<const cache_key *>(entry->key)))
295             ++sd_stats.rej_coll_count;
296     }
297 }
298 
299 /* rebuilds digest from scratch */
300 static void
storeDigestRebuildStart(void * datanotused)301 storeDigestRebuildStart(void *datanotused)
302 {
303     assert(store_digest);
304     /* prevent overlapping if rebuild schedule is too tight */
305 
306     if (sd_state.rebuild_lock) {
307         debugs(71, DBG_IMPORTANT, "storeDigestRebuildStart: overlap detected, consider increasing rebuild period");
308         return;
309     }
310 
311     sd_state.rebuild_lock = 1;
312     debugs(71, 2, "storeDigestRebuildStart: rebuild #" << sd_state.rebuild_count + 1);
313 
314     if (sd_state.rewrite_lock) {
315         debugs(71, 2, "storeDigestRebuildStart: waiting for Rewrite to finish.");
316         return;
317     }
318 
319     storeDigestRebuildResume();
320 }
321 
322 /// \returns true if we actually resized the digest
323 static bool
storeDigestResize()324 storeDigestResize()
325 {
326     const uint64_t cap = storeDigestCalcCap();
327     assert(store_digest);
328     uint64_t diff;
329     if (cap > store_digest->capacity)
330         diff = cap - store_digest->capacity;
331     else
332         diff = store_digest->capacity - cap;
333     debugs(71, 2, store_digest->capacity << " -> " << cap << "; change: " <<
334            diff << " (" << xpercentInt(diff, store_digest->capacity) << "%)" );
335     /* avoid minor adjustments */
336 
337     if (diff <= store_digest->capacity / 10) {
338         debugs(71, 2, "small change, will not resize.");
339         return false;
340     } else {
341         debugs(71, 2, "big change, resizing.");
342         store_digest->updateCapacity(cap);
343     }
344     return true;
345 }
346 
347 /* called be Rewrite to push Rebuild forward */
348 static void
storeDigestRebuildResume(void)349 storeDigestRebuildResume(void)
350 {
351     assert(sd_state.rebuild_lock);
352     assert(!sd_state.rewrite_lock);
353     sd_state.theSearch = Store::Root().search();
354     /* resize or clear */
355 
356     if (!storeDigestResize())
357         store_digest->clear();     /* not clean()! */
358 
359     sd_stats = StoreDigestStats();
360 
361     eventAdd("storeDigestRebuildStep", storeDigestRebuildStep, NULL, 0.0, 1);
362 }
363 
364 /* finishes swap out sequence for the digest; schedules next rebuild */
365 static void
storeDigestRebuildFinish(void)366 storeDigestRebuildFinish(void)
367 {
368     assert(sd_state.rebuild_lock);
369     sd_state.rebuild_lock = 0;
370     ++sd_state.rebuild_count;
371     debugs(71, 2, "storeDigestRebuildFinish: done.");
372     eventAdd("storeDigestRebuildStart", storeDigestRebuildStart, NULL, (double)
373              Config.digest.rebuild_period, 1);
374     /* resume pending Rewrite if any */
375 
376     if (sd_state.rewrite_lock)
377         storeDigestRewriteResume();
378 }
379 
380 /* recalculate a few hash buckets per invocation; schedules next step */
381 static void
storeDigestRebuildStep(void * datanotused)382 storeDigestRebuildStep(void *datanotused)
383 {
384     /* TODO: call Store::Root().size() to determine this.. */
385     int count = Config.Store.objectsPerBucket * (int) ceil((double) store_hash_buckets *
386                 (double) Config.digest.rebuild_chunk_percentage / 100.0);
387     assert(sd_state.rebuild_lock);
388 
389     debugs(71, 3, "storeDigestRebuildStep: buckets: " << store_hash_buckets << " entries to check: " << count);
390 
391     while (count-- && !sd_state.theSearch->isDone() && sd_state.theSearch->next())
392         storeDigestAdd(sd_state.theSearch->currentItem());
393 
394     /* are we done ? */
395     if (sd_state.theSearch->isDone())
396         storeDigestRebuildFinish();
397     else
398         eventAdd("storeDigestRebuildStep", storeDigestRebuildStep, NULL, 0.0, 1);
399 }
400 
401 /* starts swap out sequence for the digest */
402 static void
storeDigestRewriteStart(void * datanotused)403 storeDigestRewriteStart(void *datanotused)
404 {
405     assert(store_digest);
406     /* prevent overlapping if rewrite schedule is too tight */
407 
408     if (sd_state.rewrite_lock) {
409         debugs(71, DBG_IMPORTANT, "storeDigestRewrite: overlap detected, consider increasing rewrite period");
410         return;
411     }
412 
413     debugs(71, 2, "storeDigestRewrite: start rewrite #" << sd_state.rewrite_count + 1);
414 
415     const char *url = internalLocalUri("/squid-internal-periodic/", SBuf(StoreDigestFileName));
416     const MasterXaction::Pointer mx = new MasterXaction(XactionInitiator::initCacheDigest);
417     auto req = HttpRequest::FromUrlXXX(url, mx);
418 
419     RequestFlags flags;
420     flags.cachable = true;
421 
422     StoreEntry *e = storeCreateEntry(url, url, flags, Http::METHOD_GET);
423     assert(e);
424     sd_state.rewrite_lock = e;
425     debugs(71, 3, "storeDigestRewrite: url: " << url << " key: " << e->getMD5Text());
426     e->mem_obj->request = req;
427     HTTPMSGLOCK(e->mem_obj->request);
428 
429     /* wait for rebuild (if any) to finish */
430     if (sd_state.rebuild_lock) {
431         debugs(71, 2, "storeDigestRewriteStart: waiting for rebuild to finish.");
432         return;
433     }
434 
435     storeDigestRewriteResume();
436 }
437 
438 static void
storeDigestRewriteResume(void)439 storeDigestRewriteResume(void)
440 {
441     StoreEntry *e;
442 
443     assert(sd_state.rewrite_lock);
444     assert(!sd_state.rebuild_lock);
445     e = sd_state.rewrite_lock;
446     sd_state.rewrite_offset = 0;
447     EBIT_SET(e->flags, ENTRY_SPECIAL);
448     /* setting public key will purge old digest entry if any */
449     e->setPublicKey();
450     /* fake reply */
451     HttpReply *rep = new HttpReply;
452     rep->setHeaders(Http::scOkay, "Cache Digest OK",
453                     "application/cache-digest", (store_digest->mask_size + sizeof(sd_state.cblock)),
454                     squid_curtime, (squid_curtime + Config.digest.rewrite_period) );
455     debugs(71, 3, "storeDigestRewrite: entry expires on " << rep->expires <<
456            " (" << std::showpos << (int) (rep->expires - squid_curtime) << ")");
457     e->buffer();
458     e->replaceHttpReply(rep);
459     storeDigestCBlockSwapOut(e);
460     e->flush();
461     eventAdd("storeDigestSwapOutStep", storeDigestSwapOutStep, sd_state.rewrite_lock, 0.0, 1, false);
462 }
463 
464 /* finishes swap out sequence for the digest; schedules next rewrite */
465 static void
storeDigestRewriteFinish(StoreEntry * e)466 storeDigestRewriteFinish(StoreEntry * e)
467 {
468     assert(e == sd_state.rewrite_lock);
469     e->complete();
470     e->timestampsSet();
471     debugs(71, 2, "storeDigestRewriteFinish: digest expires at " << e->expires <<
472            " (" << std::showpos << (int) (e->expires - squid_curtime) << ")");
473     /* is this the write order? @?@ */
474     e->mem_obj->unlinkRequest();
475     e->unlock("storeDigestRewriteFinish");
476     sd_state.rewrite_lock = NULL;
477     ++sd_state.rewrite_count;
478     eventAdd("storeDigestRewriteStart", storeDigestRewriteStart, NULL, (double)
479              Config.digest.rewrite_period, 1);
480     /* resume pending Rebuild if any */
481 
482     if (sd_state.rebuild_lock)
483         storeDigestRebuildResume();
484 }
485 
486 /* swaps out one digest "chunk" per invocation; schedules next swap out */
487 static void
storeDigestSwapOutStep(void * data)488 storeDigestSwapOutStep(void *data)
489 {
490     StoreEntry *e = static_cast<StoreEntry *>(data);
491     int chunk_size = Config.digest.swapout_chunk_size;
492     assert(e == sd_state.rewrite_lock);
493     assert(e);
494     /* _add_ check that nothing bad happened while we were waiting @?@ @?@ */
495 
496     if (static_cast<uint32_t>(sd_state.rewrite_offset + chunk_size) > store_digest->mask_size)
497         chunk_size = store_digest->mask_size - sd_state.rewrite_offset;
498 
499     e->append(store_digest->mask + sd_state.rewrite_offset, chunk_size);
500 
501     debugs(71, 3, "storeDigestSwapOutStep: size: " << store_digest->mask_size <<
502            " offset: " << sd_state.rewrite_offset << " chunk: " <<
503            chunk_size << " bytes");
504 
505     sd_state.rewrite_offset += chunk_size;
506 
507     /* are we done ? */
508     if (static_cast<uint32_t>(sd_state.rewrite_offset) >= store_digest->mask_size)
509         storeDigestRewriteFinish(e);
510     else
511         eventAdd("storeDigestSwapOutStep", storeDigestSwapOutStep, data, 0.0, 1, false);
512 }
513 
514 static void
storeDigestCBlockSwapOut(StoreEntry * e)515 storeDigestCBlockSwapOut(StoreEntry * e)
516 {
517     memset(&sd_state.cblock, 0, sizeof(sd_state.cblock));
518     sd_state.cblock.ver.current = htons(CacheDigestVer.current);
519     sd_state.cblock.ver.required = htons(CacheDigestVer.required);
520     sd_state.cblock.capacity = htonl(store_digest->capacity);
521     sd_state.cblock.count = htonl(store_digest->count);
522     sd_state.cblock.del_count = htonl(store_digest->del_count);
523     sd_state.cblock.mask_size = htonl(store_digest->mask_size);
524     sd_state.cblock.bits_per_entry = Config.digest.bits_per_entry;
525     sd_state.cblock.hash_func_count = (unsigned char) CacheDigestHashFuncCount;
526     e->append((char *) &sd_state.cblock, sizeof(sd_state.cblock));
527 }
528 
529 #endif /* USE_CACHE_DIGESTS */
530 
531