1 /** @file
2 
3   A brief file description
4 
5   @section license License
6 
7   Licensed to the Apache Software Foundation (ASF) under one
8   or more contributor license agreements.  See the NOTICE file
9   distributed with this work for additional information
10   regarding copyright ownership.  The ASF licenses this file
11   to you under the Apache License, Version 2.0 (the
12   "License"); you may not use this file except in compliance
13   with the License.  You may obtain a copy of the License at
14 
15       http://www.apache.org/licenses/LICENSE-2.0
16 
17   Unless required by applicable law or agreed to in writing, software
18   distributed under the License is distributed on an "AS IS" BASIS,
19   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20   See the License for the specific language governing permissions and
21   limitations under the License.
22  */
23 
24 #include "P_Cache.h"
25 
26 #include "tscore/hugepages.h"
27 #include "tscore/Regression.h"
28 
29 // #define LOOP_CHECK_MODE 1
30 #ifdef LOOP_CHECK_MODE
31 #define DIR_LOOP_THRESHOLD 1000
32 #endif
33 #include "tscore/ink_stack_trace.h"
34 
// Stat helpers for the directory-entries-used counter.
//
// Each macro declares a local `ProxyMutex *mutex` shadow from the argument;
// presumably the CACHE_INCREMENT_DYN_STAT / CACHE_DECREMENT_DYN_STAT macros
// expand to code that expects a variable named `mutex` in scope -- confirm
// against the stat macro definitions.
#define CACHE_INC_DIR_USED(_m)                            \
  do {                                                    \
    ProxyMutex *mutex = _m.get();                         \
    CACHE_INCREMENT_DYN_STAT(cache_direntries_used_stat); \
  } while (0)

#define CACHE_DEC_DIR_USED(_m)                            \
  do {                                                    \
    ProxyMutex *mutex = _m.get();                         \
    CACHE_DECREMENT_DYN_STAT(cache_direntries_used_stat); \
  } while (0)
46 
// Bump the directory-collision stat; the local `mutex` shadow serves the
// same purpose as in the macros above.
//
// Fix: dropped the stray trailing semicolon after `while (0)`. The sibling
// stat macros end with a bare `while (0)` so the call site supplies the `;`;
// the extra one here expanded to an empty statement at every use and would
// reject the idiomatic `if (...) CACHE_INC_DIR_COLLISIONS(m); else ...` form.
#define CACHE_INC_DIR_COLLISIONS(_m)                                \
  do {                                                              \
    ProxyMutex *mutex = _m.get();                                   \
    CACHE_INCREMENT_DYN_STAT(cache_directory_collision_count_stat); \
  } while (0)
52 
// Globals

// Allocator for OpenDirEntry instances; used via THREAD_ALLOC/THREAD_FREE
// in OpenDir::open_write and OpenDir::close_write.
ClassAllocator<OpenDirEntry> openDirEntryAllocator("openDirEntry");

// A zero-initialized directory entry, usable wherever a cleared Dir is needed.
Dir empty_dir;
57 
58 // OpenDir
59 
// Constructor: events delivered to an OpenDir continuation are handled by
// signal_readers, which retries any readers whose wakeup was delayed.
OpenDir::OpenDir()
{
  SET_HANDLER(&OpenDir::signal_readers);
}
64 
65 /*
66    If allow_if_writers is false, open_write fails if there are other writers.
67    max_writers sets the maximum number of concurrent writers that are
68    allowed. Only The first writer can set the max_writers. It is ignored
69    for later writers.
70    Returns 1 on success and 0 on failure.
71    */
int
OpenDir::open_write(CacheVC *cont, int allow_if_writers, int max_writers)
{
  // Must run on the thread holding the volume lock.
  ink_assert(cont->vol->mutex->thread_holding == this_ethread());
  unsigned int h = cont->first_key.slice32(0);
  int b          = h % OPEN_DIR_BUCKETS;
  // Look for an existing open entry for this first_key.
  for (OpenDirEntry *d = bucket[b].head; d; d = d->link.next) {
    if (!(d->writers.head->first_key == cont->first_key)) {
      continue;
    }
    // Found: join as an additional writer only if the caller allows it and
    // the entry's writer limit (set by the first writer) is not reached.
    if (allow_if_writers && d->num_writers < d->max_writers) {
      d->writers.push(cont);
      d->num_writers++;
      cont->od           = d;
      cont->write_vector = &d->vector;
      return 1;
    }
    return 0;
  }
  // No entry for this key yet: allocate and initialize a fresh one with
  // this continuation as the sole (first) writer.
  OpenDirEntry *od = THREAD_ALLOC(openDirEntryAllocator, cont->mutex->thread_holding);
  od->readers.head = nullptr;
  od->writers.push(cont);
  od->num_writers           = 1;
  od->max_writers           = max_writers;
  // Point the vector at its inline fast_data storage.
  od->vector.data.data      = &od->vector.data.fast_data[0];
  od->dont_update_directory = false;
  od->move_resident_alt     = false;
  od->reading_vec           = false;
  od->writing_vec           = false;
  dir_clear(&od->first_dir);
  cont->od           = od;
  cont->write_vector = &od->vector;
  bucket[b].push(od);
  return 1;
}
107 
// Wake up readers that were parked in delayed_readers. Readers whose lock
// cannot be taken right now are re-queued and a retry is scheduled.
// Always returns 0 (event handler convention).
int
OpenDir::signal_readers(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
  Queue<CacheVC, Link_CacheVC_opendir_link> newly_delayed_readers;
  EThread *t = mutex->thread_holding;
  CacheVC *c = nullptr;
  while ((c = delayed_readers.dequeue())) {
    // Try to grab the reader's lock; on success, clear its timeout flag and
    // hand it an immediate event.
    CACHE_TRY_LOCK(lock, c->mutex, t);
    if (lock.is_locked()) {
      c->f.open_read_timeout = 0;
      c->handleEvent(EVENT_IMMEDIATE, nullptr);
      continue;
    }
    // Lock contention: defer this reader to the next retry round.
    newly_delayed_readers.push(c);
  }
  if (newly_delayed_readers.head) {
    delayed_readers = newly_delayed_readers;
    // Schedule the retry on the first delayed reader's thread when known,
    // otherwise on our own thread.
    EThread *t1     = newly_delayed_readers.head->mutex->thread_holding;
    if (!t1) {
      t1 = mutex->thread_holding;
    }
    t1->schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay));
  }
  return 0;
}
133 
// Detach `cont` from its OpenDirEntry. When the last writer leaves, the
// entry is removed from its bucket, its waiting readers are signalled, and
// the entry itself is freed. Always returns 0.
int
OpenDir::close_write(CacheVC *cont)
{
  ink_assert(cont->vol->mutex->thread_holding == this_ethread());
  cont->od->writers.remove(cont);
  cont->od->num_writers--;
  if (!cont->od->writers.head) {
    // Last writer gone: tear the entry down.
    unsigned int h = cont->first_key.slice32(0);
    int b          = h % OPEN_DIR_BUCKETS;
    bucket[b].remove(cont->od);
    // Move the entry's waiting readers onto our delayed queue and wake them.
    delayed_readers.append(cont->od->readers);
    signal_readers(0, nullptr);
    cont->od->vector.clear();
    THREAD_FREE(cont->od, openDirEntryAllocator, cont->mutex->thread_holding);
  }
  cont->od = nullptr;
  return 0;
}
152 
153 OpenDirEntry *
open_read(const CryptoHash * key)154 OpenDir::open_read(const CryptoHash *key)
155 {
156   unsigned int h = key->slice32(0);
157   int b          = h % OPEN_DIR_BUCKETS;
158   for (OpenDirEntry *d = bucket[b].head; d; d = d->link.next) {
159     if (d->writers.head->first_key == *key) {
160       return d;
161     }
162   }
163   return nullptr;
164 }
165 
// Park a reader on this entry for up to `msec` milliseconds: arm a local
// timeout event on the reader and push it onto the readers queue.
// Returns EVENT_CONT.
int
OpenDirEntry::wait(CacheVC *cont, int msec)
{
  ink_assert(cont->vol->mutex->thread_holding == this_ethread());
  cont->f.open_read_timeout = 1;
  // The reader must not already have a pending trigger.
  ink_assert(!cont->trigger);
  cont->trigger = cont->vol->mutex->thread_holding->schedule_in_local(cont, HRTIME_MSECONDS(msec));
  readers.push(cont);
  return EVENT_CONT;
}
176 
177 //
178 // Cache Directory
179 //
180 
181 // return value 1 means no loop
182 // zero indicates loop
183 int
dir_bucket_loop_check(Dir * start_dir,Dir * seg)184 dir_bucket_loop_check(Dir *start_dir, Dir *seg)
185 {
186   if (start_dir == nullptr) {
187     return 1;
188   }
189 
190   Dir *p1 = start_dir;
191   Dir *p2 = start_dir;
192 
193   while (p2) {
194     // p1 moves by one entry per iteration
195     ink_assert(p1);
196     p1 = next_dir(p1, seg);
197     // p2 moves by two entries per iteration
198     p2 = next_dir(p2, seg);
199     if (p2) {
200       p2 = next_dir(p2, seg);
201     } else {
202       return 1;
203     }
204 
205     if (p2 == p1) {
206       return 0; // we have a loop
207     }
208   }
209   return 1;
210 }
211 
212 // adds all the directory entries
213 // in a segment to the segment freelist
214 void
dir_init_segment(int s,Vol * d)215 dir_init_segment(int s, Vol *d)
216 {
217   d->header->freelist[s] = 0;
218   Dir *seg               = d->dir_segment(s);
219   int l, b;
220   memset(static_cast<void *>(seg), 0, SIZEOF_DIR * DIR_DEPTH * d->buckets);
221   for (l = 1; l < DIR_DEPTH; l++) {
222     for (b = 0; b < d->buckets; b++) {
223       Dir *bucket = dir_bucket(b, seg);
224       dir_free_entry(dir_bucket_row(bucket, l), s, d);
225     }
226   }
227 }
228 
229 // break the infinite loop in directory entries
230 // Note : abuse of the token bit in dir entries
231 int
dir_bucket_loop_fix(Dir * start_dir,int s,Vol * d)232 dir_bucket_loop_fix(Dir *start_dir, int s, Vol *d)
233 {
234   if (!dir_bucket_loop_check(start_dir, d->dir_segment(s))) {
235     Warning("Dir loop exists, clearing segment %d", s);
236     dir_init_segment(s, d);
237     return 1;
238   }
239   return 0;
240 }
241 
242 int
dir_freelist_length(Vol * d,int s)243 dir_freelist_length(Vol *d, int s)
244 {
245   int free = 0;
246   Dir *seg = d->dir_segment(s);
247   Dir *e   = dir_from_offset(d->header->freelist[s], seg);
248   if (dir_bucket_loop_fix(e, s, d)) {
249     return (DIR_DEPTH - 1) * d->buckets;
250   }
251   while (e) {
252     free++;
253     e = next_dir(e, seg);
254   }
255   return free;
256 }
257 
258 int
dir_bucket_length(Dir * b,int s,Vol * d)259 dir_bucket_length(Dir *b, int s, Vol *d)
260 {
261   Dir *e   = b;
262   int i    = 0;
263   Dir *seg = d->dir_segment(s);
264 #ifdef LOOP_CHECK_MODE
265   if (dir_bucket_loop_fix(b, s, d))
266     return 1;
267 #endif
268   while (e) {
269     i++;
270     if (i > 100) {
271       return -1;
272     }
273     e = next_dir(e, seg);
274   }
275   return i;
276 }
277 
278 int
check_dir(Vol * d)279 check_dir(Vol *d)
280 {
281   int i, s;
282   Debug("cache_check_dir", "inside check dir");
283   for (s = 0; s < d->segments; s++) {
284     Dir *seg = d->dir_segment(s);
285     for (i = 0; i < d->buckets; i++) {
286       Dir *b = dir_bucket(i, seg);
287       if (!(dir_bucket_length(b, s, d) >= 0)) {
288         return 0;
289       }
290       if (!(!dir_next(b) || dir_offset(b))) {
291         return 0;
292       }
293       if (!(dir_bucket_loop_check(b, seg))) {
294         return 0;
295       }
296     }
297   }
298   return 1;
299 }
300 
301 inline void
unlink_from_freelist(Dir * e,int s,Vol * d)302 unlink_from_freelist(Dir *e, int s, Vol *d)
303 {
304   Dir *seg = d->dir_segment(s);
305   Dir *p   = dir_from_offset(dir_prev(e), seg);
306   if (p) {
307     dir_set_next(p, dir_next(e));
308   } else {
309     d->header->freelist[s] = dir_next(e);
310   }
311   Dir *n = dir_from_offset(dir_next(e), seg);
312   if (n) {
313     dir_set_prev(n, dir_prev(e));
314   }
315 }
316 
// Delete entry e (whose predecessor in the bucket chain is p, or nullptr if
// e is the bucket head) from segment s. Returns the entry that now occupies
// e's position in the chain, or nullptr. Marks the volume header dirty.
inline Dir *
dir_delete_entry(Dir *e, Dir *p, int s, Vol *d)
{
  Dir *seg         = d->dir_segment(s);
  int no           = dir_next(e);
  d->header->dirty = 1;
  if (p) {
    // Non-head entry: splice it out of the chain and push it onto the
    // head of the segment freelist.
    unsigned int fo = d->header->freelist[s];
    unsigned int eo = dir_to_offset(e, seg);
    dir_clear(e);
    dir_set_next(p, no);
    dir_set_next(e, fo);
    if (fo) {
      dir_set_prev(dir_from_offset(fo, seg), eo);
    }
    d->header->freelist[s] = eo;
  } else {
    // Head entry: the bucket head slot cannot be freed, so copy the next
    // entry's contents into the head and delete that next entry instead.
    Dir *n = next_dir(e, seg);
    if (n) {
      dir_assign(e, n);
      dir_delete_entry(n, e, s, d);
      return e;
    } else {
      // Chain had only the head: just clear it.
      dir_clear(e);
      return nullptr;
    }
  }
  return dir_from_offset(no, seg);
}
346 
// Remove every invalid or offset-less entry from bucket chain b in
// segment s, deleting in place while iterating (dir_delete_entry returns
// the successor, so `e` stays valid across a deletion).
inline void
dir_clean_bucket(Dir *b, int s, Vol *vol)
{
  Dir *e = b, *p = nullptr;
  Dir *seg = vol->dir_segment(s);
#ifdef LOOP_CHECK_MODE
  int loop_count = 0;
#endif
  do {
#ifdef LOOP_CHECK_MODE
    // Guard against a corrupted (circular) chain: bail out once the fixer
    // has cleared the segment.
    loop_count++;
    if (loop_count > DIR_LOOP_THRESHOLD) {
      if (dir_bucket_loop_fix(b, s, vol))
        return;
    }
#endif
    if (!dir_valid(vol, e) || !dir_offset(e)) {
      if (is_debug_tag_set("dir_clean")) {
        Debug("dir_clean", "cleaning Vol:%s: %p tag %X boffset %" PRId64 " b %p p %p bucket len %d", vol->hash_text.get(), e,
              dir_tag(e), dir_offset(e), b, p, dir_bucket_length(b, s, vol));
      }
      // Only entries that actually held an offset count toward the
      // used-entries stat.
      if (dir_offset(e)) {
        CACHE_DEC_DIR_USED(vol->mutex);
      }
      e = dir_delete_entry(e, p, s, vol);
      continue;
    }
    p = e;
    e = next_dir(e, seg);
  } while (e);
}
378 
379 void
dir_clean_segment(int s,Vol * d)380 dir_clean_segment(int s, Vol *d)
381 {
382   Dir *seg = d->dir_segment(s);
383   for (int64_t i = 0; i < d->buckets; i++) {
384     dir_clean_bucket(dir_bucket(i, seg), s, d);
385     ink_assert(!dir_next(dir_bucket(i, seg)) || dir_offset(dir_bucket(i, seg)));
386   }
387 }
388 
389 void
dir_clean_vol(Vol * d)390 dir_clean_vol(Vol *d)
391 {
392   for (int64_t i = 0; i < d->segments; i++) {
393     dir_clean_segment(i, d);
394   }
395   CHECK_DIR(d);
396 }
397 
398 void
dir_clear_range(off_t start,off_t end,Vol * vol)399 dir_clear_range(off_t start, off_t end, Vol *vol)
400 {
401   for (off_t i = 0; i < vol->buckets * DIR_DEPTH * vol->segments; i++) {
402     Dir *e = dir_index(vol, i);
403     if (!dir_token(e) && dir_offset(e) >= static_cast<int64_t>(start) && dir_offset(e) < static_cast<int64_t>(end)) {
404       CACHE_DEC_DIR_USED(vol->mutex);
405       dir_set_offset(e, 0); // delete
406     }
407   }
408   dir_clean_vol(vol);
409 }
410 
411 void
check_bucket_not_contains(Dir * b,Dir * e,Dir * seg)412 check_bucket_not_contains(Dir *b, Dir *e, Dir *seg)
413 {
414   Dir *x = b;
415   do {
416     if (x == e) {
417       break;
418     }
419     x = next_dir(x, seg);
420   } while (x);
421   ink_assert(!x);
422 }
423 
// Try to replenish segment s's freelist. First do a normal clean; if that
// yields nothing, forcibly purge roughly every 10th head entry in the
// segment and clean again.
void
freelist_clean(int s, Vol *vol)
{
  dir_clean_segment(s, vol);
  if (vol->header->freelist[s]) {
    return; // normal cleaning freed something
  }
  Warning("cache directory overflow on '%s' segment %d, purging...", vol->path, s);
  int n    = 0;
  Dir *seg = vol->dir_segment(s);
  for (int bi = 0; bi < vol->buckets; bi++) {
    Dir *b = dir_bucket(bi, seg);
    for (int l = 0; l < DIR_DEPTH; l++) {
      Dir *e = dir_bucket_row(b, l);
      // Delete every 10th head entry encountered (n counts only heads).
      if (dir_head(e) && !(n++ % 10)) {
        CACHE_DEC_DIR_USED(vol->mutex);
        dir_set_offset(e, 0); // delete
      }
    }
  }
  dir_clean_segment(s, vol);
}
446 
// Pop the head entry off segment s's freelist. Returns nullptr when the
// freelist is empty (after attempting a clean) or found to be corrupt
// (after reinitializing the segment); callers are expected to retry.
inline Dir *
freelist_pop(int s, Vol *d)
{
  Dir *seg = d->dir_segment(s);
  Dir *e   = dir_from_offset(d->header->freelist[s], seg);
  if (!e) {
    freelist_clean(s, d);
    return nullptr;
  }
  d->header->freelist[s] = dir_next(e);
  // if the freelist is bad (a "free" entry should never carry an offset), punt.
  if (dir_offset(e)) {
    dir_init_segment(s, d);
    return nullptr;
  }
  // New head's prev link must be cleared.
  Dir *h = dir_from_offset(d->header->freelist[s], seg);
  if (h) {
    dir_set_prev(h, 0);
  }
  return e;
}
468 
// Account for every entry in segment s: free, used, empty, valid,
// aggregation-valid, and average approximate size. Each out-parameter is
// optional (nullptr to skip). Returns 1 when free + used + empty covers
// all entries in the segment to within `offby`, 0 otherwise.
int
dir_segment_accounted(int s, Vol *d, int offby, int *f, int *u, int *et, int *v, int *av, int *as)
{
  int free = dir_freelist_length(d, s);
  int used = 0, empty = 0;
  int valid = 0, agg_valid = 0;
  int64_t agg_size = 0;
  Dir *seg         = d->dir_segment(s);
  for (int bi = 0; bi < d->buckets; bi++) {
    Dir *b = dir_bucket(bi, seg);
    Dir *e = b;
    while (e) {
      if (!dir_offset(e)) {
        // Only a bucket head may be linked in while holding no offset.
        ink_assert(e == b);
        empty++;
      } else {
        used++;
        if (dir_valid(d, e)) {
          valid++;
        }
        if (dir_agg_valid(d, e)) {
          agg_valid++;
        }
        agg_size += dir_approx_size(e);
      }
      e = next_dir(e, seg);
      if (!e) {
        break;
      }
    }
  }
  if (f) {
    *f = free;
  }
  if (u) {
    *u = used;
  }
  if (et) {
    *et = empty;
  }
  if (v) {
    *v = valid;
  }
  if (av) {
    *av = agg_valid;
  }
  if (as) {
    // Average approximate size per used entry (0 when nothing is used).
    *as = used ? static_cast<int>(agg_size / used) : 0;
  }
  ink_assert(d->buckets * DIR_DEPTH - (free + used + empty) <= offby);
  return d->buckets * DIR_DEPTH - (free + used + empty) <= offby;
}
521 
522 void
dir_free_entry(Dir * e,int s,Vol * d)523 dir_free_entry(Dir *e, int s, Vol *d)
524 {
525   Dir *seg        = d->dir_segment(s);
526   unsigned int fo = d->header->freelist[s];
527   unsigned int eo = dir_to_offset(e, seg);
528   dir_set_next(e, fo);
529   if (fo) {
530     dir_set_prev(dir_from_offset(fo, seg), eo);
531   }
532   d->header->freelist[s] = eo;
533 }
534 
// Look up `key` in the directory, resuming after *last_collision when one
// is supplied. On a hit, copies the entry into *result, records it in
// *last_collision, and returns 1; returns 0 on a miss. Invalid entries
// found during the walk are deleted in place.
int
dir_probe(const CacheKey *key, Vol *d, Dir *result, Dir **last_collision)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s    = key->slice32(0) % d->segments;
  int b    = key->slice32(1) % d->buckets;
  Dir *seg = d->dir_segment(s);
  Dir *e = nullptr, *p = nullptr, *collision = *last_collision;
  Vol *vol = d; // NOTE(review): apparently kept for a macro expecting a local `vol` -- confirm
  CHECK_DIR(d);
#ifdef LOOP_CHECK_MODE
  if (dir_bucket_loop_fix(dir_bucket(b, seg), s, d))
    return 0;
#endif
Lagain:
  e = dir_bucket(b, seg);
  if (dir_offset(e)) {
    do {
      if (dir_compare_tag(e, key)) {
        ink_assert(dir_offset(e));
        // Bug: 51680. Need to check collision before checking
        // dir_valid(). In case of a collision, if !dir_valid(), we
        // don't want to call dir_delete_entry.
        if (collision) {
          // Skip entries until we pass the previously returned one.
          if (collision == e) {
            collision = nullptr;
            // increment collision stat
            // Note: dir_probe could be called multiple times
            // for the same document and so the collision stat
            // may not accurately reflect the number of documents
            // having the same first_key
            DDebug("cache_stats", "Incrementing dir collisions");
            CACHE_INC_DIR_COLLISIONS(d->mutex);
          }
          goto Lcont;
        }
        if (dir_valid(d, e)) {
          DDebug("dir_probe_hit", "found %X %X vol %d bucket %d boffset %" PRId64 "", key->slice32(0), key->slice32(1), d->fd, b,
                 dir_offset(e));
          dir_assign(result, e);
          *last_collision = e;
          ink_assert(dir_offset(e) * CACHE_BLOCK_SIZE < d->len);
          return 1;
        } else { // delete the invalid entry
          CACHE_DEC_DIR_USED(d->mutex);
          e = dir_delete_entry(e, p, s, d);
          continue;
        }
      } else {
        DDebug("dir_probe_tag", "tag mismatch %p %X vs expected %X", e, dir_tag(e), key->slice32(3));
      }
    Lcont:
      p = e;
      e = next_dir(e, seg);
    } while (e);
  }
  if (collision) { // last collision no longer in the list, retry
    DDebug("cache_stats", "Incrementing dir collisions");
    CACHE_INC_DIR_COLLISIONS(d->mutex);
    collision = nullptr;
    goto Lagain;
  }
  DDebug("dir_probe_miss", "missed %X %X on vol %d bucket %d at %p", key->slice32(0), key->slice32(1), d->fd, b, seg);
  CHECK_DIR(d);
  return 0;
}
601 
// Insert a directory entry for `key` pointing at to_part's data. An empty
// slot is taken, in order of preference: the bucket head, a deeper row of
// the same bucket (unlinked from the freelist), or a freelist entry
// (retrying after a forced clean when the freelist is empty).
// Returns 1. Marks the volume header dirty and bumps the used-entries stat.
int
dir_insert(const CacheKey *key, Vol *d, Dir *to_part)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s  = key->slice32(0) % d->segments, l;
  int bi = key->slice32(1) % d->buckets;
  ink_assert(dir_approx_size(to_part) <= MAX_FRAG_SIZE + sizeof(Doc));
  Dir *seg = d->dir_segment(s);
  Dir *e   = nullptr;
  Dir *b   = dir_bucket(bi, seg);
  Vol *vol = d; // NOTE(review): apparently kept for a macro expecting a local `vol` -- confirm
#if defined(DEBUG) && defined(DO_CHECK_DIR_FAST)
  // Debug-only: verify no existing chain entry already maps this tag to
  // the same offset (would be a duplicate insert).
  unsigned int t = DIR_MASK_TAG(key->slice32(2));
  Dir *col       = b;
  while (col) {
    ink_assert((dir_tag(col) != t) || (dir_offset(col) != dir_offset(to_part)));
    col = next_dir(col, seg);
  }
#endif
  CHECK_DIR(d);

Lagain:
  // get from this row first
  e = b;
  if (dir_is_empty(e)) {
    goto Lfill;
  }
  for (l = 1; l < DIR_DEPTH; l++) {
    e = dir_bucket_row(b, l);
    if (dir_is_empty(e)) {
      unlink_from_freelist(e, s, d);
      goto Llink;
    }
  }
  // get one from the freelist
  e = freelist_pop(s, d);
  if (!e) {
    // freelist_pop cleaned/reset the segment; retry the whole search
    goto Lagain;
  }
Llink:
  // Chain the new entry in right after the bucket head.
  dir_set_next(e, dir_next(b));
  dir_set_next(b, dir_to_offset(e, seg));
Lfill:
  dir_assign_data(e, to_part);
  dir_set_tag(e, key->slice32(2));
  ink_assert(d->vol_offset(e) < (d->skip + d->len));
  DDebug("dir_insert", "insert %p %X into vol %d bucket %d at %p tag %X %X boffset %" PRId64 "", e, key->slice32(0), d->fd, bi, e,
         key->slice32(1), dir_tag(e), dir_offset(e));
  CHECK_DIR(d);
  d->header->dirty = 1;
  CACHE_INC_DIR_USED(d->mutex);
  return 1;
}
655 
// Replace the directory entry matching `key` whose offset equals that of
// `overwrite` with the contents of `dir`. If no such entry exists:
// when must_overwrite is true, fail (return 0); otherwise insert `dir` as
// a new entry (allocating a slot like dir_insert does) and return 0 to
// indicate no overwrite happened. Returns 1 on a true overwrite.
int
dir_overwrite(const CacheKey *key, Vol *d, Dir *dir, Dir *overwrite, bool must_overwrite)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s          = key->slice32(0) % d->segments, l;
  int bi         = key->slice32(1) % d->buckets;
  Dir *seg       = d->dir_segment(s);
  Dir *e         = nullptr;
  Dir *b         = dir_bucket(bi, seg);
  unsigned int t = DIR_MASK_TAG(key->slice32(2));
  int res        = 1;
#ifdef LOOP_CHECK_MODE
  int loop_count     = 0;
  bool loop_possible = true;
#endif
  Vol *vol = d; // NOTE(review): apparently kept for a macro expecting a local `vol` -- confirm
  CHECK_DIR(d);

  ink_assert((unsigned int)dir_approx_size(dir) <= (unsigned int)(MAX_FRAG_SIZE + sizeof(Doc))); // XXX - size should be unsigned
Lagain:
  // find entry to overwrite
  e = b;
  if (dir_offset(e)) {
    do {
#ifdef LOOP_CHECK_MODE
      // Guard against a circular chain; after one fix the loop cannot
      // recur, so suppress further fix attempts.
      loop_count++;
      if (loop_count > DIR_LOOP_THRESHOLD && loop_possible) {
        if (dir_bucket_loop_fix(b, s, d)) {
          loop_possible = false;
          goto Lagain;
        }
      }
#endif
      if (dir_tag(e) == t && dir_offset(e) == dir_offset(overwrite)) {
        goto Lfill;
      }
      e = next_dir(e, seg);
    } while (e);
  }
  if (must_overwrite) {
    return 0;
  }
  // Fall back to inserting a fresh entry; report 0 (no overwrite).
  res = 0;
  // get from this row first
  e = b;
  if (dir_is_empty(e)) {
    CACHE_INC_DIR_USED(d->mutex);
    goto Lfill;
  }
  for (l = 1; l < DIR_DEPTH; l++) {
    e = dir_bucket_row(b, l);
    if (dir_is_empty(e)) {
      unlink_from_freelist(e, s, d);
      goto Llink;
    }
  }
  // get one from the freelist
  e = freelist_pop(s, d);
  if (!e) {
    goto Lagain;
  }
Llink:
  CACHE_INC_DIR_USED(d->mutex);
  // Chain the new entry in right after the bucket head.
  dir_set_next(e, dir_next(b));
  dir_set_next(b, dir_to_offset(e, seg));
Lfill:
  dir_assign_data(e, dir);
  dir_set_tag(e, t);
  ink_assert(d->vol_offset(e) < d->skip + d->len);
  DDebug("dir_overwrite", "overwrite %p %X into vol %d bucket %d at %p tag %X %X boffset %" PRId64 "", e, key->slice32(0), d->fd,
         bi, e, t, dir_tag(e), dir_offset(e));
  CHECK_DIR(d);
  d->header->dirty = 1;
  return res;
}
731 
// Delete the directory entry matching `key` whose offset equals that of
// `del`. Returns 1 if an entry was deleted, 0 if none matched.
int
dir_delete(const CacheKey *key, Vol *d, Dir *del)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s    = key->slice32(0) % d->segments;
  int b    = key->slice32(1) % d->buckets;
  Dir *seg = d->dir_segment(s);
  Dir *e = nullptr, *p = nullptr;
#ifdef LOOP_CHECK_MODE
  int loop_count = 0;
#endif
  Vol *vol = d; // NOTE(review): apparently kept for a macro expecting a local `vol` -- confirm
  CHECK_DIR(d);

  e = dir_bucket(b, seg);
  if (dir_offset(e)) {
    do {
#ifdef LOOP_CHECK_MODE
      // Bail out if the chain is circular and gets cleared by the fixer.
      loop_count++;
      if (loop_count > DIR_LOOP_THRESHOLD) {
        if (dir_bucket_loop_fix(dir_bucket(b, seg), s, d))
          return 0;
      }
#endif
      if (dir_compare_tag(e, key) && dir_offset(e) == dir_offset(del)) {
        CACHE_DEC_DIR_USED(d->mutex);
        dir_delete_entry(e, p, s, d);
        CHECK_DIR(d);
        return 1;
      }
      p = e;
      e = next_dir(e, seg);
    } while (e);
  }
  CHECK_DIR(d);
  return 0;
}
769 
770 // Lookaside Cache
771 
772 int
dir_lookaside_probe(const CacheKey * key,Vol * d,Dir * result,EvacuationBlock ** eblock)773 dir_lookaside_probe(const CacheKey *key, Vol *d, Dir *result, EvacuationBlock **eblock)
774 {
775   ink_assert(d->mutex->thread_holding == this_ethread());
776   int i              = key->slice32(3) % LOOKASIDE_SIZE;
777   EvacuationBlock *b = d->lookaside[i].head;
778   while (b) {
779     if (b->evac_frags.key == *key) {
780       if (dir_valid(d, &b->new_dir)) {
781         *result = b->new_dir;
782         DDebug("dir_lookaside", "probe %X success", key->slice32(0));
783         if (eblock) {
784           *eblock = b;
785         }
786         return 1;
787       }
788     }
789     b = b->link.next;
790   }
791   DDebug("dir_lookaside", "probe %X failed", key->slice32(0));
792   return 0;
793 }
794 
795 int
dir_lookaside_insert(EvacuationBlock * eblock,Vol * d,Dir * to)796 dir_lookaside_insert(EvacuationBlock *eblock, Vol *d, Dir *to)
797 {
798   CacheKey *key = &eblock->evac_frags.earliest_key;
799   DDebug("dir_lookaside", "insert %X %X, offset %d phase %d", key->slice32(0), key->slice32(1), (int)dir_offset(to),
800          (int)dir_phase(to));
801   ink_assert(d->mutex->thread_holding == this_ethread());
802   int i                      = key->slice32(3) % LOOKASIDE_SIZE;
803   EvacuationBlock *b         = new_EvacuationBlock(d->mutex->thread_holding);
804   b->evac_frags.key          = *key;
805   b->evac_frags.earliest_key = *key;
806   b->earliest_evacuator      = eblock->earliest_evacuator;
807   ink_assert(b->earliest_evacuator);
808   b->dir     = eblock->dir;
809   b->new_dir = *to;
810   d->lookaside[i].push(b);
811   return 1;
812 }
813 
// Commit a lookaside entry for `key` back into the real directory: the
// saved new_dir overwrites the old dir entry, the RAM cache is told about
// the relocation, and the lookaside entry is removed and freed.
// Returns the result of dir_overwrite, or 0 when key is not present.
int
dir_lookaside_fixup(const CacheKey *key, Vol *d)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int i              = key->slice32(3) % LOOKASIDE_SIZE;
  EvacuationBlock *b = d->lookaside[i].head;
  while (b) {
    if (b->evac_frags.key == *key) {
      int res = dir_overwrite(key, d, &b->new_dir, &b->dir, false);
      DDebug("dir_lookaside", "fixup %X %X offset %" PRId64 " phase %d %d", key->slice32(0), key->slice32(1),
             dir_offset(&b->new_dir), dir_phase(&b->new_dir), res);
      // Tell the RAM cache the document moved from offset o to offset n
      // (each 64-bit offset passed as a high/low 32-bit pair).
      int64_t o = dir_offset(&b->dir), n = dir_offset(&b->new_dir);
      d->ram_cache->fixup(key, static_cast<uint32_t>(o >> 32), static_cast<uint32_t>(o), static_cast<uint32_t>(n >> 32),
                          static_cast<uint32_t>(n));
      d->lookaside[i].remove(b);
      free_EvacuationBlock(b, d->mutex->thread_holding);
      return res;
    }
    b = b->link.next;
  }
  DDebug("dir_lookaside", "fixup %X %X failed", key->slice32(0), key->slice32(1));
  return 0;
}
837 
// Drop every lookaside entry whose new directory location is no longer
// valid, freeing the entry and its earliest evacuator.
void
dir_lookaside_cleanup(Vol *d)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  for (auto &i : d->lookaside) {
    EvacuationBlock *b = i.head;
    while (b) {
      if (!dir_valid(d, &b->new_dir)) {
        // Capture the successor before freeing b, then jump past the
        // normal advance (b already points at the successor).
        EvacuationBlock *nb = b->link.next;
        DDebug("dir_lookaside", "cleanup %X %X cleaned up", b->evac_frags.earliest_key.slice32(0),
               b->evac_frags.earliest_key.slice32(1));
        i.remove(b);
        free_CacheVC(b->earliest_evacuator);
        free_EvacuationBlock(b, d->mutex->thread_holding);
        b = nb;
        goto Lagain;
      }
      b = b->link.next;
    Lagain:;
    }
  }
}
860 
861 void
dir_lookaside_remove(const CacheKey * key,Vol * d)862 dir_lookaside_remove(const CacheKey *key, Vol *d)
863 {
864   ink_assert(d->mutex->thread_holding == this_ethread());
865   int i              = key->slice32(3) % LOOKASIDE_SIZE;
866   EvacuationBlock *b = d->lookaside[i].head;
867   while (b) {
868     if (b->evac_frags.key == *key) {
869       DDebug("dir_lookaside", "remove %X %X offset %" PRId64 " phase %d", key->slice32(0), key->slice32(1), dir_offset(&b->new_dir),
870              dir_phase(&b->new_dir));
871       d->lookaside[i].remove(b);
872       free_EvacuationBlock(b, d->mutex->thread_holding);
873       return;
874     }
875     b = b->link.next;
876   }
877   DDebug("dir_lookaside", "remove %X %X failed", key->slice32(0), key->slice32(1));
878   return;
879 }
880 
881 // Cache Sync
882 //
883 
// Create the global CacheSync continuation and schedule its first run
// after one sync-frequency interval.
void
dir_sync_init()
{
  cacheDirSync          = new CacheSync;
  cacheDirSync->trigger = eventProcessor.schedule_in(cacheDirSync, HRTIME_SECONDS(cache_config_dir_sync_frequency));
}
890 
// Issue an asynchronous write of n bytes from b to offset o of fd,
// with completion delivered back to this continuation on any AIO thread.
void
CacheSync::aio_write(int fd, char *b, int n, off_t o)
{
  io.aiocb.aio_fildes = fd;
  io.aiocb.aio_offset = o;
  io.aiocb.aio_nbytes = n;
  io.aiocb.aio_buf    = b;
  io.action           = this;
  io.thread           = AIO_CALLBACK_THREAD_ANY;
  ink_assert(ink_aio_write(&io) >= 0);
}
902 
903 uint64_t
dir_entries_used(Vol * d)904 dir_entries_used(Vol *d)
905 {
906   uint64_t full  = 0;
907   uint64_t sfull = 0;
908   for (int s = 0; s < d->segments; full += sfull, s++) {
909     Dir *seg = d->dir_segment(s);
910     sfull    = 0;
911     for (int b = 0; b < d->buckets; b++) {
912       Dir *e = dir_bucket(b, seg);
913       if (dir_bucket_loop_fix(e, s, d)) {
914         sfull = 0;
915         break;
916       }
917       while (e) {
918         if (dir_offset(e)) {
919           sfull++;
920         }
921         e = next_dir(e, seg);
922         if (!e) {
923           break;
924         }
925       }
926     }
927   }
928   return full;
929 }
930 
931 /*
932  * this function flushes the cache meta data to disk when
933  * the cache is shutdown. Must *NOT* be used during regular
934  * operation.
935  */
936 
void
sync_cache_dir_on_shutdown()
{
  Debug("cache_dir_sync", "sync started");
  char *buf     = nullptr;
  size_t buflen = 0;
  bool buf_huge = false;

  // Sentinel thread pointer for MUTEX_TAKE_LOCK; shutdown path only.
  EThread *t = (EThread *)0xdeadbeef;
  for (int i = 0; i < gnvol; i++) {
    // the process is going down, do a blocking call
    // dont release the volume's lock, there could
    // be another aggWrite in progress
    MUTEX_TAKE_LOCK(gvol[i]->mutex, t);
    Vol *d = gvol[i];

    if (DISK_BAD(d->disk)) {
      Debug("cache_dir_sync", "Dir %s: ignoring -- bad disk", d->hash_text.get());
      continue;
    }
    size_t dirlen = d->dirlen();
    ink_assert(dirlen > 0); // make clang happy - if not > 0 the vol is seriously messed up
    if (!d->header->dirty && !d->dir_sync_in_progress) {
      Debug("cache_dir_sync", "Dir %s: ignoring -- not dirty", d->hash_text.get());
      continue;
    }
    // recompute hit_evacuate_window
    d->hit_evacuate_window = (d->data_blocks * cache_config_hit_evacuate_percent) / 100;

    // check if we have data in the agg buffer
    // dont worry about the cachevc s in the agg queue
    // directories have not been inserted for these writes
    if (d->agg_buf_pos) {
      Debug("cache_dir_sync", "Dir %s: flushing agg buffer first", d->hash_text.get());

      // set write limit
      d->header->agg_pos = d->header->write_pos + d->agg_buf_pos;

      // Synchronous flush of the aggregation buffer to disk.
      int r = pwrite(d->fd, d->agg_buffer, d->agg_buf_pos, d->header->write_pos);
      if (r != d->agg_buf_pos) {
        ink_assert(!"flushing agg buffer failed");
        continue;
      }
      d->header->last_write_pos = d->header->write_pos;
      d->header->write_pos += d->agg_buf_pos;
      ink_assert(d->header->write_pos == d->header->agg_pos);
      d->agg_buf_pos = 0;
      d->header->write_serial++;
    }

    // (Re)allocate the copy buffer when this volume's directory is larger
    // than what we have, preferring huge pages when available.
    if (buflen < dirlen) {
      if (buf) {
        if (buf_huge) {
          ats_free_hugepage(buf, buflen);
        } else {
          ats_free(buf);
        }
        buf = nullptr;
      }
      buflen = dirlen;
      if (ats_hugepage_enabled()) {
        buf      = static_cast<char *>(ats_alloc_hugepage(buflen));
        buf_huge = true;
      }
      if (buf == nullptr) {
        buf      = static_cast<char *>(ats_memalign(ats_pagesize(), buflen));
        buf_huge = false;
      }
    }

    if (!d->dir_sync_in_progress) {
      d->header->sync_serial++;
    } else {
      Debug("cache_dir_sync", "Periodic dir sync in progress -- overwriting");
    }
    d->footer->sync_serial = d->header->sync_serial;

    CHECK_DIR(d);
    // Snapshot the directory and write it to the A/B slot selected by the
    // low bit of sync_serial.
    memcpy(buf, d->raw_dir, dirlen);
    size_t B    = d->header->sync_serial & 1;
    off_t start = d->skip + (B ? dirlen : 0);
    // NOTE(review): B (size_t) is reused for pwrite's ssize_t result; a
    // failed write (-1) wraps to a huge value and trips the assert below.
    B           = pwrite(d->fd, buf, dirlen, start);
    ink_assert(B == dirlen);
    Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
  }
  Debug("cache_dir_sync", "sync done");
  if (buf) {
    if (buf_huge) {
      ats_free_hugepage(buf, buflen);
    } else {
      ats_free(buf);
    }
    buf = nullptr;
  }
}
1032 
1033 int
mainEvent(int event,Event * e)1034 CacheSync::mainEvent(int event, Event *e)
1035 {
1036   if (trigger) {
1037     trigger->cancel_action();
1038     trigger = nullptr;
1039   }
1040 
1041 Lrestart:
1042   if (vol_idx >= gnvol) {
1043     vol_idx = 0;
1044     if (buf) {
1045       if (buf_huge) {
1046         ats_free_hugepage(buf, buflen);
1047       } else {
1048         ats_free(buf);
1049       }
1050       buflen   = 0;
1051       buf      = nullptr;
1052       buf_huge = false;
1053     }
1054     Debug("cache_dir_sync", "sync done");
1055     if (event == EVENT_INTERVAL) {
1056       trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
1057     } else {
1058       trigger = eventProcessor.schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
1059     }
1060     return EVENT_CONT;
1061   }
1062 
1063   Vol *vol = gvol[vol_idx]; // must be named "vol" to make STAT macros work.
1064 
1065   if (event == AIO_EVENT_DONE) {
1066     // AIO Thread
1067     if (io.aio_result != static_cast<int64_t>(io.aiocb.aio_nbytes)) {
1068       Warning("vol write error during directory sync '%s'", gvol[vol_idx]->hash_text.get());
1069       event = EVENT_NONE;
1070       goto Ldone;
1071     }
1072     CACHE_SUM_DYN_STAT(cache_directory_sync_bytes_stat, io.aio_result);
1073 
1074     trigger = eventProcessor.schedule_in(this, SYNC_DELAY);
1075     return EVENT_CONT;
1076   }
1077   {
1078     CACHE_TRY_LOCK(lock, gvol[vol_idx]->mutex, mutex->thread_holding);
1079     if (!lock.is_locked()) {
1080       trigger = eventProcessor.schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay));
1081       return EVENT_CONT;
1082     }
1083 
1084     if (!vol->dir_sync_in_progress) {
1085       start_time = Thread::get_hrtime();
1086     }
1087 
1088     // recompute hit_evacuate_window
1089     vol->hit_evacuate_window = (vol->data_blocks * cache_config_hit_evacuate_percent) / 100;
1090 
1091     if (DISK_BAD(vol->disk)) {
1092       goto Ldone;
1093     }
1094 
1095     int headerlen = ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter));
1096     size_t dirlen = vol->dirlen();
1097     if (!writepos) {
1098       // start
1099       Debug("cache_dir_sync", "sync started");
1100       /* Don't sync the directory to disk if its not dirty. Syncing the
1101          clean directory to disk is also the cause of INKqa07151. Increasing
1102          the serial serial causes the cache to recover more data
1103          than necessary.
1104          The dirty bit it set in dir_insert, dir_overwrite and dir_delete_entry
1105        */
1106       if (!vol->header->dirty) {
1107         Debug("cache_dir_sync", "Dir %s not dirty", vol->hash_text.get());
1108         goto Ldone;
1109       }
1110       if (vol->is_io_in_progress() || vol->agg_buf_pos) {
1111         Debug("cache_dir_sync", "Dir %s: waiting for agg buffer", vol->hash_text.get());
1112         vol->dir_sync_waiting = true;
1113         if (!vol->is_io_in_progress()) {
1114           vol->aggWrite(EVENT_IMMEDIATE, nullptr);
1115         }
1116         return EVENT_CONT;
1117       }
1118       Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
1119       vol->header->dirty = 0;
1120       if (buflen < dirlen) {
1121         if (buf) {
1122           if (buf_huge) {
1123             ats_free_hugepage(buf, buflen);
1124           } else {
1125             ats_free(buf);
1126           }
1127           buf = nullptr;
1128         }
1129         buflen = dirlen;
1130         if (ats_hugepage_enabled()) {
1131           buf      = static_cast<char *>(ats_alloc_hugepage(buflen));
1132           buf_huge = true;
1133         }
1134         if (buf == nullptr) {
1135           buf      = static_cast<char *>(ats_memalign(ats_pagesize(), buflen));
1136           buf_huge = false;
1137         }
1138       }
1139       vol->header->sync_serial++;
1140       vol->footer->sync_serial = vol->header->sync_serial;
1141       CHECK_DIR(d);
1142       memcpy(buf, vol->raw_dir, dirlen);
1143       vol->dir_sync_in_progress = true;
1144     }
1145     size_t B    = vol->header->sync_serial & 1;
1146     off_t start = vol->skip + (B ? dirlen : 0);
1147 
1148     if (!writepos) {
1149       // write header
1150       aio_write(vol->fd, buf + writepos, headerlen, start + writepos);
1151       writepos += headerlen;
1152     } else if (writepos < static_cast<off_t>(dirlen) - headerlen) {
1153       // write part of body
1154       int l = SYNC_MAX_WRITE;
1155       if (writepos + l > static_cast<off_t>(dirlen) - headerlen) {
1156         l = dirlen - headerlen - writepos;
1157       }
1158       aio_write(vol->fd, buf + writepos, l, start + writepos);
1159       writepos += l;
1160     } else if (writepos < static_cast<off_t>(dirlen)) {
1161       ink_assert(writepos == (off_t)dirlen - headerlen);
1162       // write footer
1163       aio_write(vol->fd, buf + writepos, headerlen, start + writepos);
1164       writepos += headerlen;
1165     } else {
1166       vol->dir_sync_in_progress = false;
1167       CACHE_INCREMENT_DYN_STAT(cache_directory_sync_count_stat);
1168       CACHE_SUM_DYN_STAT(cache_directory_sync_time_stat, Thread::get_hrtime() - start_time);
1169       start_time = 0;
1170       goto Ldone;
1171     }
1172     return EVENT_CONT;
1173   }
1174 Ldone:
1175   // done
1176   writepos = 0;
1177   ++vol_idx;
1178   goto Lrestart;
1179 }
1180 
namespace
{
/// qsort-style comparator for unsigned short values: returns a negative,
/// zero, or positive int according to whether *a is less than, equal to,
/// or greater than *b.
int
compare_ushort(void const *a, void const *b)
{
  auto const lhs = *static_cast<unsigned short const *>(a);
  auto const rhs = *static_cast<unsigned short const *>(b);
  return lhs - rhs;
}
} // namespace
1189 
1190 //
1191 // Check
1192 //
1193 
// Diagnostic walk of this stripe's directory. Prints per-segment and
// stripe-wide statistics (entry usage, chain lengths, duplicate tags,
// detected cycles, fragment-size demographics) to stdout. Always returns 0:
// it reports problems but does not repair them (hence the unused "fix"
// parameter and the TODO).
int
Vol::dir_check(bool /* fix ATS_UNUSED */) // TODO: we should eliminate this parameter ?
{
  static int const SEGMENT_HISTOGRAM_WIDTH = 16;
  int hist[SEGMENT_HISTOGRAM_WIDTH + 1]    = {0}; // chain-length histogram; last slot counts chains >= WIDTH
  unsigned short chain_tag[MAX_ENTRIES_PER_SEGMENT]; // tags collected along the current chain, for dup detection
  int32_t chain_mark[MAX_ENTRIES_PER_SEGMENT];       // per-entry visit mark; -1 = not yet reached by any chain
  uint64_t total_buckets = buckets * segments;
  uint64_t total_entries = total_buckets * DIR_DEPTH;
  int frag_demographics[1 << DIR_SIZE_WIDTH][DIR_BLOCK_SIZES]; // counts by (size code, block-size class)

  int j;
  int stale = 0, in_use = 0, empty = 0;
  int free = 0, head = 0, buckets_in_use = 0;

  int max_chain_length = 0;
  int64_t bytes_in_use = 0;

  ink_zero(frag_demographics);

  printf("Stripe '[%s]'\n", hash_text.get());
  printf("  Directory Bytes: %" PRIu64 "\n", total_buckets * SIZEOF_DIR);
  printf("  Segments:  %d\n", segments);
  printf("  Buckets per segment:   %" PRIu64 "\n", buckets);
  printf("  Entries:   %" PRIu64 "\n", total_entries);

  // Walk each segment independently; per-segment counters are folded into
  // the stripe-wide totals at the bottom of the loop.
  for (int s = 0; s < segments; s++) {
    Dir *seg               = this->dir_segment(s);
    int seg_chain_max      = 0;
    int seg_empty          = 0;
    int seg_in_use         = 0;
    int seg_stale          = 0;
    int seg_bytes_in_use   = 0;
    int seg_dups           = 0;
    int seg_buckets_in_use = 0;

    ink_zero(chain_tag);
    memset(chain_mark, -1, sizeof(chain_mark));

    for (int b = 0; b < buckets; b++) {
      Dir *root = dir_bucket(b, seg);
      int h     = 0; // chain length starting in this bucket

      // Walk the chain starting in this bucket
      int chain_idx = 0;
      // NOTE(review): `mark` is always 0, so an entry already reached from
      // an earlier bucket's chain compares equal to `mark` and is reported
      // as a cycle rather than a cross-chain link -- presumably `mark` was
      // meant to vary per bucket; verify against upstream.
      int mark      = 0;
      ++seg_buckets_in_use;
      for (Dir *e = root; e; e = next_dir(e, seg)) {
        if (!dir_offset(e)) {
          ++seg_empty;
          --seg_buckets_in_use;
          // this should only happen on the first dir in a bucket
          ink_assert(nullptr == next_dir(e, seg));
          break;
        } else {
          int e_idx = e - seg;
          ++h;
          chain_tag[chain_idx++] = dir_tag(e);
          if (chain_mark[e_idx] == mark) {
            printf("    - Cycle of length %d detected for bucket %d\n", h, b);
          } else if (chain_mark[e_idx] >= 0) {
            printf("    - Entry %d is in chain %d and %d", e_idx, chain_mark[e_idx], mark);
          } else {
            chain_mark[e_idx] = mark;
          }

          if (!dir_valid(this, e)) {
            ++seg_stale;
          } else {
            uint64_t size = dir_approx_size(e);
            if (dir_head(e)) {
              ++head;
            }
            ++seg_in_use;
            seg_bytes_in_use += size;
            ++frag_demographics[dir_size(e)][dir_big(e)];
          }
        }
      }

      // Check for duplicates (identical tags in the same bucket).
      if (h > 1) {
        unsigned short last;
        qsort(chain_tag, h, sizeof(chain_tag[0]), &compare_ushort);
        last = chain_tag[0];
        for (int k = 1; k < h; ++k) {
          if (last == chain_tag[k]) {
            ++seg_dups;
          }
          last = chain_tag[k];
        }
      }

      ++hist[std::min(h, SEGMENT_HISTOGRAM_WIDTH)];
      seg_chain_max = std::max(seg_chain_max, h);
    }
    int fl_size = dir_freelist_length(this, s);
    in_use += seg_in_use;
    empty += seg_empty;
    stale += seg_stale;
    free += fl_size;
    buckets_in_use += seg_buckets_in_use;
    max_chain_length = std::max(max_chain_length, seg_chain_max);
    bytes_in_use += seg_bytes_in_use;

    printf("  - Segment-%d | Entries: used=%d stale=%d free=%d disk-bytes=%d Buckets: used=%d empty=%d max=%d avg=%.2f dups=%d\n",
           s, seg_in_use, seg_stale, fl_size, seg_bytes_in_use, seg_buckets_in_use, seg_empty, seg_chain_max,
           seg_buckets_in_use ? static_cast<float>(seg_in_use + seg_stale) / seg_buckets_in_use : 0.0, seg_dups);
  }

  printf("  - Stripe | Entries: in-use=%d stale=%d free=%d Buckets: empty=%d max=%d avg=%.2f\n", in_use, stale, free, empty,
         max_chain_length, buckets_in_use ? static_cast<float>(in_use + stale) / buckets_in_use : 0);

  printf("    Chain lengths:  ");
  for (j = 0; j < SEGMENT_HISTOGRAM_WIDTH; ++j) {
    printf(" %d=%d ", j, hist[j]);
  }
  printf(" %d>=%d\n", SEGMENT_HISTOGRAM_WIDTH, hist[SEGMENT_HISTOGRAM_WIDTH]);

  char tt[256]; // scratch buffer for the ctime_r-style timestamp below
  printf("    Total Size:      %" PRIu64 "\n", static_cast<uint64_t>(len));
  printf("    Bytes in Use:    %" PRIu64 " [%0.2f%%]\n", bytes_in_use, 100.0 * (static_cast<float>(bytes_in_use) / len));
  printf("    Objects:         %d\n", head);
  printf("    Average Size:    %" PRIu64 "\n", head ? (bytes_in_use / head) : 0);
  printf("    Average Frags:   %.2f\n", head ? static_cast<float>(in_use) / head : 0);
  printf("    Write Position:  %" PRIu64 "\n", header->write_pos - start);
  printf("    Wrap Count:      %d\n", header->cycle);
  printf("    Phase:           %s\n", header->phase ? "true" : "false");
  ink_ctime_r(&header->create_time, tt);
  tt[strlen(tt) - 1] = 0; // strip ctime's trailing newline
  printf("    Sync Serial:     %u\n", header->sync_serial);
  printf("    Write Serial:    %u\n", header->write_serial);
  printf("    Create Time:     %s\n", tt);
  printf("\n");
  printf("  Fragment size demographics\n");
  for (int b = 0; b < DIR_BLOCK_SIZES; ++b) {
    int block_size = DIR_BLOCK_SIZE(b);
    int s          = 0;
    while (s < 1 << DIR_SIZE_WIDTH) {
      // Print 8 size codes per output row.
      for (int j = 0; j < 8; ++j, ++s) {
        // The size markings are redundant. Low values (less than DIR_SHIFT_WIDTH) for larger
        // base block sizes should never be used. Such entries should use the next smaller base block size.
        if (b > 0 && s < 1 << DIR_BLOCK_SHIFT(1)) {
          ink_assert(frag_demographics[s][b] == 0);
          continue;
        }
        printf(" %8d[%2d:%1d]:%06d", (s + 1) * block_size, s, b, frag_demographics[s][b]);
      }
      printf("\n");
    }
  }
  printf("\n");

  return 0;
}
1349 
1350 //
1351 // Static Tables
1352 //
1353 
// permutation table
// Fixed pseudo-random permutation of the byte values 0..255 used when
// deriving the "next" cache key; presumably the inverse of
// CacheKey_prev_table below -- TODO confirm.
uint8_t CacheKey_next_table[256] = {
  21,  53,  167, 51,  255, 126, 241, 151, 115, 66,  155, 174, 226, 215, 80,  188, 12,  95,  8,   24,  162, 201, 46,  104, 79,  172,
  39,  68,  56,  144, 142, 217, 101, 62,  14,  108, 120, 90,  61,  47,  132, 199, 110, 166, 83,  125, 57,  65,  19,  130, 148, 116,
  228, 189, 170, 1,   71,  0,   252, 184, 168, 177, 88,  229, 242, 237, 183, 55,  13,  212, 240, 81,  211, 74,  195, 205, 147, 93,
  30,  87,  86,  63,  135, 102, 233, 106, 118, 163, 107, 10,  243, 136, 160, 119, 43,  161, 206, 141, 203, 78,  175, 36,  37,  140,
  224, 197, 185, 196, 248, 84,  122, 73,  152, 157, 18,  225, 219, 145, 45,  2,   171, 249, 173, 32,  143, 137, 69,  41,  35,  89,
  33,  98,  179, 214, 114, 231, 251, 123, 180, 194, 29,  3,   178, 31,  192, 164, 15,  234, 26,  230, 91,  156, 5,   16,  23,  244,
  58,  50,  4,   67,  134, 165, 60,  235, 250, 7,   138, 216, 49,  139, 191, 154, 11,  52,  239, 59,  111, 245, 9,   64,  25,  129,
  247, 232, 190, 246, 109, 22,  112, 210, 221, 181, 92,  169, 48,  100, 193, 77,  103, 133, 70,  220, 207, 223, 176, 204, 76,  186,
  200, 208, 158, 182, 227, 222, 131, 38,  187, 238, 6,   34,  253, 128, 146, 44,  94,  127, 105, 153, 113, 20,  27,  124, 159, 17,
  72,  218, 96,  149, 213, 42,  28,  254, 202, 40,  117, 82,  97,  209, 54,  236, 121, 75,  85,  150, 99,  198,
};
1367 
// permutation table
// Fixed pseudo-random permutation of the byte values 0..255 used when
// deriving the "previous" cache key; presumably the inverse of
// CacheKey_next_table above -- TODO confirm.
uint8_t CacheKey_prev_table[256] = {
  57,  55,  119, 141, 158, 152, 218, 165, 18,  178, 89,  172, 16,  68,  34,  146, 153, 233, 114, 48,  229, 0,   187, 154, 19,  180,
  148, 230, 240, 140, 78,  143, 123, 130, 219, 128, 101, 102, 215, 26,  243, 127, 239, 94,  223, 118, 22,  39,  194, 168, 157, 3,
  173, 1,   248, 67,  28,  46,  156, 175, 162, 38,  33,  81,  179, 47,  9,   159, 27,  126, 200, 56,  234, 111, 73,  251, 206, 197,
  99,  24,  14,  71,  245, 44,  109, 252, 80,  79,  62,  129, 37,  150, 192, 77,  224, 17,  236, 246, 131, 254, 195, 32,  83,  198,
  23,  226, 85,  88,  35,  186, 42,  176, 188, 228, 134, 8,   51,  244, 86,  93,  36,  250, 110, 137, 231, 45,  5,   225, 221, 181,
  49,  214, 40,  199, 160, 82,  91,  125, 166, 169, 103, 97,  30,  124, 29,  117, 222, 76,  50,  237, 253, 7,   112, 227, 171, 10,
  151, 113, 210, 232, 92,  95,  20,  87,  145, 161, 43,  2,   60,  193, 54,  120, 25,  122, 11,  100, 204, 61,  142, 132, 138, 191,
  211, 66,  59,  106, 207, 216, 15,  53,  184, 170, 144, 196, 139, 74,  107, 105, 255, 41,  208, 21,  242, 98,  205, 75,  96,  202,
  209, 247, 189, 72,  69,  238, 133, 13,  167, 31,  235, 116, 201, 190, 213, 203, 104, 115, 12,  212, 52,  63,  149, 135, 183, 84,
  147, 163, 249, 65,  217, 174, 70,  6,   64,  90,  155, 177, 185, 182, 108, 121, 164, 136, 58,  220, 241, 4,
};
1381 
1382 //
1383 // Regression
1384 //
// Seed for the regression-test PRNG used by regress_rand_CacheKey().
unsigned int regress_rand_seed = 0;

/// Reset the regression PRNG to a known seed so that generated cache-key
/// sequences are reproducible across runs.
void
regress_rand_init(unsigned int seed)
{
  regress_rand_seed = seed;
}
1391 
1392 static void
regress_rand_CacheKey(const CacheKey * key)1393 regress_rand_CacheKey(const CacheKey *key)
1394 {
1395   unsigned int *x = (unsigned int *)key;
1396   for (int i = 0; i < 4; i++) {
1397     x[i] = next_rand(&regress_rand_seed);
1398   }
1399 }
1400 
1401 void
dir_corrupt_bucket(Dir * b,int s,Vol * d)1402 dir_corrupt_bucket(Dir *b, int s, Vol *d)
1403 {
1404   // coverity[dont_call]
1405   int l    = (static_cast<int>(dir_bucket_length(b, s, d) * drand48()));
1406   Dir *e   = b;
1407   Dir *seg = d->dir_segment(s);
1408   for (int i = 0; i < l; i++) {
1409     ink_release_assert(e);
1410     e = next_dir(e, seg);
1411   }
1412   ink_release_assert(e);
1413   dir_set_next(e, dir_to_offset(e, seg));
1414 }
1415 
// Exclusive regression test for the cache directory: exercises insert,
// delete, probe timing, and the corruption-detection path against volume 0.
// Requires a fully initialized cache; sets *status to the test result.
EXCLUSIVE_REGRESSION_TEST(Cache_dir)(RegressionTest *t, int /* atype ATS_UNUSED */, int *status)
{
  ink_hrtime ttime;
  int ret = REGRESSION_TEST_PASSED;

  if ((CacheProcessor::IsCacheEnabled() != CACHE_INITIALIZED) || gnvol < 1) {
    rprintf(t, "cache not ready/configured");
    *status = REGRESSION_TEST_FAILED;
    return;
  }
  Vol *d          = gvol[0];
  EThread *thread = this_ethread();
  MUTEX_TRY_LOCK(lock, d->mutex, thread);
  ink_release_assert(lock.is_locked());
  // NOTE(review): the trailing "free" argument matches no conversion in the
  // format string (and at this point in the function it names ::free, not
  // the local declared below) -- harmless extra vararg, likely leftover.
  rprintf(t, "clearing vol 0\n", free);
  vol_dir_clear(d);

  // coverity[var_decl]
  Dir dir;
  dir_clear(&dir);
  dir_set_phase(&dir, 0);
  dir_set_head(&dir, true);
  dir_set_offset(&dir, 1);

  // Advance the write cursor so inserted entries appear to be behind it.
  d->header->agg_pos = d->header->write_pos += 1024;

  CacheKey key;
  rand_CacheKey(&key, thread->mutex);

  int s    = key.slice32(0) % d->segments, i, j;
  Dir *seg = d->dir_segment(s);

  // test insert
  // Fill the segment's freelist completely; the number of successful inserts
  // should match (within 1) the freelist length measured beforehand.
  rprintf(t, "insert test\n", free);
  int inserted = 0;
  int free     = dir_freelist_length(d, s);
  int n        = free;
  rprintf(t, "free: %d\n", free);
  while (n--) {
    if (!dir_insert(&key, d, &dir)) {
      break;
    }
    inserted++;
  }
  rprintf(t, "inserted: %d\n", inserted);
  if (static_cast<unsigned int>(inserted - free) > 1) {
    ret = REGRESSION_TEST_FAILED;
  }

  // test delete
  // Zero every entry's offset (marking it deleted), then clean the segment
  // and verify the freelist is restored to (within 1 of) its original size.
  rprintf(t, "delete test\n");
  for (i = 0; i < d->buckets; i++) {
    for (j = 0; j < DIR_DEPTH; j++) {
      dir_set_offset(dir_bucket_row(dir_bucket(i, seg), j), 0); // delete
    }
  }
  dir_clean_segment(s, d);
  int newfree = dir_freelist_length(d, s);
  rprintf(t, "newfree: %d\n", newfree);
  if (static_cast<unsigned int>(newfree - free) > 1) {
    ret = REGRESSION_TEST_FAILED;
  }

  // test insert-delete
  // Time bulk inserts with a reproducible key stream, then replay the same
  // stream and verify every key can be probed back.
  rprintf(t, "insert-delete test\n");
  regress_rand_init(13);
  ttime = Thread::get_hrtime_updated();
  for (i = 0; i < newfree; i++) {
    regress_rand_CacheKey(&key);
    dir_insert(&key, d, &dir);
  }
  uint64_t us = (Thread::get_hrtime_updated() - ttime) / HRTIME_USECOND;
  // On windows us is sometimes 0. I don't know why.
  // printout the insert rate only if its not 0
  if (us) {
    rprintf(t, "insert rate = %d / second\n", static_cast<int>((newfree * static_cast<uint64_t>(1000000)) / us));
  }
  regress_rand_init(13);
  ttime = Thread::get_hrtime_updated();
  for (i = 0; i < newfree; i++) {
    Dir *last_collision = nullptr;
    regress_rand_CacheKey(&key);
    if (!dir_probe(&key, d, &dir, &last_collision)) {
      ret = REGRESSION_TEST_FAILED;
    }
  }
  us = (Thread::get_hrtime_updated() - ttime) / HRTIME_USECOND;
  // On windows us is sometimes 0. I don't know why.
  // printout the probe rate only if its not 0
  if (us) {
    rprintf(t, "probe rate = %d / second\n", static_cast<int>((newfree * static_cast<uint64_t>(1000000)) / us));
  }

  // Fill the directory to ~75% occupancy before the corruption tests.
  for (int c = 0; c < d->direntries() * 0.75; c++) {
    regress_rand_CacheKey(&key);
    dir_insert(&key, d, &dir);
  }

  Dir dir1;
  memset(static_cast<void *>(&dir1), 0, sizeof(dir1));
  int s1, b1;

  rprintf(t, "corrupt_bucket test\n");
  for (int ntimes = 0; ntimes < 10; ntimes++) {
#ifdef LOOP_CHECK_MODE
    // dir_probe in bucket with loop
    rand_CacheKey(&key, thread->mutex);
    s1 = key.slice32(0) % d->segments;
    b1 = key.slice32(1) % d->buckets;
    dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
    dir_insert(&key, d, &dir);
    Dir *last_collision = 0;
    dir_probe(&key, d, &dir, &last_collision);

    rand_CacheKey(&key, thread->mutex);
    s1 = key.slice32(0) % d->segments;
    b1 = key.slice32(1) % d->buckets;
    dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);

    last_collision = 0;
    dir_probe(&key, d, &dir, &last_collision);

    // dir_overwrite in bucket with loop
    rand_CacheKey(&key, thread->mutex);
    s1 = key.slice32(0) % d->segments;
    b1 = key.slice32(1) % d->buckets;
    CacheKey key1;
    key1.b[1] = 127;
    dir1      = dir;
    dir_set_offset(&dir1, 23);
    dir_insert(&key1, d, &dir1);
    dir_insert(&key, d, &dir);
    key1.b[1] = 80;
    dir_insert(&key1, d, &dir1);
    dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
    dir_overwrite(&key, d, &dir, &dir, 1);

    rand_CacheKey(&key, thread->mutex);
    s1       = key.slice32(0) % d->segments;
    b1       = key.slice32(1) % d->buckets;
    key.b[1] = 23;
    dir_insert(&key, d, &dir1);
    dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
    dir_overwrite(&key, d, &dir, &dir, 0);

    rand_CacheKey(&key, thread->mutex);
    s1        = key.slice32(0) % d->segments;
    Dir *seg1 = d->dir_segment(s1);
    // dir_freelist_length in freelist with loop
    // NOTE(review): uses freelist[s] but the freshly computed s1 everywhere
    // else on this line -- presumably freelist[s1] was intended; verify.
    dir_corrupt_bucket(dir_from_offset(d->header->freelist[s], seg1), s1, d);
    dir_freelist_length(d, s1);

    rand_CacheKey(&key, thread->mutex);
    s1 = key.slice32(0) % d->segments;
    b1 = key.slice32(1) % d->buckets;
    // dir_bucket_length in bucket with loop
    dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
    dir_bucket_length(dir_bucket(b1, d->dir_segment(s1)), s1, d);
    if (!check_dir(d))
      ret = REGRESSION_TEST_FAILED;
#else
    // test corruption detection
    rand_CacheKey(&key, thread->mutex);
    s1 = key.slice32(0) % d->segments;
    b1 = key.slice32(1) % d->buckets;

    dir_insert(&key, d, &dir1);
    dir_insert(&key, d, &dir1);
    dir_insert(&key, d, &dir1);
    dir_insert(&key, d, &dir1);
    dir_insert(&key, d, &dir1);
    dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
    // check_dir() must notice the corruption we just introduced.
    if (check_dir(d)) {
      ret = REGRESSION_TEST_FAILED;
    }
#endif
  }
  vol_dir_clear(d);
  *status = ret;
}
1596