1 /** @file
2
3 A brief file description
4
5 @section license License
6
7 Licensed to the Apache Software Foundation (ASF) under one
8 or more contributor license agreements. See the NOTICE file
9 distributed with this work for additional information
10 regarding copyright ownership. The ASF licenses this file
11 to you under the Apache License, Version 2.0 (the
12 "License"); you may not use this file except in compliance
13 with the License. You may obtain a copy of the License at
14
15 http://www.apache.org/licenses/LICENSE-2.0
16
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
22 */
23
24 #include "P_Cache.h"
25
26 #include "tscore/hugepages.h"
27 #include "tscore/Regression.h"
28
29 // #define LOOP_CHECK_MODE 1
30 #ifdef LOOP_CHECK_MODE
31 #define DIR_LOOP_THRESHOLD 1000
32 #endif
33 #include "tscore/ink_stack_trace.h"
34
// Stat helper macros: each creates a local `mutex` binding (required by the
// CACHE_*_DYN_STAT machinery) from the ProxyMutexPtr argument and bumps the
// corresponding directory statistic. The do { } while (0) wrapper makes each
// macro expand to a single statement that is safe in unbraced if/else bodies.
#define CACHE_INC_DIR_USED(_m)                            \
  do {                                                    \
    ProxyMutex *mutex = _m.get();                         \
    CACHE_INCREMENT_DYN_STAT(cache_direntries_used_stat); \
  } while (0)

#define CACHE_DEC_DIR_USED(_m)                            \
  do {                                                    \
    ProxyMutex *mutex = _m.get();                         \
    CACHE_DECREMENT_DYN_STAT(cache_direntries_used_stat); \
  } while (0)

// Fixed: this macro previously ended with "} while (0);". The trailing
// semicolon defeats the do/while(0) idiom (an `if (x) MACRO(); else ...`
// would not compile) and was inconsistent with the macros above.
#define CACHE_INC_DIR_COLLISIONS(_m)                                \
  do {                                                              \
    ProxyMutex *mutex = _m.get();                                   \
    CACHE_INCREMENT_DYN_STAT(cache_directory_collision_count_stat); \
  } while (0)
52
53 // Globals
54
// Allocator for OpenDirEntry objects (used via THREAD_ALLOC/THREAD_FREE below).
ClassAllocator<OpenDirEntry> openDirEntryAllocator("openDirEntry");
// A zero-initialized Dir (file-scope global, hence zeroed) usable as a blank entry.
Dir empty_dir;
57
58 // OpenDir
59
// Constructor: install signal_readers as this continuation's event handler,
// so scheduled retries (see signal_readers) re-enter it.
OpenDir::OpenDir()
{
  SET_HANDLER(&OpenDir::signal_readers);
}
64
/*
  If allow_if_writers is false, open_write fails if there are other writers.
  max_writers sets the maximum number of concurrent writers that are
  allowed. Only the first writer can set max_writers; it is ignored
  for later writers.
  Returns 1 on success and 0 on failure.
*/
int
OpenDir::open_write(CacheVC *cont, int allow_if_writers, int max_writers)
{
  ink_assert(cont->vol->mutex->thread_holding == this_ethread());
  unsigned int h = cont->first_key.slice32(0);
  int b = h % OPEN_DIR_BUCKETS;
  // Scan the hash bucket for an entry already open for this first_key.
  for (OpenDirEntry *d = bucket[b].head; d; d = d->link.next) {
    if (!(d->writers.head->first_key == cont->first_key)) {
      continue;
    }
    // Found an existing entry: join as an additional writer only if allowed
    // and still under the limit set by the first writer.
    if (allow_if_writers && d->num_writers < d->max_writers) {
      d->writers.push(cont);
      d->num_writers++;
      cont->od = d;
      cont->write_vector = &d->vector;
      return 1;
    }
    return 0;
  }
  // No entry for this key yet: create one with cont as its first writer.
  OpenDirEntry *od = THREAD_ALLOC(openDirEntryAllocator, cont->mutex->thread_holding);
  od->readers.head = nullptr;
  od->writers.push(cont);
  od->num_writers = 1;
  od->max_writers = max_writers;
  // Point the vector's data at its inline fast_data storage.
  od->vector.data.data = &od->vector.data.fast_data[0];
  od->dont_update_directory = false;
  od->move_resident_alt = false;
  od->reading_vec = false;
  od->writing_vec = false;
  dir_clear(&od->first_dir);
  cont->od = od;
  cont->write_vector = &od->vector;
  bucket[b].push(od);
  return 1;
}
107
// Event handler: wake every CacheVC queued on delayed_readers. Readers whose
// mutex cannot be taken right now are requeued and a retry of this handler is
// scheduled after cache_config_mutex_retry_delay milliseconds.
int
OpenDir::signal_readers(int /* event ATS_UNUSED */, Event * /* e ATS_UNUSED */)
{
  Queue<CacheVC, Link_CacheVC_opendir_link> newly_delayed_readers;
  EThread *t = mutex->thread_holding;
  CacheVC *c = nullptr;
  while ((c = delayed_readers.dequeue())) {
    CACHE_TRY_LOCK(lock, c->mutex, t);
    if (lock.is_locked()) {
      // Got the reader's lock: clear its timeout flag and hand it control.
      c->f.open_read_timeout = 0;
      c->handleEvent(EVENT_IMMEDIATE, nullptr);
      continue;
    }
    newly_delayed_readers.push(c);
  }
  if (newly_delayed_readers.head) {
    delayed_readers = newly_delayed_readers;
    // Prefer the thread holding the first reader's mutex for the retry;
    // fall back to the thread holding our own mutex.
    EThread *t1 = newly_delayed_readers.head->mutex->thread_holding;
    if (!t1) {
      t1 = mutex->thread_holding;
    }
    t1->schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay));
  }
  return 0;
}
133
// Remove cont from its OpenDirEntry's writer list. When the last writer
// leaves, move the entry's readers to delayed_readers, signal them, and
// free the entry. Always returns 0.
int
OpenDir::close_write(CacheVC *cont)
{
  ink_assert(cont->vol->mutex->thread_holding == this_ethread());
  cont->od->writers.remove(cont);
  cont->od->num_writers--;
  if (!cont->od->writers.head) {
    // Last writer gone: unhash the entry and wake waiting readers before
    // freeing it (readers were appended to delayed_readers first).
    unsigned int h = cont->first_key.slice32(0);
    int b = h % OPEN_DIR_BUCKETS;
    bucket[b].remove(cont->od);
    delayed_readers.append(cont->od->readers);
    signal_readers(0, nullptr);
    cont->od->vector.clear();
    THREAD_FREE(cont->od, openDirEntryAllocator, cont->mutex->thread_holding);
  }
  cont->od = nullptr;
  return 0;
}
152
153 OpenDirEntry *
open_read(const CryptoHash * key)154 OpenDir::open_read(const CryptoHash *key)
155 {
156 unsigned int h = key->slice32(0);
157 int b = h % OPEN_DIR_BUCKETS;
158 for (OpenDirEntry *d = bucket[b].head; d; d = d->link.next) {
159 if (d->writers.head->first_key == *key) {
160 return d;
161 }
162 }
163 return nullptr;
164 }
165
// Park cont on this entry's reader queue with a wakeup scheduled `msec`
// milliseconds from now (stored in cont->trigger). Caller must hold the
// volume mutex on this thread. Returns EVENT_CONT.
int
OpenDirEntry::wait(CacheVC *cont, int msec)
{
  ink_assert(cont->vol->mutex->thread_holding == this_ethread());
  cont->f.open_read_timeout = 1;
  ink_assert(!cont->trigger);
  cont->trigger = cont->vol->mutex->thread_holding->schedule_in_local(cont, HRTIME_MSECONDS(msec));
  readers.push(cont);
  return EVENT_CONT;
}
176
177 //
178 // Cache Directory
179 //
180
181 // return value 1 means no loop
182 // zero indicates loop
183 int
dir_bucket_loop_check(Dir * start_dir,Dir * seg)184 dir_bucket_loop_check(Dir *start_dir, Dir *seg)
185 {
186 if (start_dir == nullptr) {
187 return 1;
188 }
189
190 Dir *p1 = start_dir;
191 Dir *p2 = start_dir;
192
193 while (p2) {
194 // p1 moves by one entry per iteration
195 ink_assert(p1);
196 p1 = next_dir(p1, seg);
197 // p2 moves by two entries per iteration
198 p2 = next_dir(p2, seg);
199 if (p2) {
200 p2 = next_dir(p2, seg);
201 } else {
202 return 1;
203 }
204
205 if (p2 == p1) {
206 return 0; // we have a loop
207 }
208 }
209 return 1;
210 }
211
212 // adds all the directory entries
213 // in a segment to the segment freelist
214 void
dir_init_segment(int s,Vol * d)215 dir_init_segment(int s, Vol *d)
216 {
217 d->header->freelist[s] = 0;
218 Dir *seg = d->dir_segment(s);
219 int l, b;
220 memset(static_cast<void *>(seg), 0, SIZEOF_DIR * DIR_DEPTH * d->buckets);
221 for (l = 1; l < DIR_DEPTH; l++) {
222 for (b = 0; b < d->buckets; b++) {
223 Dir *bucket = dir_bucket(b, seg);
224 dir_free_entry(dir_bucket_row(bucket, l), s, d);
225 }
226 }
227 }
228
229 // break the infinite loop in directory entries
230 // Note : abuse of the token bit in dir entries
231 int
dir_bucket_loop_fix(Dir * start_dir,int s,Vol * d)232 dir_bucket_loop_fix(Dir *start_dir, int s, Vol *d)
233 {
234 if (!dir_bucket_loop_check(start_dir, d->dir_segment(s))) {
235 Warning("Dir loop exists, clearing segment %d", s);
236 dir_init_segment(s, d);
237 return 1;
238 }
239 return 0;
240 }
241
242 int
dir_freelist_length(Vol * d,int s)243 dir_freelist_length(Vol *d, int s)
244 {
245 int free = 0;
246 Dir *seg = d->dir_segment(s);
247 Dir *e = dir_from_offset(d->header->freelist[s], seg);
248 if (dir_bucket_loop_fix(e, s, d)) {
249 return (DIR_DEPTH - 1) * d->buckets;
250 }
251 while (e) {
252 free++;
253 e = next_dir(e, seg);
254 }
255 return free;
256 }
257
258 int
dir_bucket_length(Dir * b,int s,Vol * d)259 dir_bucket_length(Dir *b, int s, Vol *d)
260 {
261 Dir *e = b;
262 int i = 0;
263 Dir *seg = d->dir_segment(s);
264 #ifdef LOOP_CHECK_MODE
265 if (dir_bucket_loop_fix(b, s, d))
266 return 1;
267 #endif
268 while (e) {
269 i++;
270 if (i > 100) {
271 return -1;
272 }
273 e = next_dir(e, seg);
274 }
275 return i;
276 }
277
278 int
check_dir(Vol * d)279 check_dir(Vol *d)
280 {
281 int i, s;
282 Debug("cache_check_dir", "inside check dir");
283 for (s = 0; s < d->segments; s++) {
284 Dir *seg = d->dir_segment(s);
285 for (i = 0; i < d->buckets; i++) {
286 Dir *b = dir_bucket(i, seg);
287 if (!(dir_bucket_length(b, s, d) >= 0)) {
288 return 0;
289 }
290 if (!(!dir_next(b) || dir_offset(b))) {
291 return 0;
292 }
293 if (!(dir_bucket_loop_check(b, seg))) {
294 return 0;
295 }
296 }
297 }
298 return 1;
299 }
300
301 inline void
unlink_from_freelist(Dir * e,int s,Vol * d)302 unlink_from_freelist(Dir *e, int s, Vol *d)
303 {
304 Dir *seg = d->dir_segment(s);
305 Dir *p = dir_from_offset(dir_prev(e), seg);
306 if (p) {
307 dir_set_next(p, dir_next(e));
308 } else {
309 d->header->freelist[s] = dir_next(e);
310 }
311 Dir *n = dir_from_offset(dir_next(e), seg);
312 if (n) {
313 dir_set_prev(n, dir_prev(e));
314 }
315 }
316
// Remove entry e from its bucket chain in segment s and mark the directory
// dirty. p is e's predecessor in the chain (nullptr when e is the bucket
// head). Returns the entry now occupying e's chain position (its old
// successor), or nullptr when the chain is exhausted.
inline Dir *
dir_delete_entry(Dir *e, Dir *p, int s, Vol *d)
{
  Dir *seg = d->dir_segment(s);
  int no = dir_next(e);
  d->header->dirty = 1;
  if (p) {
    // Non-head entry: unlink it from the chain and push it on the
    // segment freelist head.
    unsigned int fo = d->header->freelist[s];
    unsigned int eo = dir_to_offset(e, seg);
    dir_clear(e);
    dir_set_next(p, no);
    dir_set_next(e, fo);
    if (fo) {
      dir_set_prev(dir_from_offset(fo, seg), eo);
    }
    d->header->freelist[s] = eo;
  } else {
    // Bucket head: head slots cannot be freelisted, so copy the successor
    // into the head slot and recursively delete the successor instead.
    Dir *n = next_dir(e, seg);
    if (n) {
      dir_assign(e, n);
      dir_delete_entry(n, e, s, d);
      return e;
    } else {
      dir_clear(e);
      return nullptr;
    }
  }
  return dir_from_offset(no, seg);
}
346
// Walk bucket b of segment s and delete every entry that is invalid
// (dir_valid fails) or empty (zero offset). Deleting an entry that still
// holds data decrements the used-entries statistic.
inline void
dir_clean_bucket(Dir *b, int s, Vol *vol)
{
  Dir *e = b, *p = nullptr;
  Dir *seg = vol->dir_segment(s);
#ifdef LOOP_CHECK_MODE
  int loop_count = 0;
#endif
  do {
#ifdef LOOP_CHECK_MODE
    loop_count++;
    if (loop_count > DIR_LOOP_THRESHOLD) {
      if (dir_bucket_loop_fix(b, s, vol))
        return;
    }
#endif
    if (!dir_valid(vol, e) || !dir_offset(e)) {
      if (is_debug_tag_set("dir_clean")) {
        Debug("dir_clean", "cleaning Vol:%s: %p tag %X boffset %" PRId64 " b %p p %p bucket len %d", vol->hash_text.get(), e,
              dir_tag(e), dir_offset(e), b, p, dir_bucket_length(b, s, vol));
      }
      if (dir_offset(e)) {
        CACHE_DEC_DIR_USED(vol->mutex);
      }
      // dir_delete_entry returns the entry now occupying this chain
      // position, so p and e are deliberately not advanced here.
      e = dir_delete_entry(e, p, s, vol);
      continue;
    }
    p = e;
    e = next_dir(e, seg);
  } while (e);
}
378
379 void
dir_clean_segment(int s,Vol * d)380 dir_clean_segment(int s, Vol *d)
381 {
382 Dir *seg = d->dir_segment(s);
383 for (int64_t i = 0; i < d->buckets; i++) {
384 dir_clean_bucket(dir_bucket(i, seg), s, d);
385 ink_assert(!dir_next(dir_bucket(i, seg)) || dir_offset(dir_bucket(i, seg)));
386 }
387 }
388
389 void
dir_clean_vol(Vol * d)390 dir_clean_vol(Vol *d)
391 {
392 for (int64_t i = 0; i < d->segments; i++) {
393 dir_clean_segment(i, d);
394 }
395 CHECK_DIR(d);
396 }
397
398 void
dir_clear_range(off_t start,off_t end,Vol * vol)399 dir_clear_range(off_t start, off_t end, Vol *vol)
400 {
401 for (off_t i = 0; i < vol->buckets * DIR_DEPTH * vol->segments; i++) {
402 Dir *e = dir_index(vol, i);
403 if (!dir_token(e) && dir_offset(e) >= static_cast<int64_t>(start) && dir_offset(e) < static_cast<int64_t>(end)) {
404 CACHE_DEC_DIR_USED(vol->mutex);
405 dir_set_offset(e, 0); // delete
406 }
407 }
408 dir_clean_vol(vol);
409 }
410
411 void
check_bucket_not_contains(Dir * b,Dir * e,Dir * seg)412 check_bucket_not_contains(Dir *b, Dir *e, Dir *seg)
413 {
414 Dir *x = b;
415 do {
416 if (x == e) {
417 break;
418 }
419 x = next_dir(x, seg);
420 } while (x);
421 ink_assert(!x);
422 }
423
424 void
freelist_clean(int s,Vol * vol)425 freelist_clean(int s, Vol *vol)
426 {
427 dir_clean_segment(s, vol);
428 if (vol->header->freelist[s]) {
429 return;
430 }
431 Warning("cache directory overflow on '%s' segment %d, purging...", vol->path, s);
432 int n = 0;
433 Dir *seg = vol->dir_segment(s);
434 for (int bi = 0; bi < vol->buckets; bi++) {
435 Dir *b = dir_bucket(bi, seg);
436 for (int l = 0; l < DIR_DEPTH; l++) {
437 Dir *e = dir_bucket_row(b, l);
438 if (dir_head(e) && !(n++ % 10)) {
439 CACHE_DEC_DIR_USED(vol->mutex);
440 dir_set_offset(e, 0); // delete
441 }
442 }
443 }
444 dir_clean_segment(s, vol);
445 }
446
// Pop one entry off segment s's freelist. Returns nullptr when the list
// is empty (after attempting freelist_clean) or when the list is found
// corrupt (segment is reinitialized); callers retry in both cases.
inline Dir *
freelist_pop(int s, Vol *d)
{
  Dir *seg = d->dir_segment(s);
  Dir *e = dir_from_offset(d->header->freelist[s], seg);
  if (!e) {
    freelist_clean(s, d);
    return nullptr;
  }
  d->header->freelist[s] = dir_next(e);
  // If the freelist is bad (a "free" entry still holds data), punt:
  // wipe the whole segment.
  if (dir_offset(e)) {
    dir_init_segment(s, d);
    return nullptr;
  }
  // Fix up the new head's back-pointer.
  Dir *h = dir_from_offset(d->header->freelist[s], seg);
  if (h) {
    dir_set_prev(h, 0);
  }
  return e;
}
468
// Audit segment s: count free, used, empty, valid, and aggregate-valid
// entries plus the average approximate entry size. Each out-pointer may
// be nullptr. Returns nonzero when free + used + empty accounts for all
// entries in the segment to within `offby`.
int
dir_segment_accounted(int s, Vol *d, int offby, int *f, int *u, int *et, int *v, int *av, int *as)
{
  int free = dir_freelist_length(d, s);
  int used = 0, empty = 0;
  int valid = 0, agg_valid = 0;
  int64_t agg_size = 0;
  Dir *seg = d->dir_segment(s);
  for (int bi = 0; bi < d->buckets; bi++) {
    Dir *b = dir_bucket(bi, seg);
    Dir *e = b;
    while (e) {
      if (!dir_offset(e)) {
        // Only a bucket head may be chained while empty.
        ink_assert(e == b);
        empty++;
      } else {
        used++;
        if (dir_valid(d, e)) {
          valid++;
        }
        if (dir_agg_valid(d, e)) {
          agg_valid++;
        }
        agg_size += dir_approx_size(e);
      }
      e = next_dir(e, seg);
      if (!e) {
        break;
      }
    }
  }
  if (f) {
    *f = free;
  }
  if (u) {
    *u = used;
  }
  if (et) {
    *et = empty;
  }
  if (v) {
    *v = valid;
  }
  if (av) {
    *av = agg_valid;
  }
  if (as) {
    // Average approximate size per used entry (0 when nothing is used).
    *as = used ? static_cast<int>(agg_size / used) : 0;
  }
  ink_assert(d->buckets * DIR_DEPTH - (free + used + empty) <= offby);
  return d->buckets * DIR_DEPTH - (free + used + empty) <= offby;
}
521
522 void
dir_free_entry(Dir * e,int s,Vol * d)523 dir_free_entry(Dir *e, int s, Vol *d)
524 {
525 Dir *seg = d->dir_segment(s);
526 unsigned int fo = d->header->freelist[s];
527 unsigned int eo = dir_to_offset(e, seg);
528 dir_set_next(e, fo);
529 if (fo) {
530 dir_set_prev(dir_from_offset(fo, seg), eo);
531 }
532 d->header->freelist[s] = eo;
533 }
534
// Look up `key` in the directory. On a hit, copy the entry into *result,
// record it in *last_collision, and return 1; returns 0 on a miss.
// *last_collision is in/out: it holds the entry at which the previous
// probe stopped, so repeated probes step past entries whose tags collide.
// Invalid entries found along the way are deleted.
int
dir_probe(const CacheKey *key, Vol *d, Dir *result, Dir **last_collision)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s = key->slice32(0) % d->segments;
  int b = key->slice32(1) % d->buckets;
  Dir *seg = d->dir_segment(s);
  Dir *e = nullptr, *p = nullptr, *collision = *last_collision;
  Vol *vol = d; // named for the stat macros
  CHECK_DIR(d);
#ifdef LOOP_CHECK_MODE
  if (dir_bucket_loop_fix(dir_bucket(b, seg), s, d))
    return 0;
#endif
Lagain:
  e = dir_bucket(b, seg);
  if (dir_offset(e)) {
    do {
      if (dir_compare_tag(e, key)) {
        ink_assert(dir_offset(e));
        // Bug: 51680. Need to check collision before checking
        // dir_valid(). In case of a collision, if !dir_valid(), we
        // don't want to call dir_delete_entry.
        if (collision) {
          if (collision == e) {
            // Reached the previous stopping point: stop skipping.
            collision = nullptr;
            // increment collision stat
            // Note: dir_probe could be called multiple times
            // for the same document and so the collision stat
            // may not accurately reflect the number of documents
            // having the same first_key
            DDebug("cache_stats", "Incrementing dir collisions");
            CACHE_INC_DIR_COLLISIONS(d->mutex);
          }
          goto Lcont;
        }
        if (dir_valid(d, e)) {
          DDebug("dir_probe_hit", "found %X %X vol %d bucket %d boffset %" PRId64 "", key->slice32(0), key->slice32(1), d->fd, b,
                 dir_offset(e));
          dir_assign(result, e);
          *last_collision = e;
          ink_assert(dir_offset(e) * CACHE_BLOCK_SIZE < d->len);
          return 1;
        } else { // delete the invalid entry
          CACHE_DEC_DIR_USED(d->mutex);
          e = dir_delete_entry(e, p, s, d);
          continue;
        }
      } else {
        DDebug("dir_probe_tag", "tag mismatch %p %X vs expected %X", e, dir_tag(e), key->slice32(3));
      }
    Lcont:
      p = e;
      e = next_dir(e, seg);
    } while (e);
  }
  if (collision) { // last collision no longer in the list, retry
    DDebug("cache_stats", "Incrementing dir collisions");
    CACHE_INC_DIR_COLLISIONS(d->mutex);
    collision = nullptr;
    goto Lagain;
  }
  DDebug("dir_probe_miss", "missed %X %X on vol %d bucket %d at %p", key->slice32(0), key->slice32(1), d->fd, b, seg);
  CHECK_DIR(d);
  return 0;
}
601
// Insert a new directory entry for `key` describing the data in to_part.
// Slot allocation order: the bucket head (if empty), another empty row of
// the bucket (unlinked from the freelist), then the segment freelist,
// retrying from the top after freelist_clean/dir_init_segment when the
// freelist is exhausted. Always returns 1.
int
dir_insert(const CacheKey *key, Vol *d, Dir *to_part)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s = key->slice32(0) % d->segments, l;
  int bi = key->slice32(1) % d->buckets;
  ink_assert(dir_approx_size(to_part) <= MAX_FRAG_SIZE + sizeof(Doc));
  Dir *seg = d->dir_segment(s);
  Dir *e = nullptr;
  Dir *b = dir_bucket(bi, seg);
  Vol *vol = d; // named for the stat macros
#if defined(DEBUG) && defined(DO_CHECK_DIR_FAST)
  // Debug-only: assert no entry with the same tag and offset already exists.
  unsigned int t = DIR_MASK_TAG(key->slice32(2));
  Dir *col = b;
  while (col) {
    ink_assert((dir_tag(col) != t) || (dir_offset(col) != dir_offset(to_part)));
    col = next_dir(col, seg);
  }
#endif
  CHECK_DIR(d);

Lagain:
  // get from this row first
  e = b;
  if (dir_is_empty(e)) {
    goto Lfill;
  }
  for (l = 1; l < DIR_DEPTH; l++) {
    e = dir_bucket_row(b, l);
    if (dir_is_empty(e)) {
      unlink_from_freelist(e, s, d);
      goto Llink;
    }
  }
  // get one from the freelist
  e = freelist_pop(s, d);
  if (!e) {
    goto Lagain;
  }
Llink:
  // Chain the new entry right after the bucket head.
  dir_set_next(e, dir_next(b));
  dir_set_next(b, dir_to_offset(e, seg));
Lfill:
  dir_assign_data(e, to_part);
  dir_set_tag(e, key->slice32(2));
  ink_assert(d->vol_offset(e) < (d->skip + d->len));
  DDebug("dir_insert", "insert %p %X into vol %d bucket %d at %p tag %X %X boffset %" PRId64 "", e, key->slice32(0), d->fd, bi, e,
         key->slice32(1), dir_tag(e), dir_offset(e));
  CHECK_DIR(d);
  d->header->dirty = 1;
  CACHE_INC_DIR_USED(d->mutex);
  return 1;
}
655
// Overwrite the directory entry matching `key`'s tag and `overwrite`'s
// offset with the contents of `dir`. If no matching entry exists:
// with must_overwrite, fail (return 0) without changing anything;
// otherwise insert `dir` as a fresh entry and return 0. Returns 1 only
// when an existing entry was overwritten in place.
int
dir_overwrite(const CacheKey *key, Vol *d, Dir *dir, Dir *overwrite, bool must_overwrite)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s = key->slice32(0) % d->segments, l;
  int bi = key->slice32(1) % d->buckets;
  Dir *seg = d->dir_segment(s);
  Dir *e = nullptr;
  Dir *b = dir_bucket(bi, seg);
  unsigned int t = DIR_MASK_TAG(key->slice32(2));
  int res = 1;
#ifdef LOOP_CHECK_MODE
  int loop_count = 0;
  bool loop_possible = true;
#endif
  Vol *vol = d; // named for the stat macros
  CHECK_DIR(d);

  ink_assert((unsigned int)dir_approx_size(dir) <= (unsigned int)(MAX_FRAG_SIZE + sizeof(Doc))); // XXX - size should be unsigned
Lagain:
  // find entry to overwrite
  e = b;
  if (dir_offset(e)) {
    do {
#ifdef LOOP_CHECK_MODE
      loop_count++;
      if (loop_count > DIR_LOOP_THRESHOLD && loop_possible) {
        if (dir_bucket_loop_fix(b, s, d)) {
          loop_possible = false;
          goto Lagain;
        }
      }
#endif
      if (dir_tag(e) == t && dir_offset(e) == dir_offset(overwrite)) {
        goto Lfill;
      }
      e = next_dir(e, seg);
    } while (e);
  }
  if (must_overwrite) {
    return 0;
  }
  // Falling through to insertion: the return value becomes 0.
  res = 0;
  // get from this row first
  e = b;
  if (dir_is_empty(e)) {
    CACHE_INC_DIR_USED(d->mutex);
    goto Lfill;
  }
  for (l = 1; l < DIR_DEPTH; l++) {
    e = dir_bucket_row(b, l);
    if (dir_is_empty(e)) {
      unlink_from_freelist(e, s, d);
      goto Llink;
    }
  }
  // get one from the freelist
  e = freelist_pop(s, d);
  if (!e) {
    goto Lagain;
  }
Llink:
  CACHE_INC_DIR_USED(d->mutex);
  // Chain the new entry right after the bucket head.
  dir_set_next(e, dir_next(b));
  dir_set_next(b, dir_to_offset(e, seg));
Lfill:
  dir_assign_data(e, dir);
  dir_set_tag(e, t);
  ink_assert(d->vol_offset(e) < d->skip + d->len);
  DDebug("dir_overwrite", "overwrite %p %X into vol %d bucket %d at %p tag %X %X boffset %" PRId64 "", e, key->slice32(0), d->fd,
         bi, e, t, dir_tag(e), dir_offset(e));
  CHECK_DIR(d);
  d->header->dirty = 1;
  return res;
}
731
// Delete the directory entry matching `key`'s tag and `del`'s offset.
// Returns 1 when an entry was deleted, 0 when no match was found.
int
dir_delete(const CacheKey *key, Vol *d, Dir *del)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int s = key->slice32(0) % d->segments;
  int b = key->slice32(1) % d->buckets;
  Dir *seg = d->dir_segment(s);
  Dir *e = nullptr, *p = nullptr;
#ifdef LOOP_CHECK_MODE
  int loop_count = 0;
#endif
  Vol *vol = d; // named for the stat macros
  CHECK_DIR(d);

  e = dir_bucket(b, seg);
  if (dir_offset(e)) {
    do {
#ifdef LOOP_CHECK_MODE
      loop_count++;
      if (loop_count > DIR_LOOP_THRESHOLD) {
        if (dir_bucket_loop_fix(dir_bucket(b, seg), s, d))
          return 0;
      }
#endif
      if (dir_compare_tag(e, key) && dir_offset(e) == dir_offset(del)) {
        CACHE_DEC_DIR_USED(d->mutex);
        dir_delete_entry(e, p, s, d);
        CHECK_DIR(d);
        return 1;
      }
      p = e;
      e = next_dir(e, seg);
    } while (e);
  }
  CHECK_DIR(d);
  return 0;
}
769
770 // Lookaside Cache
771
772 int
dir_lookaside_probe(const CacheKey * key,Vol * d,Dir * result,EvacuationBlock ** eblock)773 dir_lookaside_probe(const CacheKey *key, Vol *d, Dir *result, EvacuationBlock **eblock)
774 {
775 ink_assert(d->mutex->thread_holding == this_ethread());
776 int i = key->slice32(3) % LOOKASIDE_SIZE;
777 EvacuationBlock *b = d->lookaside[i].head;
778 while (b) {
779 if (b->evac_frags.key == *key) {
780 if (dir_valid(d, &b->new_dir)) {
781 *result = b->new_dir;
782 DDebug("dir_lookaside", "probe %X success", key->slice32(0));
783 if (eblock) {
784 *eblock = b;
785 }
786 return 1;
787 }
788 }
789 b = b->link.next;
790 }
791 DDebug("dir_lookaside", "probe %X failed", key->slice32(0));
792 return 0;
793 }
794
795 int
dir_lookaside_insert(EvacuationBlock * eblock,Vol * d,Dir * to)796 dir_lookaside_insert(EvacuationBlock *eblock, Vol *d, Dir *to)
797 {
798 CacheKey *key = &eblock->evac_frags.earliest_key;
799 DDebug("dir_lookaside", "insert %X %X, offset %d phase %d", key->slice32(0), key->slice32(1), (int)dir_offset(to),
800 (int)dir_phase(to));
801 ink_assert(d->mutex->thread_holding == this_ethread());
802 int i = key->slice32(3) % LOOKASIDE_SIZE;
803 EvacuationBlock *b = new_EvacuationBlock(d->mutex->thread_holding);
804 b->evac_frags.key = *key;
805 b->evac_frags.earliest_key = *key;
806 b->earliest_evacuator = eblock->earliest_evacuator;
807 ink_assert(b->earliest_evacuator);
808 b->dir = eblock->dir;
809 b->new_dir = *to;
810 d->lookaside[i].push(b);
811 return 1;
812 }
813
// Commit the lookaside entry for `key` to the real directory: overwrite
// the old dir with new_dir, notify the RAM cache of the new disk offset,
// and drop the lookaside block. Returns the dir_overwrite result, or 0
// when key is not in the lookaside table.
int
dir_lookaside_fixup(const CacheKey *key, Vol *d)
{
  ink_assert(d->mutex->thread_holding == this_ethread());
  int i = key->slice32(3) % LOOKASIDE_SIZE;
  EvacuationBlock *b = d->lookaside[i].head;
  while (b) {
    if (b->evac_frags.key == *key) {
      int res = dir_overwrite(key, d, &b->new_dir, &b->dir, false);
      DDebug("dir_lookaside", "fixup %X %X offset %" PRId64 " phase %d %d", key->slice32(0), key->slice32(1),
             dir_offset(&b->new_dir), dir_phase(&b->new_dir), res);
      // Tell the RAM cache the fragment moved (offsets split into 32-bit halves).
      int64_t o = dir_offset(&b->dir), n = dir_offset(&b->new_dir);
      d->ram_cache->fixup(key, static_cast<uint32_t>(o >> 32), static_cast<uint32_t>(o), static_cast<uint32_t>(n >> 32),
                          static_cast<uint32_t>(n));
      d->lookaside[i].remove(b);
      free_EvacuationBlock(b, d->mutex->thread_holding);
      return res;
    }
    b = b->link.next;
  }
  DDebug("dir_lookaside", "fixup %X %X failed", key->slice32(0), key->slice32(1));
  return 0;
}
837
838 void
dir_lookaside_cleanup(Vol * d)839 dir_lookaside_cleanup(Vol *d)
840 {
841 ink_assert(d->mutex->thread_holding == this_ethread());
842 for (auto &i : d->lookaside) {
843 EvacuationBlock *b = i.head;
844 while (b) {
845 if (!dir_valid(d, &b->new_dir)) {
846 EvacuationBlock *nb = b->link.next;
847 DDebug("dir_lookaside", "cleanup %X %X cleaned up", b->evac_frags.earliest_key.slice32(0),
848 b->evac_frags.earliest_key.slice32(1));
849 i.remove(b);
850 free_CacheVC(b->earliest_evacuator);
851 free_EvacuationBlock(b, d->mutex->thread_holding);
852 b = nb;
853 goto Lagain;
854 }
855 b = b->link.next;
856 Lagain:;
857 }
858 }
859 }
860
861 void
dir_lookaside_remove(const CacheKey * key,Vol * d)862 dir_lookaside_remove(const CacheKey *key, Vol *d)
863 {
864 ink_assert(d->mutex->thread_holding == this_ethread());
865 int i = key->slice32(3) % LOOKASIDE_SIZE;
866 EvacuationBlock *b = d->lookaside[i].head;
867 while (b) {
868 if (b->evac_frags.key == *key) {
869 DDebug("dir_lookaside", "remove %X %X offset %" PRId64 " phase %d", key->slice32(0), key->slice32(1), dir_offset(&b->new_dir),
870 dir_phase(&b->new_dir));
871 d->lookaside[i].remove(b);
872 free_EvacuationBlock(b, d->mutex->thread_holding);
873 return;
874 }
875 b = b->link.next;
876 }
877 DDebug("dir_lookaside", "remove %X %X failed", key->slice32(0), key->slice32(1));
878 return;
879 }
880
881 // Cache Sync
882 //
883
// Create the global CacheSync continuation and schedule its first run
// cache_config_dir_sync_frequency seconds from now.
void
dir_sync_init()
{
  cacheDirSync = new CacheSync;
  cacheDirSync->trigger = eventProcessor.schedule_in(cacheDirSync, HRTIME_SECONDS(cache_config_dir_sync_frequency));
}
890
// Issue one asynchronous write of n bytes from buffer b at file offset o
// on fd. Completion is delivered back to this continuation (handled as
// AIO_EVENT_DONE in mainEvent) on any AIO callback thread.
void
CacheSync::aio_write(int fd, char *b, int n, off_t o)
{
  io.aiocb.aio_fildes = fd;
  io.aiocb.aio_offset = o;
  io.aiocb.aio_nbytes = n;
  io.aiocb.aio_buf = b;
  io.action = this;
  io.thread = AIO_CALLBACK_THREAD_ANY;
  ink_assert(ink_aio_write(&io) >= 0);
}
902
// Count the in-use (non-zero offset) directory entries across all
// segments. A segment whose bucket chain is found looped is wiped by
// dir_bucket_loop_fix and contributes zero to the total.
uint64_t
dir_entries_used(Vol *d)
{
  uint64_t total = 0;
  for (int s = 0; s < d->segments; s++) {
    Dir *seg = d->dir_segment(s);
    uint64_t seg_used = 0;
    for (int b = 0; b < d->buckets; b++) {
      Dir *e = dir_bucket(b, seg);
      if (dir_bucket_loop_fix(e, s, d)) {
        // Segment was corrupt and has been reinitialized: count nothing.
        seg_used = 0;
        break;
      }
      for (; e; e = next_dir(e, seg)) {
        if (dir_offset(e)) {
          seg_used++;
        }
      }
    }
    total += seg_used;
  }
  return total;
}
930
/*
 * this function flushes the cache meta data to disk when
 * the cache is shutdown. Must *NOT* be used during regular
 * operation.
 */

void
sync_cache_dir_on_shutdown()
{
  Debug("cache_dir_sync", "sync started");
  char *buf = nullptr;
  size_t buflen = 0;
  bool buf_huge = false;

  // Fake thread pointer for MUTEX_TAKE_LOCK; we never actually run on it.
  EThread *t = (EThread *)0xdeadbeef;
  for (int i = 0; i < gnvol; i++) {
    // the process is going down, do a blocking call
    // dont release the volume's lock, there could
    // be another aggWrite in progress
    MUTEX_TAKE_LOCK(gvol[i]->mutex, t);
    Vol *d = gvol[i];

    if (DISK_BAD(d->disk)) {
      Debug("cache_dir_sync", "Dir %s: ignoring -- bad disk", d->hash_text.get());
      continue;
    }
    size_t dirlen = d->dirlen();
    ink_assert(dirlen > 0); // make clang happy - if not > 0 the vol is seriously messed up
    if (!d->header->dirty && !d->dir_sync_in_progress) {
      Debug("cache_dir_sync", "Dir %s: ignoring -- not dirty", d->hash_text.get());
      continue;
    }
    // recompute hit_evacuate_window
    d->hit_evacuate_window = (d->data_blocks * cache_config_hit_evacuate_percent) / 100;

    // check if we have data in the agg buffer
    // dont worry about the cachevc s in the agg queue
    // directories have not been inserted for these writes
    if (d->agg_buf_pos) {
      Debug("cache_dir_sync", "Dir %s: flushing agg buffer first", d->hash_text.get());

      // set write limit
      d->header->agg_pos = d->header->write_pos + d->agg_buf_pos;

      // Blocking synchronous write of the aggregation buffer.
      int r = pwrite(d->fd, d->agg_buffer, d->agg_buf_pos, d->header->write_pos);
      if (r != d->agg_buf_pos) {
        ink_assert(!"flushing agg buffer failed");
        continue;
      }
      d->header->last_write_pos = d->header->write_pos;
      d->header->write_pos += d->agg_buf_pos;
      ink_assert(d->header->write_pos == d->header->agg_pos);
      d->agg_buf_pos = 0;
      d->header->write_serial++;
    }

    // (Re)allocate the staging buffer, preferring huge pages when enabled.
    if (buflen < dirlen) {
      if (buf) {
        if (buf_huge) {
          ats_free_hugepage(buf, buflen);
        } else {
          ats_free(buf);
        }
        buf = nullptr;
      }
      buflen = dirlen;
      if (ats_hugepage_enabled()) {
        buf = static_cast<char *>(ats_alloc_hugepage(buflen));
        buf_huge = true;
      }
      if (buf == nullptr) {
        buf = static_cast<char *>(ats_memalign(ats_pagesize(), buflen));
        buf_huge = false;
      }
    }

    if (!d->dir_sync_in_progress) {
      d->header->sync_serial++;
    } else {
      Debug("cache_dir_sync", "Periodic dir sync in progress -- overwriting");
    }
    d->footer->sync_serial = d->header->sync_serial;

    CHECK_DIR(d);
    memcpy(buf, d->raw_dir, dirlen);
    // The A/B on-disk copies of the directory alternate by sync_serial parity.
    size_t B = d->header->sync_serial & 1;
    off_t start = d->skip + (B ? dirlen : 0);
    // NOTE: B is reused here to hold the pwrite result for the assert below.
    B = pwrite(d->fd, buf, dirlen, start);
    ink_assert(B == dirlen);
    Debug("cache_dir_sync", "done syncing dir for vol %s", d->hash_text.get());
  }
  Debug("cache_dir_sync", "sync done");
  if (buf) {
    if (buf_huge) {
      ats_free_hugepage(buf, buflen);
    } else {
      ats_free(buf);
    }
    buf = nullptr;
  }
}
1032
1033 int
mainEvent(int event,Event * e)1034 CacheSync::mainEvent(int event, Event *e)
1035 {
1036 if (trigger) {
1037 trigger->cancel_action();
1038 trigger = nullptr;
1039 }
1040
1041 Lrestart:
1042 if (vol_idx >= gnvol) {
1043 vol_idx = 0;
1044 if (buf) {
1045 if (buf_huge) {
1046 ats_free_hugepage(buf, buflen);
1047 } else {
1048 ats_free(buf);
1049 }
1050 buflen = 0;
1051 buf = nullptr;
1052 buf_huge = false;
1053 }
1054 Debug("cache_dir_sync", "sync done");
1055 if (event == EVENT_INTERVAL) {
1056 trigger = e->ethread->schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
1057 } else {
1058 trigger = eventProcessor.schedule_in(this, HRTIME_SECONDS(cache_config_dir_sync_frequency));
1059 }
1060 return EVENT_CONT;
1061 }
1062
1063 Vol *vol = gvol[vol_idx]; // must be named "vol" to make STAT macros work.
1064
1065 if (event == AIO_EVENT_DONE) {
1066 // AIO Thread
1067 if (io.aio_result != static_cast<int64_t>(io.aiocb.aio_nbytes)) {
1068 Warning("vol write error during directory sync '%s'", gvol[vol_idx]->hash_text.get());
1069 event = EVENT_NONE;
1070 goto Ldone;
1071 }
1072 CACHE_SUM_DYN_STAT(cache_directory_sync_bytes_stat, io.aio_result);
1073
1074 trigger = eventProcessor.schedule_in(this, SYNC_DELAY);
1075 return EVENT_CONT;
1076 }
1077 {
1078 CACHE_TRY_LOCK(lock, gvol[vol_idx]->mutex, mutex->thread_holding);
1079 if (!lock.is_locked()) {
1080 trigger = eventProcessor.schedule_in(this, HRTIME_MSECONDS(cache_config_mutex_retry_delay));
1081 return EVENT_CONT;
1082 }
1083
1084 if (!vol->dir_sync_in_progress) {
1085 start_time = Thread::get_hrtime();
1086 }
1087
1088 // recompute hit_evacuate_window
1089 vol->hit_evacuate_window = (vol->data_blocks * cache_config_hit_evacuate_percent) / 100;
1090
1091 if (DISK_BAD(vol->disk)) {
1092 goto Ldone;
1093 }
1094
1095 int headerlen = ROUND_TO_STORE_BLOCK(sizeof(VolHeaderFooter));
1096 size_t dirlen = vol->dirlen();
1097 if (!writepos) {
1098 // start
1099 Debug("cache_dir_sync", "sync started");
1100 /* Don't sync the directory to disk if its not dirty. Syncing the
1101 clean directory to disk is also the cause of INKqa07151. Increasing
1102 the serial serial causes the cache to recover more data
1103 than necessary.
1104 The dirty bit it set in dir_insert, dir_overwrite and dir_delete_entry
1105 */
1106 if (!vol->header->dirty) {
1107 Debug("cache_dir_sync", "Dir %s not dirty", vol->hash_text.get());
1108 goto Ldone;
1109 }
1110 if (vol->is_io_in_progress() || vol->agg_buf_pos) {
1111 Debug("cache_dir_sync", "Dir %s: waiting for agg buffer", vol->hash_text.get());
1112 vol->dir_sync_waiting = true;
1113 if (!vol->is_io_in_progress()) {
1114 vol->aggWrite(EVENT_IMMEDIATE, nullptr);
1115 }
1116 return EVENT_CONT;
1117 }
1118 Debug("cache_dir_sync", "pos: %" PRIu64 " Dir %s dirty...syncing to disk", vol->header->write_pos, vol->hash_text.get());
1119 vol->header->dirty = 0;
1120 if (buflen < dirlen) {
1121 if (buf) {
1122 if (buf_huge) {
1123 ats_free_hugepage(buf, buflen);
1124 } else {
1125 ats_free(buf);
1126 }
1127 buf = nullptr;
1128 }
1129 buflen = dirlen;
1130 if (ats_hugepage_enabled()) {
1131 buf = static_cast<char *>(ats_alloc_hugepage(buflen));
1132 buf_huge = true;
1133 }
1134 if (buf == nullptr) {
1135 buf = static_cast<char *>(ats_memalign(ats_pagesize(), buflen));
1136 buf_huge = false;
1137 }
1138 }
1139 vol->header->sync_serial++;
1140 vol->footer->sync_serial = vol->header->sync_serial;
1141 CHECK_DIR(d);
1142 memcpy(buf, vol->raw_dir, dirlen);
1143 vol->dir_sync_in_progress = true;
1144 }
1145 size_t B = vol->header->sync_serial & 1;
1146 off_t start = vol->skip + (B ? dirlen : 0);
1147
1148 if (!writepos) {
1149 // write header
1150 aio_write(vol->fd, buf + writepos, headerlen, start + writepos);
1151 writepos += headerlen;
1152 } else if (writepos < static_cast<off_t>(dirlen) - headerlen) {
1153 // write part of body
1154 int l = SYNC_MAX_WRITE;
1155 if (writepos + l > static_cast<off_t>(dirlen) - headerlen) {
1156 l = dirlen - headerlen - writepos;
1157 }
1158 aio_write(vol->fd, buf + writepos, l, start + writepos);
1159 writepos += l;
1160 } else if (writepos < static_cast<off_t>(dirlen)) {
1161 ink_assert(writepos == (off_t)dirlen - headerlen);
1162 // write footer
1163 aio_write(vol->fd, buf + writepos, headerlen, start + writepos);
1164 writepos += headerlen;
1165 } else {
1166 vol->dir_sync_in_progress = false;
1167 CACHE_INCREMENT_DYN_STAT(cache_directory_sync_count_stat);
1168 CACHE_SUM_DYN_STAT(cache_directory_sync_time_stat, Thread::get_hrtime() - start_time);
1169 start_time = 0;
1170 goto Ldone;
1171 }
1172 return EVENT_CONT;
1173 }
1174 Ldone:
1175 // done
1176 writepos = 0;
1177 ++vol_idx;
1178 goto Lrestart;
1179 }
1180
1181 namespace
1182 {
1183 int
compare_ushort(void const * a,void const * b)1184 compare_ushort(void const *a, void const *b)
1185 {
1186 return *static_cast<unsigned short const *>(a) - *static_cast<unsigned short const *>(b);
1187 }
1188 } // namespace
1189
1190 //
1191 // Check
1192 //
1193
1194 int
dir_check(bool)1195 Vol::dir_check(bool /* fix ATS_UNUSED */) // TODO: we should eliminate this parameter ?
1196 {
1197 static int const SEGMENT_HISTOGRAM_WIDTH = 16;
1198 int hist[SEGMENT_HISTOGRAM_WIDTH + 1] = {0};
1199 unsigned short chain_tag[MAX_ENTRIES_PER_SEGMENT];
1200 int32_t chain_mark[MAX_ENTRIES_PER_SEGMENT];
1201 uint64_t total_buckets = buckets * segments;
1202 uint64_t total_entries = total_buckets * DIR_DEPTH;
1203 int frag_demographics[1 << DIR_SIZE_WIDTH][DIR_BLOCK_SIZES];
1204
1205 int j;
1206 int stale = 0, in_use = 0, empty = 0;
1207 int free = 0, head = 0, buckets_in_use = 0;
1208
1209 int max_chain_length = 0;
1210 int64_t bytes_in_use = 0;
1211
1212 ink_zero(frag_demographics);
1213
1214 printf("Stripe '[%s]'\n", hash_text.get());
1215 printf(" Directory Bytes: %" PRIu64 "\n", total_buckets * SIZEOF_DIR);
1216 printf(" Segments: %d\n", segments);
1217 printf(" Buckets per segment: %" PRIu64 "\n", buckets);
1218 printf(" Entries: %" PRIu64 "\n", total_entries);
1219
1220 for (int s = 0; s < segments; s++) {
1221 Dir *seg = this->dir_segment(s);
1222 int seg_chain_max = 0;
1223 int seg_empty = 0;
1224 int seg_in_use = 0;
1225 int seg_stale = 0;
1226 int seg_bytes_in_use = 0;
1227 int seg_dups = 0;
1228 int seg_buckets_in_use = 0;
1229
1230 ink_zero(chain_tag);
1231 memset(chain_mark, -1, sizeof(chain_mark));
1232
1233 for (int b = 0; b < buckets; b++) {
1234 Dir *root = dir_bucket(b, seg);
1235 int h = 0; // chain length starting in this bucket
1236
1237 // Walk the chain starting in this bucket
1238 int chain_idx = 0;
1239 int mark = 0;
1240 ++seg_buckets_in_use;
1241 for (Dir *e = root; e; e = next_dir(e, seg)) {
1242 if (!dir_offset(e)) {
1243 ++seg_empty;
1244 --seg_buckets_in_use;
1245 // this should only happen on the first dir in a bucket
1246 ink_assert(nullptr == next_dir(e, seg));
1247 break;
1248 } else {
1249 int e_idx = e - seg;
1250 ++h;
1251 chain_tag[chain_idx++] = dir_tag(e);
1252 if (chain_mark[e_idx] == mark) {
1253 printf(" - Cycle of length %d detected for bucket %d\n", h, b);
1254 } else if (chain_mark[e_idx] >= 0) {
1255 printf(" - Entry %d is in chain %d and %d", e_idx, chain_mark[e_idx], mark);
1256 } else {
1257 chain_mark[e_idx] = mark;
1258 }
1259
1260 if (!dir_valid(this, e)) {
1261 ++seg_stale;
1262 } else {
1263 uint64_t size = dir_approx_size(e);
1264 if (dir_head(e)) {
1265 ++head;
1266 }
1267 ++seg_in_use;
1268 seg_bytes_in_use += size;
1269 ++frag_demographics[dir_size(e)][dir_big(e)];
1270 }
1271 }
1272 }
1273
1274 // Check for duplicates (identical tags in the same bucket).
1275 if (h > 1) {
1276 unsigned short last;
1277 qsort(chain_tag, h, sizeof(chain_tag[0]), &compare_ushort);
1278 last = chain_tag[0];
1279 for (int k = 1; k < h; ++k) {
1280 if (last == chain_tag[k]) {
1281 ++seg_dups;
1282 }
1283 last = chain_tag[k];
1284 }
1285 }
1286
1287 ++hist[std::min(h, SEGMENT_HISTOGRAM_WIDTH)];
1288 seg_chain_max = std::max(seg_chain_max, h);
1289 }
1290 int fl_size = dir_freelist_length(this, s);
1291 in_use += seg_in_use;
1292 empty += seg_empty;
1293 stale += seg_stale;
1294 free += fl_size;
1295 buckets_in_use += seg_buckets_in_use;
1296 max_chain_length = std::max(max_chain_length, seg_chain_max);
1297 bytes_in_use += seg_bytes_in_use;
1298
1299 printf(" - Segment-%d | Entries: used=%d stale=%d free=%d disk-bytes=%d Buckets: used=%d empty=%d max=%d avg=%.2f dups=%d\n",
1300 s, seg_in_use, seg_stale, fl_size, seg_bytes_in_use, seg_buckets_in_use, seg_empty, seg_chain_max,
1301 seg_buckets_in_use ? static_cast<float>(seg_in_use + seg_stale) / seg_buckets_in_use : 0.0, seg_dups);
1302 }
1303
1304 printf(" - Stripe | Entries: in-use=%d stale=%d free=%d Buckets: empty=%d max=%d avg=%.2f\n", in_use, stale, free, empty,
1305 max_chain_length, buckets_in_use ? static_cast<float>(in_use + stale) / buckets_in_use : 0);
1306
1307 printf(" Chain lengths: ");
1308 for (j = 0; j < SEGMENT_HISTOGRAM_WIDTH; ++j) {
1309 printf(" %d=%d ", j, hist[j]);
1310 }
1311 printf(" %d>=%d\n", SEGMENT_HISTOGRAM_WIDTH, hist[SEGMENT_HISTOGRAM_WIDTH]);
1312
1313 char tt[256];
1314 printf(" Total Size: %" PRIu64 "\n", static_cast<uint64_t>(len));
1315 printf(" Bytes in Use: %" PRIu64 " [%0.2f%%]\n", bytes_in_use, 100.0 * (static_cast<float>(bytes_in_use) / len));
1316 printf(" Objects: %d\n", head);
1317 printf(" Average Size: %" PRIu64 "\n", head ? (bytes_in_use / head) : 0);
1318 printf(" Average Frags: %.2f\n", head ? static_cast<float>(in_use) / head : 0);
1319 printf(" Write Position: %" PRIu64 "\n", header->write_pos - start);
1320 printf(" Wrap Count: %d\n", header->cycle);
1321 printf(" Phase: %s\n", header->phase ? "true" : "false");
1322 ink_ctime_r(&header->create_time, tt);
1323 tt[strlen(tt) - 1] = 0;
1324 printf(" Sync Serial: %u\n", header->sync_serial);
1325 printf(" Write Serial: %u\n", header->write_serial);
1326 printf(" Create Time: %s\n", tt);
1327 printf("\n");
1328 printf(" Fragment size demographics\n");
1329 for (int b = 0; b < DIR_BLOCK_SIZES; ++b) {
1330 int block_size = DIR_BLOCK_SIZE(b);
1331 int s = 0;
1332 while (s < 1 << DIR_SIZE_WIDTH) {
1333 for (int j = 0; j < 8; ++j, ++s) {
1334 // The size markings are redundant. Low values (less than DIR_SHIFT_WIDTH) for larger
1335 // base block sizes should never be used. Such entries should use the next smaller base block size.
1336 if (b > 0 && s < 1 << DIR_BLOCK_SHIFT(1)) {
1337 ink_assert(frag_demographics[s][b] == 0);
1338 continue;
1339 }
1340 printf(" %8d[%2d:%1d]:%06d", (s + 1) * block_size, s, b, frag_demographics[s][b]);
1341 }
1342 printf("\n");
1343 }
1344 }
1345 printf("\n");
1346
1347 return 0;
1348 }
1349
1350 //
1351 // Static Tables
1352 //
1353
1354 // permutation table
1355 uint8_t CacheKey_next_table[256] = {
1356 21, 53, 167, 51, 255, 126, 241, 151, 115, 66, 155, 174, 226, 215, 80, 188, 12, 95, 8, 24, 162, 201, 46, 104, 79, 172,
1357 39, 68, 56, 144, 142, 217, 101, 62, 14, 108, 120, 90, 61, 47, 132, 199, 110, 166, 83, 125, 57, 65, 19, 130, 148, 116,
1358 228, 189, 170, 1, 71, 0, 252, 184, 168, 177, 88, 229, 242, 237, 183, 55, 13, 212, 240, 81, 211, 74, 195, 205, 147, 93,
1359 30, 87, 86, 63, 135, 102, 233, 106, 118, 163, 107, 10, 243, 136, 160, 119, 43, 161, 206, 141, 203, 78, 175, 36, 37, 140,
1360 224, 197, 185, 196, 248, 84, 122, 73, 152, 157, 18, 225, 219, 145, 45, 2, 171, 249, 173, 32, 143, 137, 69, 41, 35, 89,
1361 33, 98, 179, 214, 114, 231, 251, 123, 180, 194, 29, 3, 178, 31, 192, 164, 15, 234, 26, 230, 91, 156, 5, 16, 23, 244,
1362 58, 50, 4, 67, 134, 165, 60, 235, 250, 7, 138, 216, 49, 139, 191, 154, 11, 52, 239, 59, 111, 245, 9, 64, 25, 129,
1363 247, 232, 190, 246, 109, 22, 112, 210, 221, 181, 92, 169, 48, 100, 193, 77, 103, 133, 70, 220, 207, 223, 176, 204, 76, 186,
1364 200, 208, 158, 182, 227, 222, 131, 38, 187, 238, 6, 34, 253, 128, 146, 44, 94, 127, 105, 153, 113, 20, 27, 124, 159, 17,
1365 72, 218, 96, 149, 213, 42, 28, 254, 202, 40, 117, 82, 97, 209, 54, 236, 121, 75, 85, 150, 99, 198,
1366 };
1367
1368 // permutation table
1369 uint8_t CacheKey_prev_table[256] = {
1370 57, 55, 119, 141, 158, 152, 218, 165, 18, 178, 89, 172, 16, 68, 34, 146, 153, 233, 114, 48, 229, 0, 187, 154, 19, 180,
1371 148, 230, 240, 140, 78, 143, 123, 130, 219, 128, 101, 102, 215, 26, 243, 127, 239, 94, 223, 118, 22, 39, 194, 168, 157, 3,
1372 173, 1, 248, 67, 28, 46, 156, 175, 162, 38, 33, 81, 179, 47, 9, 159, 27, 126, 200, 56, 234, 111, 73, 251, 206, 197,
1373 99, 24, 14, 71, 245, 44, 109, 252, 80, 79, 62, 129, 37, 150, 192, 77, 224, 17, 236, 246, 131, 254, 195, 32, 83, 198,
1374 23, 226, 85, 88, 35, 186, 42, 176, 188, 228, 134, 8, 51, 244, 86, 93, 36, 250, 110, 137, 231, 45, 5, 225, 221, 181,
1375 49, 214, 40, 199, 160, 82, 91, 125, 166, 169, 103, 97, 30, 124, 29, 117, 222, 76, 50, 237, 253, 7, 112, 227, 171, 10,
1376 151, 113, 210, 232, 92, 95, 20, 87, 145, 161, 43, 2, 60, 193, 54, 120, 25, 122, 11, 100, 204, 61, 142, 132, 138, 191,
1377 211, 66, 59, 106, 207, 216, 15, 53, 184, 170, 144, 196, 139, 74, 107, 105, 255, 41, 208, 21, 242, 98, 205, 75, 96, 202,
1378 209, 247, 189, 72, 69, 238, 133, 13, 167, 31, 235, 116, 201, 190, 213, 203, 104, 115, 12, 212, 52, 63, 149, 135, 183, 84,
1379 147, 163, 249, 65, 217, 174, 70, 6, 64, 90, 155, 177, 185, 182, 108, 121, 164, 136, 58, 220, 241, 4,
1380 };
1381
1382 //
1383 // Regression
1384 //
1385 unsigned int regress_rand_seed = 0;
1386 void
regress_rand_init(unsigned int i)1387 regress_rand_init(unsigned int i)
1388 {
1389 regress_rand_seed = i;
1390 }
1391
1392 static void
regress_rand_CacheKey(const CacheKey * key)1393 regress_rand_CacheKey(const CacheKey *key)
1394 {
1395 unsigned int *x = (unsigned int *)key;
1396 for (int i = 0; i < 4; i++) {
1397 x[i] = next_rand(®ress_rand_seed);
1398 }
1399 }
1400
1401 void
dir_corrupt_bucket(Dir * b,int s,Vol * d)1402 dir_corrupt_bucket(Dir *b, int s, Vol *d)
1403 {
1404 // coverity[dont_call]
1405 int l = (static_cast<int>(dir_bucket_length(b, s, d) * drand48()));
1406 Dir *e = b;
1407 Dir *seg = d->dir_segment(s);
1408 for (int i = 0; i < l; i++) {
1409 ink_release_assert(e);
1410 e = next_dir(e, seg);
1411 }
1412 ink_release_assert(e);
1413 dir_set_next(e, dir_to_offset(e, seg));
1414 }
1415
EXCLUSIVE_REGRESSION_TEST(Cache_dir)1416 EXCLUSIVE_REGRESSION_TEST(Cache_dir)(RegressionTest *t, int /* atype ATS_UNUSED */, int *status)
1417 {
1418 ink_hrtime ttime;
1419 int ret = REGRESSION_TEST_PASSED;
1420
1421 if ((CacheProcessor::IsCacheEnabled() != CACHE_INITIALIZED) || gnvol < 1) {
1422 rprintf(t, "cache not ready/configured");
1423 *status = REGRESSION_TEST_FAILED;
1424 return;
1425 }
1426 Vol *d = gvol[0];
1427 EThread *thread = this_ethread();
1428 MUTEX_TRY_LOCK(lock, d->mutex, thread);
1429 ink_release_assert(lock.is_locked());
1430 rprintf(t, "clearing vol 0\n", free);
1431 vol_dir_clear(d);
1432
1433 // coverity[var_decl]
1434 Dir dir;
1435 dir_clear(&dir);
1436 dir_set_phase(&dir, 0);
1437 dir_set_head(&dir, true);
1438 dir_set_offset(&dir, 1);
1439
1440 d->header->agg_pos = d->header->write_pos += 1024;
1441
1442 CacheKey key;
1443 rand_CacheKey(&key, thread->mutex);
1444
1445 int s = key.slice32(0) % d->segments, i, j;
1446 Dir *seg = d->dir_segment(s);
1447
1448 // test insert
1449 rprintf(t, "insert test\n", free);
1450 int inserted = 0;
1451 int free = dir_freelist_length(d, s);
1452 int n = free;
1453 rprintf(t, "free: %d\n", free);
1454 while (n--) {
1455 if (!dir_insert(&key, d, &dir)) {
1456 break;
1457 }
1458 inserted++;
1459 }
1460 rprintf(t, "inserted: %d\n", inserted);
1461 if (static_cast<unsigned int>(inserted - free) > 1) {
1462 ret = REGRESSION_TEST_FAILED;
1463 }
1464
1465 // test delete
1466 rprintf(t, "delete test\n");
1467 for (i = 0; i < d->buckets; i++) {
1468 for (j = 0; j < DIR_DEPTH; j++) {
1469 dir_set_offset(dir_bucket_row(dir_bucket(i, seg), j), 0); // delete
1470 }
1471 }
1472 dir_clean_segment(s, d);
1473 int newfree = dir_freelist_length(d, s);
1474 rprintf(t, "newfree: %d\n", newfree);
1475 if (static_cast<unsigned int>(newfree - free) > 1) {
1476 ret = REGRESSION_TEST_FAILED;
1477 }
1478
1479 // test insert-delete
1480 rprintf(t, "insert-delete test\n");
1481 regress_rand_init(13);
1482 ttime = Thread::get_hrtime_updated();
1483 for (i = 0; i < newfree; i++) {
1484 regress_rand_CacheKey(&key);
1485 dir_insert(&key, d, &dir);
1486 }
1487 uint64_t us = (Thread::get_hrtime_updated() - ttime) / HRTIME_USECOND;
1488 // On windows us is sometimes 0. I don't know why.
1489 // printout the insert rate only if its not 0
1490 if (us) {
1491 rprintf(t, "insert rate = %d / second\n", static_cast<int>((newfree * static_cast<uint64_t>(1000000)) / us));
1492 }
1493 regress_rand_init(13);
1494 ttime = Thread::get_hrtime_updated();
1495 for (i = 0; i < newfree; i++) {
1496 Dir *last_collision = nullptr;
1497 regress_rand_CacheKey(&key);
1498 if (!dir_probe(&key, d, &dir, &last_collision)) {
1499 ret = REGRESSION_TEST_FAILED;
1500 }
1501 }
1502 us = (Thread::get_hrtime_updated() - ttime) / HRTIME_USECOND;
1503 // On windows us is sometimes 0. I don't know why.
1504 // printout the probe rate only if its not 0
1505 if (us) {
1506 rprintf(t, "probe rate = %d / second\n", static_cast<int>((newfree * static_cast<uint64_t>(1000000)) / us));
1507 }
1508
1509 for (int c = 0; c < d->direntries() * 0.75; c++) {
1510 regress_rand_CacheKey(&key);
1511 dir_insert(&key, d, &dir);
1512 }
1513
1514 Dir dir1;
1515 memset(static_cast<void *>(&dir1), 0, sizeof(dir1));
1516 int s1, b1;
1517
1518 rprintf(t, "corrupt_bucket test\n");
1519 for (int ntimes = 0; ntimes < 10; ntimes++) {
1520 #ifdef LOOP_CHECK_MODE
1521 // dir_probe in bucket with loop
1522 rand_CacheKey(&key, thread->mutex);
1523 s1 = key.slice32(0) % d->segments;
1524 b1 = key.slice32(1) % d->buckets;
1525 dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1526 dir_insert(&key, d, &dir);
1527 Dir *last_collision = 0;
1528 dir_probe(&key, d, &dir, &last_collision);
1529
1530 rand_CacheKey(&key, thread->mutex);
1531 s1 = key.slice32(0) % d->segments;
1532 b1 = key.slice32(1) % d->buckets;
1533 dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1534
1535 last_collision = 0;
1536 dir_probe(&key, d, &dir, &last_collision);
1537
1538 // dir_overwrite in bucket with loop
1539 rand_CacheKey(&key, thread->mutex);
1540 s1 = key.slice32(0) % d->segments;
1541 b1 = key.slice32(1) % d->buckets;
1542 CacheKey key1;
1543 key1.b[1] = 127;
1544 dir1 = dir;
1545 dir_set_offset(&dir1, 23);
1546 dir_insert(&key1, d, &dir1);
1547 dir_insert(&key, d, &dir);
1548 key1.b[1] = 80;
1549 dir_insert(&key1, d, &dir1);
1550 dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1551 dir_overwrite(&key, d, &dir, &dir, 1);
1552
1553 rand_CacheKey(&key, thread->mutex);
1554 s1 = key.slice32(0) % d->segments;
1555 b1 = key.slice32(1) % d->buckets;
1556 key.b[1] = 23;
1557 dir_insert(&key, d, &dir1);
1558 dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1559 dir_overwrite(&key, d, &dir, &dir, 0);
1560
1561 rand_CacheKey(&key, thread->mutex);
1562 s1 = key.slice32(0) % d->segments;
1563 Dir *seg1 = d->dir_segment(s1);
1564 // dir_freelist_length in freelist with loop
1565 dir_corrupt_bucket(dir_from_offset(d->header->freelist[s], seg1), s1, d);
1566 dir_freelist_length(d, s1);
1567
1568 rand_CacheKey(&key, thread->mutex);
1569 s1 = key.slice32(0) % d->segments;
1570 b1 = key.slice32(1) % d->buckets;
1571 // dir_bucket_length in bucket with loop
1572 dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1573 dir_bucket_length(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1574 if (!check_dir(d))
1575 ret = REGRESSION_TEST_FAILED;
1576 #else
1577 // test corruption detection
1578 rand_CacheKey(&key, thread->mutex);
1579 s1 = key.slice32(0) % d->segments;
1580 b1 = key.slice32(1) % d->buckets;
1581
1582 dir_insert(&key, d, &dir1);
1583 dir_insert(&key, d, &dir1);
1584 dir_insert(&key, d, &dir1);
1585 dir_insert(&key, d, &dir1);
1586 dir_insert(&key, d, &dir1);
1587 dir_corrupt_bucket(dir_bucket(b1, d->dir_segment(s1)), s1, d);
1588 if (check_dir(d)) {
1589 ret = REGRESSION_TEST_FAILED;
1590 }
1591 #endif
1592 }
1593 vol_dir_clear(d);
1594 *status = ret;
1595 }
1596