1 /*-
2 * Copyright (c) 2006 Verdens Gang AS
3 * Copyright (c) 2006-2015 Varnish Software AS
4 * All rights reserved.
5 *
6 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
7 *
8 * SPDX-License-Identifier: BSD-2-Clause
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * This is the central hash-table code, it relies on a chosen hash
32 * implementation only for the actual hashing, all the housekeeping
33 * happens here.
34 *
35 * We have two kinds of structures, objecthead and object. An objecthead
36 * corresponds to a given (Host:, URL) tupple, and the objects hung from
37 * the objecthead may represent various variations (ie: Vary: header,
38 * different TTL etc) instances of that web-entity.
39 *
40 * Each objecthead has a mutex which locks both its own fields, the
41 * list of objects and fields in the objects.
42 *
43 * The hash implementation must supply a reference count facility on
44 * the objecthead, and return with a reference held after a lookup.
45 *
46 * Lookups in the hash implementation returns with a ref held and each
47 * object hung from the objhead holds a ref as well.
48 *
49 * Objects have refcounts which are locked by the objecthead mutex.
50 *
51 * New objects are always marked busy, and they can go from busy to
52 * not busy only once.
53 */
54
55 #include "config.h"
56
57 #include <stdio.h>
58 #include <stdlib.h>
59
60 #include "cache_varnishd.h"
61
62 #include "cache/cache_objhead.h"
63 #include "cache/cache_transport.h"
64
65 #include "hash/hash_slinger.h"
66
67 #include "vsha256.h"
68
/*
 * A batch of requests picked off an objhead's waiting list while the
 * objhead mutex is held, to be rescheduled (hsh_rush2) after it is
 * released.
 */
struct rush {
	unsigned		magic;
#define RUSH_MAGIC		0xa1af5f01
	VTAILQ_HEAD(,req)	reqs;
};
74
75 static const struct hash_slinger *hash;
76 static struct objhead *private_oh;
77
78 static void hsh_rush1(const struct worker *, struct objhead *,
79 struct rush *, int);
80 static void hsh_rush2(struct worker *, struct rush *);
81 static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh);
82 static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh,
83 int);
84
85 /*---------------------------------------------------------------------*/
86
/*
 * Instantiate one constant struct vcf_return per VCF return value; the
 * VCF_RETURNS() x-macro expands VCF_RETURN(x) for each value, giving
 * each a printable .name set to the stringified identifier.
 */
#define VCF_RETURN(x) const struct vcf_return VCF_##x[1] = { \
	{ .name = #x, } \
};

VCF_RETURNS()
#undef VCF_RETURN
93
94 /*---------------------------------------------------------------------*/
95
96 static struct objhead *
97 hsh_newobjhead(void)
98 {
99 struct objhead *oh;
100
101 ALLOC_OBJ(oh, OBJHEAD_MAGIC);
102 XXXAN(oh);
103 oh->refcnt = 1;
104 VTAILQ_INIT(&oh->objcs);
105 VTAILQ_INIT(&oh->waitinglist);
106 Lck_New(&oh->mtx, lck_objhdr);
107 return (oh);
108 }
109
110 /*---------------------------------------------------------------------*/
111 /* Precreate an objhead and object for later use */
/*
 * Make sure the worker has a spare objcore and objhead ready for use
 * in a later lookup/insert, and let the hash implementation prepare
 * any per-worker state it needs (hash->prep).
 */
static void
hsh_prealloc(struct worker *wrk)
{

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);

	/* Refill the spare objcore if a previous lookup consumed it */
	if (wrk->nobjcore == NULL)
		wrk->nobjcore = ObjNew(wrk);
	CHECK_OBJ_NOTNULL(wrk->nobjcore, OBJCORE_MAGIC);

	/* Ditto for the spare objhead */
	if (wrk->nobjhead == NULL) {
		wrk->nobjhead = hsh_newobjhead();
		wrk->stats->n_objecthead++;
	}
	CHECK_OBJ_NOTNULL(wrk->nobjhead, OBJHEAD_MAGIC);

	if (hash->prep != NULL)
		hash->prep(wrk);
}
131
132 /*---------------------------------------------------------------------*/
133
134 struct objcore *
HSH_Private(const struct worker * wrk)135 HSH_Private(const struct worker *wrk)
136 {
137 struct objcore *oc;
138
139 CHECK_OBJ_NOTNULL(private_oh, OBJHEAD_MAGIC);
140
141 oc = ObjNew(wrk);
142 AN(oc);
143 oc->refcnt = 1;
144 oc->objhead = private_oh;
145 oc->flags |= OC_F_PRIVATE;
146 Lck_Lock(&private_oh->mtx);
147 VTAILQ_INSERT_TAIL(&private_oh->objcs, oc, hsh_list);
148 private_oh->refcnt++;
149 Lck_Unlock(&private_oh->mtx);
150 return (oc);
151 }
152
153 /*---------------------------------------------------------------------*/
154 void
HSH_Cleanup(struct worker * wrk)155 HSH_Cleanup(struct worker *wrk)
156 {
157
158 if (wrk->nobjcore != NULL)
159 ObjDestroy(wrk, &wrk->nobjcore);
160
161 if (wrk->nobjhead != NULL) {
162 Lck_Delete(&wrk->nobjhead->mtx);
163 FREE_OBJ(wrk->nobjhead);
164 wrk->nobjhead = NULL;
165 wrk->stats->n_objecthead--;
166 }
167 if (wrk->nhashpriv != NULL) {
168 /* XXX: If needed, add slinger method for this */
169 free(wrk->nhashpriv);
170 wrk->nhashpriv = NULL;
171 }
172 }
173
174 void
HSH_DeleteObjHead(const struct worker * wrk,struct objhead * oh)175 HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
176 {
177
178 AZ(oh->refcnt);
179 assert(VTAILQ_EMPTY(&oh->objcs));
180 assert(VTAILQ_EMPTY(&oh->waitinglist));
181 Lck_Delete(&oh->mtx);
182 wrk->stats->n_objecthead--;
183 FREE_OBJ(oh);
184 }
185
186 void
HSH_AddString(struct req * req,void * ctx,const char * str)187 HSH_AddString(struct req *req, void *ctx, const char *str)
188 {
189
190 CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
191 AN(ctx);
192 if (str != NULL) {
193 VSHA256_Update(ctx, str, strlen(str));
194 VSLb(req->vsl, SLT_Hash, "%s", str);
195 } else
196 VSHA256_Update(ctx, &str, 1);
197 }
198
199 /*---------------------------------------------------------------------
200 * This is a debugging hack to enable testing of boundary conditions
201 * in the hash algorithm.
202 * We trap the first 9 different digests and translate them to different
203 * digests with edge bit conditions
204 */
205
/*
 * Translation table for the DBG_HASHEDGE debug feature: .was records
 * the first HSH_NMAGIC distinct digests seen at runtime, and each is
 * rewritten to the corresponding precooked .now digest, which exercise
 * edge bit patterns (all-zero, low bits, high bits of first/last byte).
 */
static struct hsh_magiclist {
	unsigned char was[VSHA256_LEN];
	unsigned char now[VSHA256_LEN];
} hsh_magiclist[] = {
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
	{ .now = {	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
};
247
248 #define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])
249
250 static void
hsh_testmagic(void * result)251 hsh_testmagic(void *result)
252 {
253 size_t i, j;
254 static size_t nused = 0;
255
256 for (i = 0; i < nused; i++)
257 if (!memcmp(hsh_magiclist[i].was, result, VSHA256_LEN))
258 break;
259 if (i == nused && i < HSH_NMAGIC)
260 memcpy(hsh_magiclist[nused++].was, result, VSHA256_LEN);
261 if (i == nused)
262 return;
263 assert(i < HSH_NMAGIC);
264 fprintf(stderr, "HASHMAGIC: <");
265 for (j = 0; j < VSHA256_LEN; j++)
266 fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
267 fprintf(stderr, "> -> <");
268 memcpy(result, hsh_magiclist[i].now, VSHA256_LEN);
269 for (j = 0; j < VSHA256_LEN; j++)
270 fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
271 fprintf(stderr, ">\n");
272 }
273
274 /*---------------------------------------------------------------------
275 * Insert an object which magically appears out of nowhere or, more likely,
276 * comes off some persistent storage device.
277 * Insert it with a reference held.
278 */
279
void
HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
    struct ban *ban)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	AN(digest);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	AN(ban);
	/* The caller's oc must be busy, cacheable and sole-referenced */
	AN(oc->flags & OC_F_BUSY);
	AZ(oc->flags & OC_F_PRIVATE);
	assert(oc->refcnt == 1);
	INIT_OBJ(&rush, RUSH_MAGIC);

	hsh_prealloc(wrk);

	AN(wrk->nobjhead);
	/* Returns the objhead locked, with a reference held */
	oh = hash->lookup(wrk, digest, &wrk->nobjhead);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);
	assert(oh->refcnt > 0);

	/* Mark object busy and insert (precreated) objcore in
	   objecthead. The new object inherits our objhead reference. */
	oc->objhead = oh;
	VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
	EXP_RefNewObjcore(oc);
	Lck_Unlock(&oh->mtx);

	/* Ban is attached with the objhead mutex released */
	BAN_RefBan(oc, ban);
	AN(oc->ban);

	/* Move the object first in the oh list, unbusy it and run the
	   waitinglist if necessary */
	Lck_Lock(&oh->mtx);
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	oc->flags &= ~OC_F_BUSY;
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
	Lck_Unlock(&oh->mtx);
	/* Reschedule rushed requests only after dropping the mutex */
	hsh_rush2(wrk, &rush);

	EXP_Insert(wrk, oc);
}
327
328 /*---------------------------------------------------------------------
329 */
330
331 static struct objcore *
hsh_insert_busyobj(struct worker * wrk,struct objhead * oh)332 hsh_insert_busyobj(struct worker *wrk, struct objhead *oh)
333 {
334 struct objcore *oc;
335
336 CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
337 Lck_AssertHeld(&oh->mtx);
338
339 oc = wrk->nobjcore;
340 wrk->nobjcore = NULL;
341 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
342
343 AN(oc->flags & OC_F_BUSY);
344 oc->refcnt = 1; /* Owned by busyobj */
345 oc->objhead = oh;
346 VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
347 return (oc);
348 }
349
350 /*---------------------------------------------------------------------
351 */
352
/*
 * Central cache lookup.  Finds the objhead for req->digest, scans its
 * objcores and classifies the result:
 *
 *   HSH_MISS	  - nothing usable; *bocp is a new busy objcore
 *   HSH_HIT	  - fresh object found; *ocp referenced
 *   HSH_HITPASS  - hit-for-pass object found
 *   HSH_HITMISS  - hit-for-miss object found; *bocp is a new busy oc
 *   HSH_GRACE	  - expired object within grace; *ocp referenced,
 *		    *bocp set unless another fetch is already busy
 *   HSH_BUSY	  - a busy object may match; req parked on waiting list
 */
enum lookup_e
HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
{
	struct worker *wrk;
	struct objhead *oh;
	struct objcore *oc;
	struct objcore *exp_oc;
	const struct vcf_return *vr;
	vtim_real exp_t_origin;
	int busy_found;
	const uint8_t *vary;
	unsigned xid = 0;
	float dttl = 0.0;

	AN(ocp);
	*ocp = NULL;
	AN(bocp);
	*bocp = NULL;

	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
	wrk = req->wrk;
	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
	CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
	AN(hash);

	hsh_prealloc(wrk);
	if (DO_DEBUG(DBG_HASHEDGE))
		hsh_testmagic(req->digest);

	if (req->hash_objhead != NULL) {
		/*
		 * This req came off the waiting list, and brings an
		 * oh refcnt with it.
		 */
		CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
		oh = req->hash_objhead;
		Lck_Lock(&oh->mtx);
		req->hash_objhead = NULL;
	} else {
		AN(wrk->nobjhead);
		oh = hash->lookup(wrk, req->digest, &wrk->nobjhead);
	}

	/* From here on the objhead mutex is held until we classify */
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);

	if (req->hash_always_miss) {
		/* XXX: should we do predictive Vary in this case ? */
		/* Insert new objcore in objecthead and release mutex */
		*bocp = hsh_insert_busyobj(wrk, oh);
		/* NB: no deref of objhead, new object inherits reference */
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	assert(oh->refcnt > 0);
	busy_found = 0;
	exp_oc = NULL;
	exp_t_origin = 0.0;
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		/* Must be at least our own ref + the objcore we examine */
		assert(oh->refcnt > 1);
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		assert(oc->refcnt > 0);

		/* Skip objects already dead or failed */
		if (oc->flags & OC_F_DYING)
			continue;
		if (oc->flags & OC_F_FAILED)
			continue;

		CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
		if (oc->flags & OC_F_BUSY) {
			if (req->hash_ignore_busy)
				continue;

			/* A busy object whose Vary is already known and
			 * does not match us can be skipped outright */
			if (oc->boc && oc->boc->vary != NULL &&
			    !VRY_Match(req, oc->boc->vary)) {
				wrk->strangelove++;
				continue;
			}

			busy_found = 1;
			continue;
		}

		if (oc->ttl <= 0.)
			continue;

		if (BAN_CheckObject(wrk, oc, req)) {
			oc->flags |= OC_F_DYING;
			EXP_Remove(oc);
			continue;
		}

		if (ObjHasAttr(wrk, oc, OA_VARY)) {
			vary = ObjGetAttr(wrk, oc, OA_VARY, NULL);
			AN(vary);
			if (!VRY_Match(req, vary)) {
				wrk->strangelove++;
				continue;
			}
		}

		/* Let an installed VCL find-commit function steer the
		 * candidate selection */
		if (req->vcf != NULL) {
			vr = req->vcf->func(req, &oc, &exp_oc, 0);
			if (vr == VCF_CONTINUE)
				continue;
			if (vr == VCF_MISS) {
				oc = NULL;
				break;
			}
			if (vr == VCF_HIT)
				break;
			assert(vr == VCF_DEFAULT);
		}

		/* Fresh enough: this is our hit */
		if (EXP_Ttl(req, oc) > req->t_req) {
			assert(oh->refcnt > 1);
			assert(oc->objhead == oh);
			break;
		}

		if (EXP_Ttl(NULL, oc) < req->t_req && /* ignore req.ttl */
		    oc->t_origin > exp_t_origin) {
			/* record the newest object */
			exp_oc = oc;
			exp_t_origin = oc->t_origin;
			assert(oh->refcnt > 1);
			assert(exp_oc->objhead == oh);
		}
	}

	/* Final callback pass for the VCL find-commit function */
	if (req->vcf != NULL)
		(void)req->vcf->func(req, &oc, &exp_oc, 1);

	if (oc != NULL && oc->flags & OC_F_HFP) {
		/* hit-for-pass: no reference kept on the object */
		xid = ObjGetXID(wrk, oc);
		dttl = EXP_Dttl(req, oc);
		AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY));
		wrk->stats->cache_hitpass++;
		VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl);
		return (HSH_HITPASS);
	}

	if (oc != NULL) {
		*ocp = oc;
		oc->refcnt++;
		if (oc->flags & OC_F_HFM) {
			/* hit-for-miss: also start a new fetch */
			xid = ObjGetXID(wrk, oc);
			dttl = EXP_Dttl(req, oc);
			*bocp = hsh_insert_busyobj(wrk, oh);
			Lck_Unlock(&oh->mtx);
			wrk->stats->cache_hitmiss++;
			VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
			return (HSH_HITMISS);
		}
		oc->hits++;
		AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY));
		return (HSH_HIT);
	}

	if (exp_oc != NULL && exp_oc->flags & OC_F_HFM) {
		/*
		 * expired HFM ("grace/keep HFM")
		 *
		 * XXX should HFM objects actually have grace/keep ?
		 * XXX also: why isn't *ocp = exp_oc ?
		 */
		xid = ObjGetXID(wrk, exp_oc);
		dttl = EXP_Dttl(req, exp_oc);
		*bocp = hsh_insert_busyobj(wrk, oh);
		Lck_Unlock(&oh->mtx);
		wrk->stats->cache_hitmiss++;
		VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
		return (HSH_HITMISS);
	}

	if (!busy_found) {
		/* No usable object and no fetch under way: start one */
		*bocp = hsh_insert_busyobj(wrk, oh);

		if (exp_oc != NULL) {
			exp_oc->refcnt++;
			*ocp = exp_oc;
			if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
				exp_oc->hits++;
				Lck_Unlock(&oh->mtx);
				return (HSH_GRACE);
			}
		}
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	AN(busy_found);
	if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
		/* we do not wait on the busy object if in grace */
		exp_oc->refcnt++;
		*ocp = exp_oc;
		exp_oc->hits++;
		AN(hsh_deref_objhead_unlock(wrk, &oh, 0));
		return (HSH_GRACE);
	}

	/* There are one or more busy objects, wait for them */
	VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);

	AZ(req->hash_ignore_busy);

	/*
	 * The objhead reference transfers to the sess, we get it
	 * back when the sess comes off the waiting list and
	 * calls us again
	 */
	req->hash_objhead = oh;
	req->wrk = NULL;
	req->waitinglist = 1;

	if (DO_DEBUG(DBG_WAITINGLIST))
		VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);

	Lck_Unlock(&oh->mtx);

	wrk->stats->busy_sleep++;
	return (HSH_BUSY);
}
580
581 /*---------------------------------------------------------------------
582 * Pick the req's we are going to rush from the waiting list
583 */
584
585 static void
hsh_rush1(const struct worker * wrk,struct objhead * oh,struct rush * r,int max)586 hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max)
587 {
588 int i;
589 struct req *req;
590
591 if (max == 0)
592 return;
593 if (max == HSH_RUSH_POLICY)
594 max = cache_param->rush_exponent;
595 assert(max > 0);
596
597 CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
598 CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
599 CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
600 VTAILQ_INIT(&r->reqs);
601 Lck_AssertHeld(&oh->mtx);
602 for (i = 0; i < max; i++) {
603 req = VTAILQ_FIRST(&oh->waitinglist);
604 if (req == NULL)
605 break;
606 CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
607 wrk->stats->busy_wakeup++;
608 AZ(req->wrk);
609 VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
610 VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
611 req->waitinglist = 0;
612 }
613 }
614
615 /*---------------------------------------------------------------------
616 * Rush req's that came from waiting list.
617 */
618
619 static void
hsh_rush2(struct worker * wrk,struct rush * r)620 hsh_rush2(struct worker *wrk, struct rush *r)
621 {
622 struct req *req;
623
624 CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
625 CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
626
627 while (!VTAILQ_EMPTY(&r->reqs)) {
628 req = VTAILQ_FIRST(&r->reqs);
629 CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
630 VTAILQ_REMOVE(&r->reqs, req, w_list);
631 DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
632 if (req->transport->reembark != NULL) {
633 // For ESI includes
634 req->transport->reembark(wrk, req);
635 } else {
636 /*
637 * We ignore the queue limits which apply to new
638 * requests because if we fail to reschedule there
639 * may be vmod_privs to cleanup and we need a proper
640 * workerthread for that.
641 */
642 AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH));
643 }
644 }
645 }
646
647 /*---------------------------------------------------------------------
648 * Purge an entire objhead
649 */
650
/*
 * Purge (or soft-purge via EXP_Rearm) every eligible objcore on "oh".
 * Busy and dying objects are skipped.  Collects oc pointers into the
 * worker's aws scratch space in batches, so EXP_Rearm/deref can run
 * without holding the objhead mutex; iterates if scratch is too small.
 * Returns the number of objects processed.
 */
unsigned
HSH_Purge(struct worker *wrk, struct objhead *oh, vtim_real ttl_now,
    vtim_dur ttl, vtim_dur grace, vtim_dur keep)
{
	struct objcore *oc, **ocp;
	unsigned spc, ospc, nobj, n, n_tot = 0;
	int more = 0;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	ospc = WS_ReserveAll(wrk->aws);
	assert(ospc >= sizeof *ocp);
	/*
	 * Because of "soft" purges, there might be oc's in the list that has
	 * the OC_F_PURGED flag set. We do not want to let these slip through,
	 * so we need to clear the flag before entering the do..while loop.
	 */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		oc->flags &= ~OC_F_PURGED;
	}
	Lck_Unlock(&oh->mtx);

	do {
		more = 0;
		spc = ospc;
		nobj = 0;
		ocp = WS_Reservation(wrk->aws);
		Lck_Lock(&oh->mtx);
		assert(oh->refcnt > 0);
		VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
			assert(oc->objhead == oh);
			if (oc->flags & OC_F_BUSY) {
				/*
				 * We cannot purge busy objects here, because
				 * their owners have special rights to them,
				 * and may nuke them without concern for the
				 * refcount, which by definition always must
				 * be one, so they don't check.
				 */
				continue;
			}
			if (oc->flags & OC_F_DYING)
				continue;
			if (oc->flags & OC_F_PURGED) {
				/*
				 * We have already called EXP_Rearm on this
				 * object, and we do not want to do it
				 * again. Plus the space in the ocp array may
				 * be limited.
				 */
				continue;
			}
			if (spc < sizeof *ocp) {
				/* Iterate if aws is not big enough */
				more = 1;
				break;
			}
			/* Hold a ref so the oc survives until processed */
			oc->refcnt++;
			spc -= sizeof *ocp;
			ocp[nobj++] = oc;
			oc->flags |= OC_F_PURGED;
		}
		Lck_Unlock(&oh->mtx);

		/* Rearm/deref the batch with the mutex released */
		for (n = 0; n < nobj; n++) {
			oc = ocp[n];
			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
			EXP_Rearm(oc, ttl_now, ttl, grace, keep);
			(void)HSH_DerefObjCore(wrk, &oc, 0);
		}
		n_tot += nobj;
	} while (more);
	WS_Release(wrk->aws, 0);
	Pool_PurgeStat(n_tot);
	return (n_tot);
}
732
733 /*---------------------------------------------------------------------
734 * Fail an objcore
735 */
736
737 void
HSH_Fail(struct objcore * oc)738 HSH_Fail(struct objcore *oc)
739 {
740 struct objhead *oh;
741
742 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
743 oh = oc->objhead;
744 CHECK_OBJ(oh, OBJHEAD_MAGIC);
745
746 /*
747 * We have to have either a busy bit, so that HSH_Lookup
748 * will not consider this oc, or an object hung of the oc
749 * so that it can consider it.
750 */
751 assert((oc->flags & OC_F_BUSY) || (oc->stobj->stevedore != NULL));
752
753 Lck_Lock(&oh->mtx);
754 oc->flags |= OC_F_FAILED;
755 Lck_Unlock(&oh->mtx);
756 }
757
758 /*---------------------------------------------------------------------
759 * Abandon a fetch we will not need
760 */
761
762 static void
hsh_abandon(struct objcore * oc)763 hsh_abandon(struct objcore *oc)
764 {
765 struct objhead *oh;
766
767 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
768 oh = oc->objhead;
769 CHECK_OBJ(oh, OBJHEAD_MAGIC);
770
771 Lck_Lock(&oh->mtx);
772 oc->flags |= OC_F_ABANDON;
773 Lck_Unlock(&oh->mtx);
774 }
775
776 /*---------------------------------------------------------------------
777 * Cancel a fetch when the client does not need it any more
778 */
779
780 void
HSH_Cancel(struct worker * wrk,struct objcore * oc,struct boc * boc)781 HSH_Cancel(struct worker *wrk, struct objcore *oc, struct boc *boc)
782 {
783 struct boc *bocref = NULL;
784
785 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
786
787 if ((oc->flags & (OC_F_PRIVATE | OC_F_HFM | OC_F_HFP)) == 0)
788 return;
789
790 /*
791 * NB: we use two distinct variables to only release the reference if
792 * we had to acquire one. The caller-provided boc is optional.
793 */
794 if (boc == NULL)
795 bocref = boc = HSH_RefBoc(oc);
796
797 CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
798
799 if (oc->flags & OC_F_HFP)
800 AN(oc->flags & OC_F_HFM);
801
802 if (boc != NULL) {
803 hsh_abandon(oc);
804 ObjWaitState(oc, BOS_FINISHED);
805 }
806
807 if (bocref != NULL)
808 HSH_DerefBoc(wrk, oc);
809
810 ObjSlim(wrk, oc);
811 }
812
813 /*---------------------------------------------------------------------
814 * Unbusy an objcore when the object is completely fetched.
815 */
816
/*
 * Make a freshly fetched object visible: attach a ban (unless private),
 * take an expiry reference, move the oc to the head of the objhead
 * list, clear the busy flag, and wake up waiting requests.
 */
void
HSH_Unbusy(struct worker *wrk, struct objcore *oc)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ(oh, OBJHEAD_MAGIC);
	INIT_OBJ(&rush, RUSH_MAGIC);

	/* The object body must already be stored */
	AN(oc->stobj->stevedore);
	AN(oc->flags & OC_F_BUSY);
	assert(oh->refcnt > 0);
	assert(oc->refcnt > 0);

	if (!(oc->flags & OC_F_PRIVATE)) {
		BAN_NewObjCore(oc);
		AN(oc->ban);
	}

	/* XXX: pretouch neighbors on oh->objcs to prevent page-on under mtx */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	assert(oc->refcnt > 0);
	if (!(oc->flags & OC_F_PRIVATE))
		EXP_RefNewObjcore(oc); /* Takes a ref for expiry */
	/* XXX: strictly speaking, we should sort in Date: order. */
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	oc->flags &= ~OC_F_BUSY;
	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
		assert(oh->refcnt > 1);
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
	}
	Lck_Unlock(&oh->mtx);
	EXP_Insert(wrk, oc); /* Does nothing unless EXP_RefNewObjcore was
			      * called */
	/* Reschedule rushed requests after dropping the mutex */
	hsh_rush2(wrk, &rush);
}
858
859 /*====================================================================
860 * HSH_Kill()
861 *
862 * It's dead Jim, kick it...
863 */
864
865 void
HSH_Kill(struct objcore * oc)866 HSH_Kill(struct objcore *oc)
867 {
868
869 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
870 CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);
871
872 Lck_Lock(&oc->objhead->mtx);
873 oc->flags |= OC_F_DYING;
874 Lck_Unlock(&oc->objhead->mtx);
875 EXP_Remove(oc);
876 }
877
878 /*====================================================================
879 * HSH_Snipe()
880 *
881 * If objcore is idle, gain a ref and mark it dead.
882 */
883
/*
 * Opportunistically kill an idle objcore: if it appears sole-referenced
 * and the objhead mutex can be taken without blocking, re-check under
 * the lock, mark it dying and take a reference.  Returns 1 on success,
 * 0 if the object was in use or the mutex was contended.
 */
int
HSH_Snipe(const struct worker *wrk, struct objcore *oc)
{
	int retval = 0;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);

	/* First refcnt test is an unlocked fast-path filter; it is
	 * re-checked below with the mutex held */
	if (oc->refcnt == 1 && !Lck_Trylock(&oc->objhead->mtx)) {
		if (oc->refcnt == 1 && !(oc->flags & OC_F_DYING)) {
			oc->flags |= OC_F_DYING;
			oc->refcnt++;
			retval = 1;
		}
		Lck_Unlock(&oc->objhead->mtx);
	}
	/* EXP_Remove outside the mutex */
	if (retval)
		EXP_Remove(oc);
	return (retval);
}
905
906
907 /*---------------------------------------------------------------------
908 * Gain a reference on an objcore
909 */
910
911 void
HSH_Ref(struct objcore * oc)912 HSH_Ref(struct objcore *oc)
913 {
914 struct objhead *oh;
915
916 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
917 oh = oc->objhead;
918 CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
919 Lck_Lock(&oh->mtx);
920 assert(oc->refcnt > 0);
921 oc->refcnt++;
922 Lck_Unlock(&oh->mtx);
923 }
924
925 /*---------------------------------------------------------------------
926 * Gain a reference on the busyobj, if the objcore has one
927 */
928
929 struct boc *
HSH_RefBoc(const struct objcore * oc)930 HSH_RefBoc(const struct objcore *oc)
931 {
932 struct objhead *oh;
933 struct boc *boc;
934
935 CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
936 oh = oc->objhead;
937 CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
938 if (oc->boc == NULL)
939 return (NULL);
940 Lck_Lock(&oh->mtx);
941 assert(oc->refcnt > 0);
942 boc = oc->boc;
943 CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
944 if (boc != NULL) {
945 assert(boc->refcount > 0);
946 if (boc->state < BOS_FINISHED)
947 boc->refcount++;
948 else
949 boc = NULL;
950 }
951 Lck_Unlock(&oh->mtx);
952 return (boc);
953 }
954
/*
 * Drop a reference on the objcore's boc; the last reference detaches
 * the boc from the oc (under the mutex) and destroys it (outside).
 */
void
HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
{
	struct boc *boc;
	unsigned r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	boc = oc->boc;
	CHECK_OBJ_NOTNULL(boc, BOC_MAGIC);
	Lck_Lock(&oc->objhead->mtx);
	assert(oc->refcnt > 0);
	assert(boc->refcount > 0);
	r = --boc->refcount;
	if (r == 0)
		oc->boc = NULL;
	Lck_Unlock(&oc->objhead->mtx);
	/* Destruction happens with the objhead mutex released */
	if (r == 0)
		ObjBocDone(wrk, oc, &boc);
}
975
976 /*--------------------------------------------------------------------
977 * Dereference objcore
978 *
979 * Returns zero if target was destroyed.
980 */
981
/*
 * Drop a reference on an objcore, rushing up to "rushmax" waiting
 * requests on its objhead.  If this was the last reference, unlist
 * and destroy the objcore and drop its objhead reference.
 *
 * Returns zero if target was destroyed.
 */
int
HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
{
	struct objcore *oc;
	struct objhead *oh;
	struct rush rush;
	int r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oc, ocp, OBJCORE_MAGIC);
	assert(oc->refcnt > 0);
	INIT_OBJ(&rush, RUSH_MAGIC);

	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	r = --oc->refcnt;
	/* Last ref: unlist while we still hold the mutex */
	if (!r)
		VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
		assert(oh->refcnt > 1);
		hsh_rush1(wrk, oh, &rush, rushmax);
	}
	Lck_Unlock(&oh->mtx);
	hsh_rush2(wrk, &rush);
	if (r != 0)
		return (r);

	/* We held the last reference: tear the objcore down */
	AZ(oc->exp_flags);

	BAN_DestroyObj(oc);
	AZ(oc->ban);

	if (oc->stobj->stevedore != NULL)
		ObjFreeObj(wrk, oc);
	ObjDestroy(wrk, &oc);

	/* Drop our ref on the objhead */
	assert(oh->refcnt > 0);
	(void)hsh_deref_objhead(wrk, &oh);
	return (0);
}
1026
/*
 * Drop a reference on a locked objhead, rushing up to "max" waiters
 * first.  Always releases oh->mtx before returning.  Returns non-zero
 * while the objhead remains alive.
 */
static int
hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, int max)
{
	struct objhead *oh;
	struct rush rush;
	int r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);

	Lck_AssertHeld(&oh->mtx);

	/* The shared private objhead is never destroyed and has no
	 * waiting list; just decrement and unlock */
	if (oh == private_oh) {
		assert(VTAILQ_EMPTY(&oh->waitinglist));
		assert(oh->refcnt > 1);
		oh->refcnt--;
		Lck_Unlock(&oh->mtx);
		return (1);
	}

	INIT_OBJ(&rush, RUSH_MAGIC);
	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
		assert(oh->refcnt > 1);
		hsh_rush1(wrk, oh, &rush, max);
	}

	if (oh->refcnt == 1)
		assert(VTAILQ_EMPTY(&oh->waitinglist));

	assert(oh->refcnt > 0);
	r = hash->deref(wrk, oh); /* Unlocks oh->mtx */
	hsh_rush2(wrk, &rush);
	return (r);
}
1061
1062 static int
hsh_deref_objhead(struct worker * wrk,struct objhead ** poh)1063 hsh_deref_objhead(struct worker *wrk, struct objhead **poh)
1064 {
1065 struct objhead *oh;
1066
1067 CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
1068 TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);
1069
1070 Lck_Lock(&oh->mtx);
1071 return (hsh_deref_objhead_unlock(wrk, &oh, 0));
1072 }
1073
1074 void
HSH_Init(const struct hash_slinger * slinger)1075 HSH_Init(const struct hash_slinger *slinger)
1076 {
1077
1078 assert(DIGEST_LEN == VSHA256_LEN); /* avoid #include pollution */
1079 hash = slinger;
1080 if (hash->start != NULL)
1081 hash->start();
1082 private_oh = hsh_newobjhead();
1083 private_oh->refcnt = 1;
1084 }
1085