/*-
 * Copyright (c) 2006 Verdens Gang AS
 * Copyright (c) 2006-2015 Varnish Software AS
 * All rights reserved.
 *
 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This is the central hash-table code; it relies on the chosen hash
 * implementation only for the actual hashing, all the housekeeping
 * happens here.
 *
 * We have two kinds of structures, objecthead and object.  An objecthead
 * corresponds to a given (Host:, URL) tuple, and the objects hung from
 * the objecthead represent variant instances (ie: different Vary:
 * headers, different TTLs etc) of that web-entity.
 *
 * Each objecthead has a mutex which locks both its own fields, the
 * list of objects and the fields in the objects.
 *
 * The hash implementation must supply a reference count facility on
 * the objecthead, and return with a reference held after a lookup.
 *
 * Lookups in the hash implementation return with a ref held and each
 * object hung from the objhead holds a ref as well.
 *
 * Objects have refcounts which are locked by the objecthead mutex.
 *
 * New objects are always marked busy, and they can go from busy to
 * not busy only once.
 */
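
/*
 * A minimal sketch of the relationship described above (for exposition
 * only; the authoritative definitions live in cache/cache_objhead.h
 * and are assumed, not restated, here):
 *
 *	struct objhead {			// one per hash digest
 *		struct lock mtx;		// guards refcnt, objcs and the
 *		int refcnt;			//   refcnts/flags of member objcores
 *		VTAILQ_HEAD(, objcore) objcs;	// the variant objects
 *		VTAILQ_HEAD(, req) waitinglist;	// reqs parked on busy objects
 *	};
 */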

#include "config.h"

#include <stdio.h>
#include <stdlib.h>

#include "cache_varnishd.h"

#include "cache/cache_objhead.h"
#include "cache/cache_transport.h"

#include "hash/hash_slinger.h"

#include "vsha256.h"

struct rush {
	unsigned		magic;
#define RUSH_MAGIC		0xa1af5f01
	VTAILQ_HEAD(,req)	reqs;
};

static const struct hash_slinger *hash;
static struct objhead *private_oh;

static void hsh_rush1(const struct worker *, struct objhead *,
    struct rush *, int);
static void hsh_rush2(struct worker *, struct rush *);
static int hsh_deref_objhead(struct worker *wrk, struct objhead **poh);
static int hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh,
    int);

/*---------------------------------------------------------------------*/

#define VCF_RETURN(x) const struct vcf_return VCF_##x[1] = { \
	{ .name = #x, } \
};

VCF_RETURNS()
#undef VCF_RETURN

/*---------------------------------------------------------------------*/

static struct objhead *
hsh_newobjhead(void)
{
	struct objhead *oh;

	ALLOC_OBJ(oh, OBJHEAD_MAGIC);
	XXXAN(oh);
	oh->refcnt = 1;
	VTAILQ_INIT(&oh->objcs);
	VTAILQ_INIT(&oh->waitinglist);
	Lck_New(&oh->mtx, lck_objhdr);
	return (oh);
}

/*---------------------------------------------------------------------*/
/* Precreate an objhead and object for later use */
static void
hsh_prealloc(struct worker *wrk)
{

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);

	if (wrk->nobjcore == NULL)
		wrk->nobjcore = ObjNew(wrk);
	CHECK_OBJ_NOTNULL(wrk->nobjcore, OBJCORE_MAGIC);

	if (wrk->nobjhead == NULL) {
		wrk->nobjhead = hsh_newobjhead();
		wrk->stats->n_objecthead++;
	}
	CHECK_OBJ_NOTNULL(wrk->nobjhead, OBJHEAD_MAGIC);

	if (hash->prep != NULL)
		hash->prep(wrk);
}
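
/*
 * The preallocation above exists so that hash->lookup() never has to
 * allocate under a lock: when the digest is new, the slinger consumes
 * wrk->nobjhead; otherwise the spare objhead survives for a later
 * lookup.  Schematically (an illustrative slinger, not a real one):
 *
 *	oh = find_bucket(digest);
 *	if (oh == NULL) {
 *		oh = *poh;		// take the preallocated objhead
 *		*poh = NULL;
 *		insert_bucket(digest, oh);
 *	}
 *	Lck_Lock(&oh->mtx);
 *	return (oh);		// returned with a ref and the mtx held
 */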

/*---------------------------------------------------------------------*/

struct objcore *
HSH_Private(const struct worker *wrk)
{
	struct objcore *oc;

	CHECK_OBJ_NOTNULL(private_oh, OBJHEAD_MAGIC);

	oc = ObjNew(wrk);
	AN(oc);
	oc->refcnt = 1;
	oc->objhead = private_oh;
	oc->flags |= OC_F_PRIVATE;
	Lck_Lock(&private_oh->mtx);
	VTAILQ_INSERT_TAIL(&private_oh->objcs, oc, hsh_list);
	private_oh->refcnt++;
	Lck_Unlock(&private_oh->mtx);
	return (oc);
}

/*---------------------------------------------------------------------*/
void
HSH_Cleanup(struct worker *wrk)
{

	if (wrk->nobjcore != NULL)
		ObjDestroy(wrk, &wrk->nobjcore);

	if (wrk->nobjhead != NULL) {
		Lck_Delete(&wrk->nobjhead->mtx);
		FREE_OBJ(wrk->nobjhead);
		wrk->nobjhead = NULL;
		wrk->stats->n_objecthead--;
	}
	if (wrk->nhashpriv != NULL) {
		/* XXX: If needed, add slinger method for this */
		free(wrk->nhashpriv);
		wrk->nhashpriv = NULL;
	}
}

void
HSH_DeleteObjHead(const struct worker *wrk, struct objhead *oh)
{

	AZ(oh->refcnt);
	assert(VTAILQ_EMPTY(&oh->objcs));
	assert(VTAILQ_EMPTY(&oh->waitinglist));
	Lck_Delete(&oh->mtx);
	wrk->stats->n_objecthead--;
	FREE_OBJ(oh);
}

void
HSH_AddString(struct req *req, void *ctx, const char *str)
{

	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
	AN(ctx);
	if (str != NULL) {
		VSHA256_Update(ctx, str, strlen(str));
		VSLb(req->vsl, SLT_Hash, "%s", str);
	} else
		VSHA256_Update(ctx, &str, 1);
}
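
/*
 * Callers build the digest by feeding strings through a VSHA256
 * context during vcl_hash{}; schematically (simplified from what the
 * request FSM does):
 *
 *	struct VSHA256Context ctx;
 *
 *	VSHA256_Init(&ctx);
 *	HSH_AddString(req, &ctx, "www.example.com");	// e.g. the Host:
 *	HSH_AddString(req, &ctx, NULL);			// field separator
 *	VSHA256_Final(req->digest, &ctx);
 *
 * The NULL case above hashes one byte of the NULL pointer itself, in
 * practice a zero byte, which cannot occur inside a C string and so
 * acts as an unambiguous field separator.
 */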

/*---------------------------------------------------------------------
 * This is a debugging hack to enable testing of boundary conditions
 * in the hash algorithm.
 * We trap the first 9 different digests and translate them to
 * different digests with edge-bit conditions.
 */

static struct hsh_magiclist {
	unsigned char was[VSHA256_LEN];
	unsigned char now[VSHA256_LEN];
} hsh_magiclist[] = {
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x40 } },
	{ .now = {	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80 } },
	{ .now = {	0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x80, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
	{ .now = {	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 } },
};

#define HSH_NMAGIC (sizeof hsh_magiclist / sizeof hsh_magiclist[0])

static void
hsh_testmagic(void *result)
{
	size_t i, j;
	static size_t nused = 0;

	for (i = 0; i < nused; i++)
		if (!memcmp(hsh_magiclist[i].was, result, VSHA256_LEN))
			break;
	if (i == nused && i < HSH_NMAGIC)
		memcpy(hsh_magiclist[nused++].was, result, VSHA256_LEN);
	if (i == nused)
		return;
	assert(i < HSH_NMAGIC);
	fprintf(stderr, "HASHMAGIC: <");
	for (j = 0; j < VSHA256_LEN; j++)
		fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
	fprintf(stderr, "> -> <");
	memcpy(result, hsh_magiclist[i].now, VSHA256_LEN);
	for (j = 0; j < VSHA256_LEN; j++)
		fprintf(stderr, "%02x", ((unsigned char*)result)[j]);
	fprintf(stderr, ">\n");
}

/*---------------------------------------------------------------------
 * Insert an object which magically appears out of nowhere or, more likely,
 * comes off some persistent storage device.
 * Insert it with a reference held.
 */

void
HSH_Insert(struct worker *wrk, const void *digest, struct objcore *oc,
    struct ban *ban)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	AN(digest);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	AN(ban);
	AN(oc->flags & OC_F_BUSY);
	AZ(oc->flags & OC_F_PRIVATE);
	assert(oc->refcnt == 1);
	INIT_OBJ(&rush, RUSH_MAGIC);

	hsh_prealloc(wrk);

	AN(wrk->nobjhead);
	oh = hash->lookup(wrk, digest, &wrk->nobjhead);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);
	assert(oh->refcnt > 0);

	/* Insert the (precreated, still busy) objcore in the
	   objecthead. The new object inherits our objhead reference. */
	oc->objhead = oh;
	VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
	EXP_RefNewObjcore(oc);
	Lck_Unlock(&oh->mtx);

	BAN_RefBan(oc, ban);
	AN(oc->ban);

	/* Move the object first in the oh list, unbusy it and run the
	   waitinglist if necessary */
	Lck_Lock(&oh->mtx);
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	oc->flags &= ~OC_F_BUSY;
	if (!VTAILQ_EMPTY(&oh->waitinglist))
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
	Lck_Unlock(&oh->mtx);
	hsh_rush2(wrk, &rush);

	EXP_Insert(wrk, oc);
}
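
/*
 * An illustrative caller (e.g. a stevedore resurrecting objects at
 * startup; field setup is only sketched, the names are hypothetical):
 *
 *	oc = ObjNew(wrk);		// comes back with OC_F_BUSY set
 *	oc->refcnt = 1;
 *	... restore ttl, t_origin and body from storage ...
 *	HSH_Insert(wrk, stored_digest, oc, ban);
 *	... oc is now findable by HSH_Lookup, and we still hold our ref
 */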

/*---------------------------------------------------------------------
 */

static struct objcore *
hsh_insert_busyobj(struct worker *wrk, struct objhead *oh)
{
	struct objcore *oc;

	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);

	oc = wrk->nobjcore;
	wrk->nobjcore = NULL;
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

	AN(oc->flags & OC_F_BUSY);
	oc->refcnt = 1;		/* Owned by busyobj */
	oc->objhead = oh;
	VTAILQ_INSERT_TAIL(&oh->objcs, oc, hsh_list);
	return (oc);
}

/*---------------------------------------------------------------------
 */

enum lookup_e
HSH_Lookup(struct req *req, struct objcore **ocp, struct objcore **bocp)
{
	struct worker *wrk;
	struct objhead *oh;
	struct objcore *oc;
	struct objcore *exp_oc;
	const struct vcf_return *vr;
	vtim_real exp_t_origin;
	int busy_found;
	const uint8_t *vary;
	unsigned xid = 0;
	float dttl = 0.0;

	AN(ocp);
	*ocp = NULL;
	AN(bocp);
	*bocp = NULL;

	CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
	wrk = req->wrk;
	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(req->http, HTTP_MAGIC);
	CHECK_OBJ_ORNULL(req->vcf, VCF_MAGIC);
	AN(hash);

	hsh_prealloc(wrk);
	if (DO_DEBUG(DBG_HASHEDGE))
		hsh_testmagic(req->digest);

	if (req->hash_objhead != NULL) {
		/*
		 * This req came off the waiting list, and brings an
		 * oh refcnt with it.
		 */
		CHECK_OBJ_NOTNULL(req->hash_objhead, OBJHEAD_MAGIC);
		oh = req->hash_objhead;
		Lck_Lock(&oh->mtx);
		req->hash_objhead = NULL;
	} else {
		AN(wrk->nobjhead);
		oh = hash->lookup(wrk, req->digest, &wrk->nobjhead);
	}

	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_AssertHeld(&oh->mtx);

	if (req->hash_always_miss) {
		/* XXX: should we do predictive Vary in this case ? */
		/* Insert new objcore in objecthead and release mutex */
		*bocp = hsh_insert_busyobj(wrk, oh);
		/* NB: no deref of objhead, new object inherits reference */
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	assert(oh->refcnt > 0);
	busy_found = 0;
	exp_oc = NULL;
	exp_t_origin = 0.0;
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		/* Must be at least our own ref + the objcore we examine */
		assert(oh->refcnt > 1);
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		assert(oc->refcnt > 0);

		if (oc->flags & OC_F_DYING)
			continue;
		if (oc->flags & OC_F_FAILED)
			continue;

		CHECK_OBJ_ORNULL(oc->boc, BOC_MAGIC);
		if (oc->flags & OC_F_BUSY) {
			if (req->hash_ignore_busy)
				continue;

			if (oc->boc && oc->boc->vary != NULL &&
			    !VRY_Match(req, oc->boc->vary)) {
				wrk->strangelove++;
				continue;
			}

			busy_found = 1;
			continue;
		}

		if (oc->ttl <= 0.)
			continue;

		if (BAN_CheckObject(wrk, oc, req)) {
			oc->flags |= OC_F_DYING;
			EXP_Remove(oc);
			continue;
		}

		if (ObjHasAttr(wrk, oc, OA_VARY)) {
			vary = ObjGetAttr(wrk, oc, OA_VARY, NULL);
			AN(vary);
			if (!VRY_Match(req, vary)) {
				wrk->strangelove++;
				continue;
			}
		}

		if (req->vcf != NULL) {
			vr = req->vcf->func(req, &oc, &exp_oc, 0);
			if (vr == VCF_CONTINUE)
				continue;
			if (vr == VCF_MISS) {
				oc = NULL;
				break;
			}
			if (vr == VCF_HIT)
				break;
			assert(vr == VCF_DEFAULT);
		}

		if (EXP_Ttl(req, oc) > req->t_req) {
			assert(oh->refcnt > 1);
			assert(oc->objhead == oh);
			break;
		}

		if (EXP_Ttl(NULL, oc) < req->t_req && /* ignore req.ttl */
		    oc->t_origin > exp_t_origin) {
			/* record the newest object */
			exp_oc = oc;
			exp_t_origin = oc->t_origin;
			assert(oh->refcnt > 1);
			assert(exp_oc->objhead == oh);
		}
	}

	if (req->vcf != NULL)
		(void)req->vcf->func(req, &oc, &exp_oc, 1);

	if (oc != NULL && oc->flags & OC_F_HFP) {
		xid = ObjGetXID(wrk, oc);
		dttl = EXP_Dttl(req, oc);
		AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY));
		wrk->stats->cache_hitpass++;
		VSLb(req->vsl, SLT_HitPass, "%u %.6f", xid, dttl);
		return (HSH_HITPASS);
	}

	if (oc != NULL) {
		*ocp = oc;
		oc->refcnt++;
		if (oc->flags & OC_F_HFM) {
			xid = ObjGetXID(wrk, oc);
			dttl = EXP_Dttl(req, oc);
			*bocp = hsh_insert_busyobj(wrk, oh);
			Lck_Unlock(&oh->mtx);
			wrk->stats->cache_hitmiss++;
			VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
			return (HSH_HITMISS);
		}
		oc->hits++;
		AN(hsh_deref_objhead_unlock(wrk, &oh, HSH_RUSH_POLICY));
		return (HSH_HIT);
	}

	if (exp_oc != NULL && exp_oc->flags & OC_F_HFM) {
		/*
		 * expired HFM ("grace/keep HFM")
		 *
		 * XXX should HFM objects actually have grace/keep ?
		 * XXX also:  why isn't *ocp = exp_oc ?
		 */
		xid = ObjGetXID(wrk, exp_oc);
		dttl = EXP_Dttl(req, exp_oc);
		*bocp = hsh_insert_busyobj(wrk, oh);
		Lck_Unlock(&oh->mtx);
		wrk->stats->cache_hitmiss++;
		VSLb(req->vsl, SLT_HitMiss, "%u %.6f", xid, dttl);
		return (HSH_HITMISS);
	}

	if (!busy_found) {
		*bocp = hsh_insert_busyobj(wrk, oh);

		if (exp_oc != NULL) {
			exp_oc->refcnt++;
			*ocp = exp_oc;
			if (EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
				exp_oc->hits++;
				Lck_Unlock(&oh->mtx);
				return (HSH_GRACE);
			}
		}
		Lck_Unlock(&oh->mtx);
		return (HSH_MISS);
	}

	AN(busy_found);
	if (exp_oc != NULL && EXP_Ttl_grace(req, exp_oc) >= req->t_req) {
		/* we do not wait on the busy object if in grace */
		exp_oc->refcnt++;
		*ocp = exp_oc;
		exp_oc->hits++;
		AN(hsh_deref_objhead_unlock(wrk, &oh, 0));
		return (HSH_GRACE);
	}

	/* There are one or more busy objects, wait for them */
	VTAILQ_INSERT_TAIL(&oh->waitinglist, req, w_list);

	AZ(req->hash_ignore_busy);

	/*
	 * The objhead reference transfers to the sess, we get it
	 * back when the sess comes off the waiting list and
	 * calls us again
	 */
	req->hash_objhead = oh;
	req->wrk = NULL;
	req->waitinglist = 1;

	if (DO_DEBUG(DBG_WAITINGLIST))
		VSLb(req->vsl, SLT_Debug, "on waiting list <%p>", oh);

	Lck_Unlock(&oh->mtx);

	wrk->stats->busy_sleep++;
	return (HSH_BUSY);
}
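
/*
 * The return values map onto the request FSM roughly as follows
 * (schematic; the real dispatch lives in the cnt_lookup() state):
 *
 *	switch (HSH_Lookup(req, &oc, &busy_oc)) {
 *	case HSH_MISS:		// busy_oc inserted, go fetch
 *	case HSH_HITMISS:	// as MISS, but caused by an HFM object
 *	case HSH_HITPASS:	// hit-for-pass object found, pass
 *	case HSH_HIT:		// ref held on oc, deliver
 *	case HSH_GRACE:		// ref held on stale oc, deliver and
 *				//   trigger a background fetch
 *	case HSH_BUSY:		// parked on the waiting list, disembark
 *		...
 *	}
 */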

/*---------------------------------------------------------------------
 * Pick the req's we are going to rush from the waiting list
 */

static void
hsh_rush1(const struct worker *wrk, struct objhead *oh, struct rush *r, int max)
{
	int i;
	struct req *req;

	if (max == 0)
		return;
	if (max == HSH_RUSH_POLICY)
		max = cache_param->rush_exponent;
	assert(max > 0);

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);
	VTAILQ_INIT(&r->reqs);
	Lck_AssertHeld(&oh->mtx);
	for (i = 0; i < max; i++) {
		req = VTAILQ_FIRST(&oh->waitinglist);
		if (req == NULL)
			break;
		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
		wrk->stats->busy_wakeup++;
		AZ(req->wrk);
		VTAILQ_REMOVE(&oh->waitinglist, req, w_list);
		VTAILQ_INSERT_TAIL(&r->reqs, req, w_list);
		req->waitinglist = 0;
	}
}
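
/*
 * A max of HSH_RUSH_POLICY is translated to cache_param->rush_exponent
 * above, which makes wakeups exponential rather than a thundering
 * herd: with rush_exponent=3, one unbusy event wakes up to 3 waiters,
 * each of which in turn rushes up to 3 more when it hits (or unbusies)
 * the object, so a waiting list of N requests drains in roughly
 * log3(N) generations.
 */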

/*---------------------------------------------------------------------
 * Rush req's that came from the waiting list.
 */

static void
hsh_rush2(struct worker *wrk, struct rush *r)
{
	struct req *req;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(r, RUSH_MAGIC);

	while (!VTAILQ_EMPTY(&r->reqs)) {
		req = VTAILQ_FIRST(&r->reqs);
		CHECK_OBJ_NOTNULL(req, REQ_MAGIC);
		VTAILQ_REMOVE(&r->reqs, req, w_list);
		DSL(DBG_WAITINGLIST, req->vsl->wid, "off waiting list");
		if (req->transport->reembark != NULL) {
			// For ESI includes
			req->transport->reembark(wrk, req);
		} else {
			/*
			 * We ignore the queue limits which apply to new
			 * requests because if we fail to reschedule there
			 * may be vmod_privs to clean up and we need a proper
			 * worker thread for that.
			 */
			AZ(Pool_Task(req->sp->pool, req->task, TASK_QUEUE_RUSH));
		}
	}
}

/*---------------------------------------------------------------------
 * Purge an entire objhead
 */

unsigned
HSH_Purge(struct worker *wrk, struct objhead *oh, vtim_real ttl_now,
    vtim_dur ttl, vtim_dur grace, vtim_dur keep)
{
	struct objcore *oc, **ocp;
	unsigned spc, ospc, nobj, n, n_tot = 0;
	int more = 0;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	ospc = WS_ReserveAll(wrk->aws);
	assert(ospc >= sizeof *ocp);
	/*
	 * Because of "soft" purges, there might be oc's in the list that have
	 * the OC_F_PURGED flag set. We do not want to let these slip through,
	 * so we need to clear the flag before entering the do..while loop.
	 */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
		CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
		assert(oc->objhead == oh);
		oc->flags &= ~OC_F_PURGED;
	}
	Lck_Unlock(&oh->mtx);

	do {
		more = 0;
		spc = ospc;
		nobj = 0;
		ocp = WS_Reservation(wrk->aws);
		Lck_Lock(&oh->mtx);
		assert(oh->refcnt > 0);
		VTAILQ_FOREACH(oc, &oh->objcs, hsh_list) {
			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
			assert(oc->objhead == oh);
			if (oc->flags & OC_F_BUSY) {
				/*
				 * We cannot purge busy objects here, because
				 * their owners have special rights to them,
				 * and may nuke them without concern for the
				 * refcount, which by definition always must
				 * be one, so they don't check.
				 */
				continue;
			}
			if (oc->flags & OC_F_DYING)
				continue;
			if (oc->flags & OC_F_PURGED) {
				/*
				 * We have already called EXP_Rearm on this
				 * object, and we do not want to do it
				 * again. Plus the space in the ocp array may
				 * be limited.
				 */
				continue;
			}
			if (spc < sizeof *ocp) {
				/* Iterate if aws is not big enough */
				more = 1;
				break;
			}
			oc->refcnt++;
			spc -= sizeof *ocp;
			ocp[nobj++] = oc;
			oc->flags |= OC_F_PURGED;
		}
		Lck_Unlock(&oh->mtx);

		for (n = 0; n < nobj; n++) {
			oc = ocp[n];
			CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
			EXP_Rearm(oc, ttl_now, ttl, grace, keep);
			(void)HSH_DerefObjCore(wrk, &oc, 0);
		}
		n_tot += nobj;
	} while (more);
	WS_Release(wrk->aws, 0);
	Pool_PurgeStat(n_tot);
	return (n_tot);
}
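
/*
 * VCL purges boil down to a call like the following (schematic; in the
 * real code VRT_purge() makes this call): hard purges zero ttl, grace
 * and keep, killing every variant outright, while "soft" purges pass a
 * shortened ttl so variants can linger in grace:
 *
 *	n = HSH_Purge(wrk, req->objcore->objhead, req->t_req,
 *	    0., 0., 0.);		// hard purge of every variant
 */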

/*---------------------------------------------------------------------
 * Fail an objcore
 */

void
HSH_Fail(struct objcore *oc)
{
	struct objhead *oh;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ(oh, OBJHEAD_MAGIC);

	/*
	 * We have to have either a busy bit, so that HSH_Lookup
	 * will not consider this oc, or an object hung off the oc
	 * so that it can consider it.
	 */
	assert((oc->flags & OC_F_BUSY) || (oc->stobj->stevedore != NULL));

	Lck_Lock(&oh->mtx);
	oc->flags |= OC_F_FAILED;
	Lck_Unlock(&oh->mtx);
}

/*---------------------------------------------------------------------
 * Abandon a fetch we will not need
 */

static void
hsh_abandon(struct objcore *oc)
{
	struct objhead *oh;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ(oh, OBJHEAD_MAGIC);

	Lck_Lock(&oh->mtx);
	oc->flags |= OC_F_ABANDON;
	Lck_Unlock(&oh->mtx);
}

/*---------------------------------------------------------------------
 * Cancel a fetch when the client does not need it any more
 */

void
HSH_Cancel(struct worker *wrk, struct objcore *oc, struct boc *boc)
{
	struct boc *bocref = NULL;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);

	if ((oc->flags & (OC_F_PRIVATE | OC_F_HFM | OC_F_HFP)) == 0)
		return;

	/*
	 * NB: we use two distinct variables to only release the reference if
	 * we had to acquire one. The caller-provided boc is optional.
	 */
	if (boc == NULL)
		bocref = boc = HSH_RefBoc(oc);

	CHECK_OBJ_ORNULL(boc, BOC_MAGIC);

	if (oc->flags & OC_F_HFP)
		AN(oc->flags & OC_F_HFM);

	if (boc != NULL) {
		hsh_abandon(oc);
		ObjWaitState(oc, BOS_FINISHED);
	}

	if (bocref != NULL)
		HSH_DerefBoc(wrk, oc);

	ObjSlim(wrk, oc);
}

/*---------------------------------------------------------------------
 * Unbusy an objcore when the object is completely fetched.
 */

void
HSH_Unbusy(struct worker *wrk, struct objcore *oc)
{
	struct objhead *oh;
	struct rush rush;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ(oh, OBJHEAD_MAGIC);
	INIT_OBJ(&rush, RUSH_MAGIC);

	AN(oc->stobj->stevedore);
	AN(oc->flags & OC_F_BUSY);
	assert(oh->refcnt > 0);
	assert(oc->refcnt > 0);

	if (!(oc->flags & OC_F_PRIVATE)) {
		BAN_NewObjCore(oc);
		AN(oc->ban);
	}

	/* XXX: pretouch neighbors on oh->objcs to prevent page-in under mtx */
	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	assert(oc->refcnt > 0);
	if (!(oc->flags & OC_F_PRIVATE))
		EXP_RefNewObjcore(oc); /* Takes a ref for expiry */
	/* XXX: strictly speaking, we should sort in Date: order. */
	VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	VTAILQ_INSERT_HEAD(&oh->objcs, oc, hsh_list);
	oc->flags &= ~OC_F_BUSY;
	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
		assert(oh->refcnt > 1);
		hsh_rush1(wrk, oh, &rush, HSH_RUSH_POLICY);
	}
	Lck_Unlock(&oh->mtx);
	EXP_Insert(wrk, oc); /* Does nothing unless EXP_RefNewObjcore
			      * was called */
	hsh_rush2(wrk, &rush);
}

/*====================================================================
 * HSH_Kill()
 *
 * It's dead, Jim, kick it...
 */

void
HSH_Kill(struct objcore *oc)
{

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);

	Lck_Lock(&oc->objhead->mtx);
	oc->flags |= OC_F_DYING;
	Lck_Unlock(&oc->objhead->mtx);
	EXP_Remove(oc);
}

/*====================================================================
 * HSH_Snipe()
 *
 * If objcore is idle, gain a ref and mark it dead.
 */

int
HSH_Snipe(const struct worker *wrk, struct objcore *oc)
{
	int retval = 0;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	CHECK_OBJ_NOTNULL(oc->objhead, OBJHEAD_MAGIC);

	if (oc->refcnt == 1 && !Lck_Trylock(&oc->objhead->mtx)) {
		if (oc->refcnt == 1 && !(oc->flags & OC_F_DYING)) {
			oc->flags |= OC_F_DYING;
			oc->refcnt++;
			retval = 1;
		}
		Lck_Unlock(&oc->objhead->mtx);
	}
	if (retval)
		EXP_Remove(oc);
	return (retval);
}
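
/*
 * This is intended for the expiry/LRU side: when hunting for space it
 * can opportunistically kill an object nobody else references, without
 * ever blocking on the objhead mutex (note the Lck_Trylock above).
 * A sketch of the calling pattern:
 *
 *	if (HSH_Snipe(wrk, oc)) {
 *		... we hold a ref on a now-dying oc, reclaim its storage ...
 *		(void)HSH_DerefObjCore(wrk, &oc, 0);
 *	}
 */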

/*---------------------------------------------------------------------
 * Gain a reference on an objcore
 */

void
HSH_Ref(struct objcore *oc)
{
	struct objhead *oh;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	Lck_Lock(&oh->mtx);
	assert(oc->refcnt > 0);
	oc->refcnt++;
	Lck_Unlock(&oh->mtx);
}

/*---------------------------------------------------------------------
 * Gain a reference on the busyobj, if the objcore has one
 */

struct boc *
HSH_RefBoc(const struct objcore *oc)
{
	struct objhead *oh;
	struct boc *boc;

	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);
	if (oc->boc == NULL)
		return (NULL);
	Lck_Lock(&oh->mtx);
	assert(oc->refcnt > 0);
	boc = oc->boc;
	CHECK_OBJ_ORNULL(boc, BOC_MAGIC);
	if (boc != NULL) {
		assert(boc->refcount > 0);
		if (boc->state < BOS_FINISHED)
			boc->refcount++;
		else
			boc = NULL;
	}
	Lck_Unlock(&oh->mtx);
	return (boc);
}
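
/*
 * A boc reference must be paired with exactly one HSH_DerefBoc() call.
 * A typical consumer that may race the fetch looks schematically like:
 *
 *	boc = HSH_RefBoc(oc);
 *	if (boc != NULL) {
 *		... stream the body as the fetch delivers it ...
 *		HSH_DerefBoc(wrk, oc);	// may tear down the boc
 *	} else {
 *		... fetch already finished, read the object directly ...
 *	}
 */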

void
HSH_DerefBoc(struct worker *wrk, struct objcore *oc)
{
	struct boc *boc;
	unsigned r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	CHECK_OBJ_NOTNULL(oc, OBJCORE_MAGIC);
	boc = oc->boc;
	CHECK_OBJ_NOTNULL(boc, BOC_MAGIC);
	Lck_Lock(&oc->objhead->mtx);
	assert(oc->refcnt > 0);
	assert(boc->refcount > 0);
	r = --boc->refcount;
	if (r == 0)
		oc->boc = NULL;
	Lck_Unlock(&oc->objhead->mtx);
	if (r == 0)
		ObjBocDone(wrk, oc, &boc);
}

/*--------------------------------------------------------------------
 * Dereference objcore
 *
 * Returns zero if target was destroyed.
 */

int
HSH_DerefObjCore(struct worker *wrk, struct objcore **ocp, int rushmax)
{
	struct objcore *oc;
	struct objhead *oh;
	struct rush rush;
	int r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oc, ocp, OBJCORE_MAGIC);
	assert(oc->refcnt > 0);
	INIT_OBJ(&rush, RUSH_MAGIC);

	oh = oc->objhead;
	CHECK_OBJ_NOTNULL(oh, OBJHEAD_MAGIC);

	Lck_Lock(&oh->mtx);
	assert(oh->refcnt > 0);
	r = --oc->refcnt;
	if (!r)
		VTAILQ_REMOVE(&oh->objcs, oc, hsh_list);
	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
		assert(oh->refcnt > 1);
		hsh_rush1(wrk, oh, &rush, rushmax);
	}
	Lck_Unlock(&oh->mtx);
	hsh_rush2(wrk, &rush);
	if (r != 0)
		return (r);

	AZ(oc->exp_flags);

	BAN_DestroyObj(oc);
	AZ(oc->ban);

	if (oc->stobj->stevedore != NULL)
		ObjFreeObj(wrk, oc);
	ObjDestroy(wrk, &oc);

	/* Drop our ref on the objhead */
	assert(oh->refcnt > 0);
	(void)hsh_deref_objhead(wrk, &oh);
	return (0);
}
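
/*
 * Callers that care whether they held the last reference can test the
 * return value; e.g. (schematic):
 *
 *	if (HSH_DerefObjCore(wrk, &oc, HSH_RUSH_POLICY) == 0) {
 *		... the objcore (and possibly its objhead) is gone ...
 *	}
 *
 * Either way oc has been cleared via TAKE_OBJ_NOTNULL(), so the caller
 * no longer owns a pointer to it.
 */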

static int
hsh_deref_objhead_unlock(struct worker *wrk, struct objhead **poh, int max)
{
	struct objhead *oh;
	struct rush rush;
	int r;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);

	Lck_AssertHeld(&oh->mtx);

	if (oh == private_oh) {
		assert(VTAILQ_EMPTY(&oh->waitinglist));
		assert(oh->refcnt > 1);
		oh->refcnt--;
		Lck_Unlock(&oh->mtx);
		return (1);
	}

	INIT_OBJ(&rush, RUSH_MAGIC);
	if (!VTAILQ_EMPTY(&oh->waitinglist)) {
		assert(oh->refcnt > 1);
		hsh_rush1(wrk, oh, &rush, max);
	}

	if (oh->refcnt == 1)
		assert(VTAILQ_EMPTY(&oh->waitinglist));

	assert(oh->refcnt > 0);
	r = hash->deref(wrk, oh); /* Unlocks oh->mtx */
	hsh_rush2(wrk, &rush);
	return (r);
}

static int
hsh_deref_objhead(struct worker *wrk, struct objhead **poh)
{
	struct objhead *oh;

	CHECK_OBJ_NOTNULL(wrk, WORKER_MAGIC);
	TAKE_OBJ_NOTNULL(oh, poh, OBJHEAD_MAGIC);

	Lck_Lock(&oh->mtx);
	return (hsh_deref_objhead_unlock(wrk, &oh, 0));
}

void
HSH_Init(const struct hash_slinger *slinger)
{

	assert(DIGEST_LEN == VSHA256_LEN);	/* avoid #include pollution */
	hash = slinger;
	if (hash->start != NULL)
		hash->start();
	private_oh = hsh_newobjhead();
	private_oh->refcnt = 1;
}