1 /*-
2  * Copyright (c) 2008-2011 Varnish Software AS
3  * All rights reserved.
4  *
5  * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6  *
7  * SPDX-License-Identifier: BSD-2-Clause
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  * Persistent storage method
31  *
32  * XXX: Before we start the client or maybe after it stops, we should give the
33  * XXX: stevedores a chance to examine their storage for consistency.
34  *
35  * XXX: Do we ever free the LRU-lists ?
36  */
37 
38 /*
39  *
40  * Overall layout:
41  *
42  *	struct smp_ident;		Identification and geometry
43  *	sha256[...]			checksum of same
44  *
45  *	struct smp_sign;
46  *	banspace_1;			First ban-space
47  *	sha256[...]			checksum of same
48  *
49  *	struct smp_sign;
50  *	banspace_2;			Second ban-space
51  *	sha256[...]			checksum of same
52  *
53  *	struct smp_sign;
54  *	struct smp_segment_1[N];	First Segment table
55  *	sha256[...]			checksum of same
56  *
57  *	struct smp_sign;
58  *	struct smp_segment_2[N];	Second Segment table
59  *	sha256[...]			checksum of same
60  *
61  *	N segments {
62  *		struct smp_sign;
63  *		struct smp_object[M]	Objects in segment
64  *		sha256[...]		checksum of same
65  *		objspace
66  *	}
67  *
68  */
69 
/*
 * The identblock is located in the first sector of the storage space.
 * This is written once and not subsequently modified in normal operation.
 * It is immediately followed by a SHA256sum of the structure, as stored.
 *
 * The SMP_*_STUFF constants defined inside the struct are indices into
 * stuff[]; SMP_END_STUFF is the last valid index.
 */

struct smp_ident {
	char			ident[32];	/* Human readable ident
						 * so people and programs
						 * can tell what the file
						 * or device contains.
						 */

	uint32_t		byte_order;	/* 0x12345678 */

	uint32_t		size;		/* sizeof(struct smp_ident) */

	uint32_t		major_version;

	uint32_t		unique;

	uint32_t		align;		/* alignment in silo */

	uint32_t		granularity;	/* smallest ... in bytes */

	uint64_t		mediasize;	/* ... in bytes */

	/*
	 * Consecutive entries delimit one region each: the payload length
	 * of region N is computed from stuff[N + 1] - stuff[N].
	 */
	uint64_t		stuff[6];	/* pointers to stuff */
#define	SMP_BAN1_STUFF		0
#define	SMP_BAN2_STUFF		1
#define	SMP_SEG1_STUFF		2
#define	SMP_SEG2_STUFF		3
#define	SMP_SPC_STUFF		4
#define	SMP_END_STUFF		5
};
105 
/*
 * The size of smp_ident should be fixed and constant across all platforms.
 * We enforce that with the following #define and an assert in smp_init()
 */
#define SMP_IDENT_SIZE		112

/*
 * Including its terminating NUL this literal is exactly 32 bytes, the
 * same size as smp_ident.ident[].
 */
#define SMP_IDENT_STRING	"Varnish Persistent Storage Silo"
113 
/*
 * This is used to sign various bits on the disk.
 *
 * The signed data starts directly after this header (see SIGN_DATA).
 */

struct smp_sign {
	char			ident[8];
	uint32_t		unique;
	uint64_t		mapped;
	/* The length field is the length of the signed data only
	 * (does not include struct smp_sign) */
	uint64_t		length;		/* NB: Must be last */
};
126 
/* Overhead of one signature: the smp_sign header plus VSHA256_LEN bytes */
#define SMP_SIGN_SPACE		(sizeof(struct smp_sign) + VSHA256_LEN)
128 
/*
 * A segment pointer.
 *
 * offset + length is the end of the segment, relative to the silo.
 */

struct smp_segptr {
	uint64_t		offset;		/* rel to silo */
	uint64_t		length;		/* rel to offset */
	uint64_t		objlist;	/* rel to silo */
	uint32_t		lobjlist;	/* len of objlist */
};
139 
/*
 * An object descriptor
 *
 * A positive ttl is obj.ttl with obj.grace being NAN
 * A negative ttl is - (obj.ttl + obj.grace)
 */

struct smp_object {
	uint8_t			hash[32];	/* really: DIGEST_LEN */
	double			t_origin;
	float			ttl;
	float			grace;
	float			keep;
	/*
	 * Explicit padding: the members above add up to 52 bytes, the
	 * filler brings `ban` to offset 56 regardless of the platform's
	 * alignment rules for double.
	 */
	uint32_t		__filler__;	/* -> align/8 on 32bit */
	double			ban;
	uint64_t		ptr;		/* rel to silo */
};
157 
/* Assert that the calling thread is the one recorded in (sc)->thread */
#define ASSERT_SILO_THREAD(sc) \
    do {assert(pthread_equal(pthread_self(), (sc)->thread));} while (0)
160 
161 /*
162  * Context for a signature.
163  *
164  * A signature is a sequence of bytes in the silo, signed by a SHA256 hash
165  * which follows the bytes.
166  *
167  * The context structure allows us to append to a signature without
168  * recalculating the entire SHA256 hash.
169  */
170 
171 struct smp_signctx {
172 	struct smp_sign		*ss;
173 	struct VSHA256Context	ctx;
174 	uint32_t		unique;
175 	const char		*id;
176 };
177 
178 /*
179  * A space wrapped by a signature
180  *
181  * A signspace is a chunk of the silo that is wrapped by a
182  * signature. It has attributes for size, so range checking can be
183  * performed.
184  *
185  */
186 
187 struct smp_signspace {
188 	struct smp_signctx	ctx;
189 	uint8_t			*start;
190 	uint64_t		size;
191 };
192 
193 struct smp_sc;
194 
195 /* XXX: name confusion with on-media version ? */
196 struct smp_seg {
197 	unsigned		magic;
198 #define SMP_SEG_MAGIC		0x45c61895
199 
200 	struct smp_sc		*sc;
201 	VTAILQ_HEAD(,objcore)	objcores;
202 
203 	VTAILQ_ENTRY(smp_seg)	list;		/* on smp_sc.smp_segments */
204 
205 	struct smp_segptr	p;
206 
207 	unsigned		flags;
208 #define SMP_SEG_MUSTLOAD	(1 << 0)
209 #define SMP_SEG_LOADED		(1 << 1)
210 
211 	uint32_t		nobj;		/* Number of objects */
212 	uint32_t		nalloc;		/* Allocations */
213 	uint32_t		nfixed;		/* How many fixed objects */
214 
215 	/* Only for open segment */
216 	struct smp_object	*objs;		/* objdesc array */
217 	struct smp_signctx	ctx[1];
218 };
219 
/* List head type for struct smp_seg, linked through smp_seg.list */
VTAILQ_HEAD(smp_seghead, smp_seg);
221 
222 struct smp_sc {
223 	unsigned		magic;
224 #define SMP_SC_MAGIC		0x7b73af0a
225 	struct stevedore	*parent;
226 
227 	pthread_t		bgthread;
228 	unsigned		flags;
229 #define SMP_SC_LOADED		(1 << 0)
230 #define SMP_SC_STOP		(1 << 1)
231 
232 	const struct stevedore	*stevedore;
233 	int			fd;
234 	const char		*filename;
235 	uint64_t		mediasize;
236 	uintptr_t		align;
237 	uint32_t		granularity;
238 	uint32_t		unique;
239 
240 	uint8_t			*base;
241 
242 	struct smp_ident	*ident;
243 
244 	struct smp_seghead	segments;
245 	struct smp_seg		*cur_seg;
246 	uint64_t		next_bot;	/* next alloc address bottom */
247 	uint64_t		next_top;	/* next alloc address top */
248 
249 	uint64_t		free_offset;
250 
251 	pthread_t		thread;
252 
253 	VTAILQ_ENTRY(smp_sc)	list;
254 
255 	struct smp_signctx	idn;
256 	struct smp_signspace	ban1;
257 	struct smp_signspace	ban2;
258 	struct smp_signspace	seg1;
259 	struct smp_signspace	seg2;
260 
261 	struct lock		mtx;
262 
263 	/* Cleaner metrics */
264 
265 	unsigned		min_nseg;
266 	unsigned		aim_nseg;
267 	unsigned		max_nseg;
268 
269 	uint64_t		min_segl;
270 	uint64_t		aim_segl;
271 	uint64_t		max_segl;
272 
273 	uint64_t		free_reserve;
274 };
275 
/*--------------------------------------------------------------------*/

/*
 * All of these take a pointer to something with an `align` member
 * (struct smp_sc) and round relative to that alignment.  The arguments
 * are parenthesized so that expressions such as `&silo` can be passed.
 */

/* Pointer round up */
#define PRNUP(sc, x)	((void*)RUP2((uintptr_t)(x), (sc)->align))

/* Integer round up/down & assert */
#define IRNDN(sc, x)	RDN2((x), (sc)->align)
#define IRNUP(sc, x)	RUP2((x), (sc)->align)
/* NB: evaluates x twice */
#define IASSERTALIGN(sc, x)	assert(IRNDN(sc, x) == (x))
285 
/*--------------------------------------------------------------------*/

/* Assert that base <= ptr < base + mediasize for the given silo */
#define ASSERT_PTR_IN_SILO(sc, ptr) \
	assert((const void*)(ptr) >= (const void*)((sc)->base) && \
	    (const void*)(ptr) < (const void *)((sc)->base + (sc)->mediasize))
291 
/*--------------------------------------------------------------------*/

/*
 * Accessors taking a struct smp_signctx pointer:
 * SIGN_DATA is the first byte after the struct smp_sign header,
 * SIGN_END is ss->length bytes past SIGN_DATA.
 */
#define SIGN_DATA(ctx)	((void *)((ctx)->ss + 1))
#define SIGN_END(ctx)	((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length))

/*
 * The same for a struct smp_signspace pointer, plus the signed length
 * and the remaining room (size - length).
 */
#define SIGNSPACE_DATA(spc)	(SIGN_DATA(&(spc)->ctx))
#define SIGNSPACE_FRONT(spc)	(SIGN_END(&(spc)->ctx))
#define SIGNSPACE_LEN(spc)	((spc)->ctx.ss->length)
#define SIGNSPACE_FREE(spc)	((spc)->size - SIGNSPACE_LEN(spc))
301 
/* storage_persistent_mgt.c */

/* NOTE(review): ac/av look like an argument count and vector -- confirm */
void smp_mgt_init(struct stevedore *parent, int ac, char * const *av);
305 
306 /* storage_persistent_silo.c */
307 
308 void smp_load_seg(struct worker *, const struct smp_sc *sc, struct smp_seg *sg);
309 void smp_new_seg(struct smp_sc *sc);
310 void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg);
311 void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx);
312 void smp_save_segs(struct smp_sc *sc);
313 sml_getobj_f smp_sml_getobj;
314 void smp_oc_objfree(struct worker *, struct objcore *);
315 obj_event_f smp_oc_event;
316 
/* storage_persistent_subr.c */

/* Operations on a signature context */
void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx,
    uint64_t off, const char *id);
int smp_chk_sign(struct smp_signctx *ctx);
void smp_reset_sign(struct smp_signctx *ctx);
void smp_sync_sign(const struct smp_signctx *ctx);

/* Operations on a signspace */
int smp_chk_signspace(struct smp_signspace *spc);
void smp_append_signspace(struct smp_signspace *spc, uint32_t len);
void smp_reset_signspace(struct smp_signspace *spc);
void smp_copy_signspace(struct smp_signspace *dst,
			const struct smp_signspace *src);

/* Operations on a whole silo */
void smp_newsilo(struct smp_sc *sc);
int smp_valid_silo(struct smp_sc *sc);
333 
334 /*--------------------------------------------------------------------
335  * Caculate payload of some stuff
336  */
337 
338 static inline uint64_t
smp_stuff_len(const struct smp_sc * sc,unsigned stuff)339 smp_stuff_len(const struct smp_sc *sc, unsigned stuff)
340 {
341 	uint64_t l;
342 
343 	assert(stuff < SMP_END_STUFF);
344 	l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff];
345 	l -= SMP_SIGN_SPACE;
346 	return (l);
347 }
348 
349 static inline uint64_t
smp_segend(const struct smp_seg * sg)350 smp_segend(const struct smp_seg *sg)
351 {
352 
353 	return (sg->p.offset + sg->p.length);
354 }
355 
356 static inline uint64_t
smp_spaceleft(const struct smp_sc * sc,const struct smp_seg * sg)357 smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg)
358 {
359 
360 	IASSERTALIGN(sc, sc->next_bot);
361 	assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE));
362 	assert(sc->next_bot >= sg->p.offset);
363 	assert(sc->next_top < sg->p.offset + sg->p.length);
364 	return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE));
365 }
366