1 /*-
2 * Copyright (c) 2008-2011 Varnish Software AS
3 * All rights reserved.
4 *
5 * Author: Poul-Henning Kamp <phk@phk.freebsd.dk>
6 *
7 * SPDX-License-Identifier: BSD-2-Clause
8 *
9 * Redistribution and use in source and binary forms, with or without
10 * modification, are permitted provided that the following conditions
11 * are met:
12 * 1. Redistributions of source code must retain the above copyright
13 * notice, this list of conditions and the following disclaimer.
14 * 2. Redistributions in binary form must reproduce the above copyright
15 * notice, this list of conditions and the following disclaimer in the
16 * documentation and/or other materials provided with the distribution.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21 * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28 * SUCH DAMAGE.
29 *
30 * Persistent storage method
31 *
32 * XXX: Before we start the client or maybe after it stops, we should give the
33 * XXX: stevedores a chance to examine their storage for consistency.
34 *
35 * XXX: Do we ever free the LRU-lists ?
36 */
37
38 /*
39 *
40 * Overall layout:
41 *
42 * struct smp_ident; Identification and geometry
43 * sha256[...] checksum of same
44 *
45 * struct smp_sign;
46 * banspace_1; First ban-space
47 * sha256[...] checksum of same
48 *
49 * struct smp_sign;
50 * banspace_2; Second ban-space
51 * sha256[...] checksum of same
52 *
53 * struct smp_sign;
54 * struct smp_segment_1[N]; First Segment table
55 * sha256[...] checksum of same
56 *
57 * struct smp_sign;
58 * struct smp_segment_2[N]; Second Segment table
59 * sha256[...] checksum of same
60 *
61 * N segments {
62 * struct smp_sign;
63 * struct smp_object[M] Objects in segment
64 * sha256[...] checksum of same
65 * objspace
66 * }
67 *
68 */
69
70 /*
71 * The identblock is located in the first sector of the storage space.
72 * This is written once and not subsequently modified in normal operation.
73 * It is immediately followed by a SHA256sum of the structure, as stored.
74 */
75
/*
 * On-media identification block.
 *
 * Only fixed-width types are used, and the fields are ordered so that
 * the two 64-bit members start at offsets 56 and 64 without implicit
 * padding in front of them.
 */
struct smp_ident {
	/*
	 * Human readable ident so people and programs can tell what
	 * the file or device contains.
	 */
	char			ident[32];

	uint32_t		byte_order;	/* 0x12345678 */

	uint32_t		size;		/* sizeof(struct smp_ident) */

	uint32_t		major_version;

	uint32_t		unique;

	uint32_t		align;		/* alignment in silo */

	uint32_t		granularity;	/* smallest ... in bytes */

	uint64_t		mediasize;	/* ... in bytes */

	/*
	 * Pointers to stuff, indexed by the SMP_*_STUFF constants below.
	 * smp_stuff_len() takes the difference of two consecutive entries,
	 * which is why SMP_END_STUFF needs a slot of its own.
	 */
	uint64_t		stuff[6];
#define SMP_BAN1_STUFF		0
#define SMP_BAN2_STUFF		1
#define SMP_SEG1_STUFF		2
#define SMP_SEG2_STUFF		3
#define SMP_SPC_STUFF		4
#define SMP_END_STUFF		5
};
105
106 /*
107 * The size of smp_ident should be fixed and constant across all platforms.
108 * We enforce that with the following #define and an assert in smp_init()
109 */
/* Expected sizeof(struct smp_ident), identical on every platform. */
#define SMP_IDENT_SIZE		112

/*
 * Contents of smp_ident.ident: 31 characters plus the terminating NUL,
 * i.e. exactly the 32 bytes of that field.
 */
#define SMP_IDENT_STRING	"Varnish Persistent Storage Silo"
113
114 /*
115 * This is used to sign various bits on the disk.
116 */
117
/*
 * Signature header.  SIGN_DATA() treats the bytes immediately following
 * this structure as the signed data.
 */
struct smp_sign {
	char			ident[8];
	uint32_t		unique;
	uint64_t		mapped;
	/*
	 * The length field is the length of the signed data only
	 * (does not include struct smp_sign)
	 */
	uint64_t		length;		/* NB: Must be last */
};

/* Bytes of overhead per signature: the header plus one SHA256 digest. */
#define SMP_SIGN_SPACE		(sizeof(struct smp_sign) + VSHA256_LEN)
128
129 /*
130 * A segment pointer.
131 */
132
/*
 * Locates one segment and its object list.
 * "rel to silo" and "rel to offset" say what each value is measured from.
 */
struct smp_segptr {
	uint64_t		offset;		/* rel to silo */
	uint64_t		length;		/* rel to offset */
	uint64_t		objlist;	/* rel to silo */
	uint32_t		lobjlist;	/* len of objlist */
};
139
140 /*
141 * An object descriptor
142 *
143 * A positive ttl is obj.ttl with obj.grace being NAN
144 * A negative ttl is - (obj.ttl + obj.grace)
145 */
146
/*
 * Per-object descriptor.
 *
 * The three floats are followed by an explicit 32-bit filler so that
 * `ban` starts at offset 56, a multiple of 8, without relying on the
 * compiler to insert padding.
 */
struct smp_object {
	uint8_t			hash[32];	/* really: DIGEST_LEN */
	double			t_origin;
	float			ttl;
	float			grace;
	float			keep;
	uint32_t		__filler__;	/* -> align/8 on 32bit */
	double			ban;
	uint64_t		ptr;		/* rel to silo */
};
157
/* Assert that the calling thread is the one recorded in (sc)->thread. */
#define ASSERT_SILO_THREAD(sc)						\
	do {								\
		assert(pthread_equal(pthread_self(), (sc)->thread));	\
	} while (0)
160
161 /*
162 * Context for a signature.
163 *
164 * A signature is a sequence of bytes in the silo, signed by a SHA256 hash
165 * which follows the bytes.
166 *
167 * The context structure allows us to append to a signature without
168 * recalculating the entire SHA256 hash.
169 */
170
171 struct smp_signctx {
172 struct smp_sign *ss;
173 struct VSHA256Context ctx;
174 uint32_t unique;
175 const char *id;
176 };
177
178 /*
179 * A space wrapped by a signature
180 *
181 * A signspace is a chunk of the silo that is wrapped by a
182 * signature. It has attributes for size, so range checking can be
183 * performed.
184 *
185 */
186
187 struct smp_signspace {
188 struct smp_signctx ctx;
189 uint8_t *start;
190 uint64_t size;
191 };
192
193 struct smp_sc;
194
195 /* XXX: name confusion with on-media version ? */
196 struct smp_seg {
197 unsigned magic;
198 #define SMP_SEG_MAGIC 0x45c61895
199
200 struct smp_sc *sc;
201 VTAILQ_HEAD(,objcore) objcores;
202
203 VTAILQ_ENTRY(smp_seg) list; /* on smp_sc.smp_segments */
204
205 struct smp_segptr p;
206
207 unsigned flags;
208 #define SMP_SEG_MUSTLOAD (1 << 0)
209 #define SMP_SEG_LOADED (1 << 1)
210
211 uint32_t nobj; /* Number of objects */
212 uint32_t nalloc; /* Allocations */
213 uint32_t nfixed; /* How many fixed objects */
214
215 /* Only for open segment */
216 struct smp_object *objs; /* objdesc array */
217 struct smp_signctx ctx[1];
218 };
219
220 VTAILQ_HEAD(smp_seghead, smp_seg);
221
222 struct smp_sc {
223 unsigned magic;
224 #define SMP_SC_MAGIC 0x7b73af0a
225 struct stevedore *parent;
226
227 pthread_t bgthread;
228 unsigned flags;
229 #define SMP_SC_LOADED (1 << 0)
230 #define SMP_SC_STOP (1 << 1)
231
232 const struct stevedore *stevedore;
233 int fd;
234 const char *filename;
235 uint64_t mediasize;
236 uintptr_t align;
237 uint32_t granularity;
238 uint32_t unique;
239
240 uint8_t *base;
241
242 struct smp_ident *ident;
243
244 struct smp_seghead segments;
245 struct smp_seg *cur_seg;
246 uint64_t next_bot; /* next alloc address bottom */
247 uint64_t next_top; /* next alloc address top */
248
249 uint64_t free_offset;
250
251 pthread_t thread;
252
253 VTAILQ_ENTRY(smp_sc) list;
254
255 struct smp_signctx idn;
256 struct smp_signspace ban1;
257 struct smp_signspace ban2;
258 struct smp_signspace seg1;
259 struct smp_signspace seg2;
260
261 struct lock mtx;
262
263 /* Cleaner metrics */
264
265 unsigned min_nseg;
266 unsigned aim_nseg;
267 unsigned max_nseg;
268
269 uint64_t min_segl;
270 uint64_t aim_segl;
271 uint64_t max_segl;
272
273 uint64_t free_reserve;
274 };
275
276 /*--------------------------------------------------------------------*/
277
/*
 * Rounding helpers relative to the silo alignment, (sc)->align.
 * They expand to RUP2()/RDN2(), which are defined outside this file.
 * NOTE(review): presumably the alignment must be a power of two for
 * RUP2()/RDN2() to be meaningful -- confirm against their definition.
 *
 * The sc argument is parenthesised so that any pointer expression
 * (e.g. "silos + 1") can be passed safely.
 */

/* Pointer round up */
#define PRNUP(sc, x)		((void*)RUP2((uintptr_t)(x), (sc)->align))

/* Integer round up/down & assert */
#define IRNDN(sc, x)		RDN2(x, (sc)->align)
#define IRNUP(sc, x)		RUP2(x, (sc)->align)
/* NB: evaluates x twice; do not pass an expression with side effects */
#define IASSERTALIGN(sc, x)	assert(IRNDN(sc, x) == (x))
285
286 /*--------------------------------------------------------------------*/
287
/*
 * Assert that ptr lies inside the silo, i.e. in the half-open range
 * [(sc)->base, (sc)->base + (sc)->mediasize).
 */
#define ASSERT_PTR_IN_SILO(sc, ptr)					\
	assert((const void*)(ptr) >= (const void*)((sc)->base) &&	\
	    (const void*)(ptr) <					\
	    (const void *)((sc)->base + (sc)->mediasize))
291
292 /*--------------------------------------------------------------------*/
293
/*
 * Accessors for a signature context (struct smp_signctx *):
 *   SIGN_DATA	first byte after the signature header
 *   SIGN_END	first byte after the signed data (DATA + ss->length)
 */
#define SIGN_DATA(ctx)		((void *)((ctx)->ss + 1))
#define SIGN_END(ctx)		\
	((void *)((int8_t *)SIGN_DATA(ctx) + (ctx)->ss->length))

/*
 * Accessors for a signspace (struct smp_signspace *):
 *   SIGNSPACE_DATA	start of the signed data
 *   SIGNSPACE_FRONT	first byte after the signed data
 *   SIGNSPACE_LEN	current signed length
 *   SIGNSPACE_FREE	size minus the current signed length
 */
#define SIGNSPACE_DATA(spc)	(SIGN_DATA(&(spc)->ctx))
#define SIGNSPACE_FRONT(spc)	(SIGN_END(&(spc)->ctx))
#define SIGNSPACE_LEN(spc)	((spc)->ctx.ss->length)
#define SIGNSPACE_FREE(spc)	((spc)->size - SIGNSPACE_LEN(spc))
301
302 /* storage_persistent_mgt.c */
303
304 void smp_mgt_init(struct stevedore *parent, int ac, char * const *av);
305
306 /* storage_persistent_silo.c */
307
308 void smp_load_seg(struct worker *, const struct smp_sc *sc, struct smp_seg *sg);
309 void smp_new_seg(struct smp_sc *sc);
310 void smp_close_seg(struct smp_sc *sc, struct smp_seg *sg);
311 void smp_init_oc(struct objcore *oc, struct smp_seg *sg, unsigned objidx);
312 void smp_save_segs(struct smp_sc *sc);
313 sml_getobj_f smp_sml_getobj;
314 void smp_oc_objfree(struct worker *, struct objcore *);
315 obj_event_f smp_oc_event;
316
317 /* storage_persistent_subr.c */
318
319 void smp_def_sign(const struct smp_sc *sc, struct smp_signctx *ctx,
320 uint64_t off, const char *id);
321 int smp_chk_sign(struct smp_signctx *ctx);
322 void smp_reset_sign(struct smp_signctx *ctx);
323 void smp_sync_sign(const struct smp_signctx *ctx);
324
325 int smp_chk_signspace(struct smp_signspace *spc);
326 void smp_append_signspace(struct smp_signspace *spc, uint32_t len);
327 void smp_reset_signspace(struct smp_signspace *spc);
328 void smp_copy_signspace(struct smp_signspace *dst,
329 const struct smp_signspace *src);
330
331 void smp_newsilo(struct smp_sc *sc);
332 int smp_valid_silo(struct smp_sc *sc);
333
334 /*--------------------------------------------------------------------
335 * Caculate payload of some stuff
336 */
337
338 static inline uint64_t
smp_stuff_len(const struct smp_sc * sc,unsigned stuff)339 smp_stuff_len(const struct smp_sc *sc, unsigned stuff)
340 {
341 uint64_t l;
342
343 assert(stuff < SMP_END_STUFF);
344 l = sc->ident->stuff[stuff + 1] - sc->ident->stuff[stuff];
345 l -= SMP_SIGN_SPACE;
346 return (l);
347 }
348
349 static inline uint64_t
smp_segend(const struct smp_seg * sg)350 smp_segend(const struct smp_seg *sg)
351 {
352
353 return (sg->p.offset + sg->p.length);
354 }
355
356 static inline uint64_t
smp_spaceleft(const struct smp_sc * sc,const struct smp_seg * sg)357 smp_spaceleft(const struct smp_sc *sc, const struct smp_seg *sg)
358 {
359
360 IASSERTALIGN(sc, sc->next_bot);
361 assert(sc->next_bot <= sc->next_top - IRNUP(sc, SMP_SIGN_SPACE));
362 assert(sc->next_bot >= sg->p.offset);
363 assert(sc->next_top < sg->p.offset + sg->p.length);
364 return ((sc->next_top - sc->next_bot) - IRNUP(sc, SMP_SIGN_SPACE));
365 }
366