1 /** @file
2
3 A brief file description
4
5 @section license License
6
7 Licensed to the Apache Software Foundation (ASF) under one
8 or more contributor license agreements. See the NOTICE file
9 distributed with this work for additional information
10 regarding copyright ownership. The ASF licenses this file
11 to you under the Apache License, Version 2.0 (the
12 "License"); you may not use this file except in compliance
13 with the License. You may obtain a copy of the License at
14
15 http://www.apache.org/licenses/LICENSE-2.0
16
17 Unless required by applicable law or agreed to in writing, software
18 distributed under the License is distributed on an "AS IS" BASIS,
19 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
20 See the License for the specific language governing permissions and
21 limitations under the License.
22 */
23
24 #pragma once
25
26 #include "P_CacheHttp.h"
27
28 struct Vol;
29 struct InterimCacheVol;
30 struct CacheVC;
31
32 /*
33 Directory layout
34 */
35
36 // Constants
37
38 #define DIR_TAG_WIDTH 12
39 #define DIR_MASK_TAG(_t) ((_t) & ((1 << DIR_TAG_WIDTH) - 1))
40 #define SIZEOF_DIR 10
41 #define ESTIMATED_OBJECT_SIZE 8000
42
43 #define MAX_DIR_SEGMENTS (32 * (1 << 16))
44 #define DIR_DEPTH 4
45 #define MAX_ENTRIES_PER_SEGMENT (1 << 16)
46 #define MAX_BUCKETS_PER_SEGMENT (MAX_ENTRIES_PER_SEGMENT / DIR_DEPTH)
47 #define DIR_SIZE_WIDTH 6
48 #define DIR_BLOCK_SIZES 4
49 #define DIR_BLOCK_SHIFT(_i) (3 * (_i))
50 #define DIR_BLOCK_SIZE(_i) (CACHE_BLOCK_SIZE << DIR_BLOCK_SHIFT(_i))
51 #define DIR_SIZE_WITH_BLOCK(_i) ((1 << DIR_SIZE_WIDTH) * DIR_BLOCK_SIZE(_i))
52 #define DIR_OFFSET_BITS 40
53 #define DIR_OFFSET_MAX ((((off_t)1) << DIR_OFFSET_BITS) - 1)
54
55 #define SYNC_MAX_WRITE (2 * 1024 * 1024)
56 #define SYNC_DELAY HRTIME_MSECONDS(500)
57 #define DO_NOT_REMOVE_THIS 0
58
59 // Debugging Options
60
61 //#define DO_CHECK_DIR_FAST
62 //#define DO_CHECK_DIR
63
64 // Macros
65
66 #ifdef DO_CHECK_DIR
67 #define CHECK_DIR(_d) ink_assert(check_dir(_d))
68 #else
69 #define CHECK_DIR(_d) ((void)0)
70 #endif
71
72 #define dir_index(_e, _i) ((Dir *)((char *)(_e)->dir + (SIZEOF_DIR * (_i))))
73 #define dir_assign(_e, _x) \
74 do { \
75 (_e)->w[0] = (_x)->w[0]; \
76 (_e)->w[1] = (_x)->w[1]; \
77 (_e)->w[2] = (_x)->w[2]; \
78 (_e)->w[3] = (_x)->w[3]; \
79 (_e)->w[4] = (_x)->w[4]; \
80 } while (0)
81 #define dir_assign_data(_e, _x) \
82 do { \
83 unsigned short next = dir_next(_e); \
84 dir_assign(_e, _x); \
85 dir_set_next(_e, next); \
86 } while (0)
87 // entry is valid
88 #define dir_valid(_d, _e) (_d->header->phase == dir_phase(_e) ? _d->vol_in_phase_valid(_e) : _d->vol_out_of_phase_valid(_e))
89 // entry is valid and outside of write aggregation region
90 #define dir_agg_valid(_d, _e) (_d->header->phase == dir_phase(_e) ? _d->vol_in_phase_valid(_e) : _d->vol_out_of_phase_agg_valid(_e))
91 // entry may be valid or overwritten in the last aggregated write
92 #define dir_write_valid(_d, _e) \
93 (_d->header->phase == dir_phase(_e) ? vol_in_phase_valid(_d, _e) : vol_out_of_phase_write_valid(_d, _e))
94 #define dir_agg_buf_valid(_d, _e) (_d->header->phase == dir_phase(_e) && _d->vol_in_phase_agg_buf_valid(_e))
95 #define dir_is_empty(_e) (!dir_offset(_e))
96 #define dir_clear(_e) \
97 do { \
98 (_e)->w[0] = 0; \
99 (_e)->w[1] = 0; \
100 (_e)->w[2] = 0; \
101 (_e)->w[3] = 0; \
102 (_e)->w[4] = 0; \
103 } while (0)
104 #define dir_clean(_e) dir_set_offset(_e, 0)
105
106 // OpenDir
107
108 #define OPEN_DIR_BUCKETS 256
109
110 struct EvacuationBlock;
111 typedef uint32_t DirInfo;
112
113 // Cache Directory
114
115 // INTERNAL: do not access these members directly, use the
116 // accessors below (e.g. dir_offset, dir_set_offset).
117 // These structures are stored in memory 2 byte aligned.
118 // The accessors prevent unaligned memory access which
119 // is often either less efficient or unsupported depending
120 // on the processor.
121 struct Dir {
122 #if DO_NOT_REMOVE_THIS
123 // THE BIT-FIELD INTERPRETATION OF THIS STRUCT WHICH HAS TO
124 // USE MACROS TO PREVENT UNALIGNED LOADS
125 // bits are numbered from lowest in u16 to highest
126 // always index as u16 to avoid byte order issues
127 unsigned int offset : 24; // (0,1:0-7) 16M * 512 = 8GB
128 unsigned int big : 2; // (1:8-9) 512 << (3 * big)
129 unsigned int size : 6; // (1:10-15) 6**2 = 64, 64*512 = 32768 .. 64*256=16MB
130 unsigned int tag : 12; // (2:0-11) 2048 / 8 entries/bucket = .4%
131 unsigned int phase : 1; // (2:12)
132 unsigned int head : 1; // (2:13) first segment in a document
133 unsigned int pinned : 1; // (2:14)
134 unsigned int token : 1; // (2:15)
135 unsigned int next : 16; // (3)
136 unsigned int offset_high : 16; // 8GB * 65k = 0.5PB (4)
137 #else
138 uint16_t w[5];
139 Dir() { dir_clear(this); }
140 #endif
141 };
142
143 // INTERNAL: do not access these members directly, use the
144 // accessors below (e.g. dir_offset, dir_set_offset)
145 struct FreeDir {
146 #if DO_NOT_REMOVE_THIS
147 // THE BIT-FIELD INTERPRETATION OF THIS STRUCT WHICH HAS TO
148 // USE MACROS TO PREVENT UNALIGNED LOADS
149 unsigned int offset : 24; // 0: empty
150 unsigned int reserved : 8;
151 unsigned int prev : 16; // (2)
152 unsigned int next : 16; // (3)
153 unsigned int offset_high : 16; // 0: empty
154 #else
155 uint16_t w[5];
156 FreeDir() { dir_clear(this); }
157 #endif
158 };
159
160 #define dir_offset(_e) \
161 ((int64_t)(((uint64_t)(_e)->w[0]) | (((uint64_t)((_e)->w[1] & 0xFF)) << 16) | (((uint64_t)(_e)->w[4]) << 24)))
162 #define dir_set_offset(_e, _o) \
163 do { \
164 (_e)->w[0] = (uint16_t)_o; \
165 (_e)->w[1] = (uint16_t)((((_o) >> 16) & 0xFF) | ((_e)->w[1] & 0xFF00)); \
166 (_e)->w[4] = (uint16_t)((_o) >> 24); \
167 } while (0)
168 #define dir_bit(_e, _w, _b) ((uint32_t)(((_e)->w[_w] >> (_b)) & 1))
169 #define dir_set_bit(_e, _w, _b, _v) (_e)->w[_w] = (uint16_t)(((_e)->w[_w] & ~(1 << (_b))) | (((_v) ? 1 : 0) << (_b)))
170 #define dir_big(_e) ((uint32_t)((((_e)->w[1]) >> 8) & 0x3))
171 #define dir_set_big(_e, _v) (_e)->w[1] = (uint16_t)(((_e)->w[1] & 0xFCFF) | (((uint16_t)(_v)) & 0x3) << 8)
172 #define dir_size(_e) ((uint32_t)(((_e)->w[1]) >> 10))
173 #define dir_set_size(_e, _v) (_e)->w[1] = (uint16_t)(((_e)->w[1] & ((1 << 10) - 1)) | ((_v) << 10))
174 #define dir_set_approx_size(_e, _s) \
175 do { \
176 if ((_s) <= DIR_SIZE_WITH_BLOCK(0)) { \
177 dir_set_big(_e, 0); \
178 dir_set_size(_e, ((_s)-1) / DIR_BLOCK_SIZE(0)); \
179 } else if ((_s) <= DIR_SIZE_WITH_BLOCK(1)) { \
180 dir_set_big(_e, 1); \
181 dir_set_size(_e, ((_s)-1) / DIR_BLOCK_SIZE(1)); \
182 } else if ((_s) <= DIR_SIZE_WITH_BLOCK(2)) { \
183 dir_set_big(_e, 2); \
184 dir_set_size(_e, ((_s)-1) / DIR_BLOCK_SIZE(2)); \
185 } else { \
186 dir_set_big(_e, 3); \
187 dir_set_size(_e, ((_s)-1) / DIR_BLOCK_SIZE(3)); \
188 } \
189 } while (0)
190 #define dir_approx_size(_e) ((dir_size(_e) + 1) * DIR_BLOCK_SIZE(dir_big(_e)))
191 #define round_to_approx_dir_size(_s) \
192 (_s <= DIR_SIZE_WITH_BLOCK(0) ? \
193 ROUND_TO(_s, DIR_BLOCK_SIZE(0)) : \
194 (_s <= DIR_SIZE_WITH_BLOCK(1) ? \
195 ROUND_TO(_s, DIR_BLOCK_SIZE(1)) : \
196 (_s <= DIR_SIZE_WITH_BLOCK(2) ? ROUND_TO(_s, DIR_BLOCK_SIZE(2)) : ROUND_TO(_s, DIR_BLOCK_SIZE(3)))))
197 #define dir_tag(_e) ((uint32_t)((_e)->w[2] & ((1 << DIR_TAG_WIDTH) - 1)))
198 #define dir_set_tag(_e, _t) \
199 (_e)->w[2] = (uint16_t)(((_e)->w[2] & ~((1 << DIR_TAG_WIDTH) - 1)) | ((_t) & ((1 << DIR_TAG_WIDTH) - 1)))
200 #define dir_phase(_e) dir_bit(_e, 2, 12)
201 #define dir_set_phase(_e, _v) dir_set_bit(_e, 2, 12, _v)
202 #define dir_head(_e) dir_bit(_e, 2, 13)
203 #define dir_set_head(_e, _v) dir_set_bit(_e, 2, 13, _v)
204 #define dir_pinned(_e) dir_bit(_e, 2, 14)
205 #define dir_set_pinned(_e, _v) dir_set_bit(_e, 2, 14, _v)
206 #define dir_token(_e) dir_bit(_e, 2, 15)
207 #define dir_set_token(_e, _v) dir_set_bit(_e, 2, 15, _v)
208 #define dir_next(_e) (_e)->w[3]
209 #define dir_set_next(_e, _o) (_e)->w[3] = (uint16_t)(_o)
210 #define dir_prev(_e) (_e)->w[2]
211 #define dir_set_prev(_e, _o) (_e)->w[2] = (uint16_t)(_o)
212
213 // INKqa11166 - Cache can not store 2 HTTP alternates simultaneously.
214 // To allow this, move the vector from the CacheVC to the OpenDirEntry.
215 // Each CacheVC now maintains a pointer to this vector. Adding/Deleting
216 // alternates from this vector is done under the Vol::lock. The alternate
217 // is deleted/inserted into the vector just before writing the vector disk
218 // (CacheVC::updateVector).
219 LINK_FORWARD_DECLARATION(CacheVC, opendir_link) // forward declaration
220 struct OpenDirEntry {
221 DLL<CacheVC, Link_CacheVC_opendir_link> writers; // list of all the current writers
222 DLL<CacheVC, Link_CacheVC_opendir_link> readers; // list of all the current readers - not used
223 CacheHTTPInfoVector vector; // Vector for the http document. Each writer
224 // maintains a pointer to this vector and
225 // writes it down to disk.
226 CacheKey single_doc_key; // Key for the resident alternate.
227 Dir single_doc_dir; // Directory for the resident alternate
228 Dir first_dir; // Dir for the vector. If empty, a new dir is
229 // inserted, otherwise this dir is overwritten
230 uint16_t num_writers; // num of current writers
231 uint16_t max_writers; // max number of simultaneous writers allowed
232 bool dont_update_directory; // if set, the first_dir is not updated.
233 bool move_resident_alt; // if set, single_doc_dir is inserted.
234 bool reading_vec; // somebody is currently reading the vector
235 bool writing_vec; // somebody is currently writing the vector
236
237 LINK(OpenDirEntry, link);
238
239 int wait(CacheVC *c, int msec);
240
241 bool
has_multiple_writersOpenDirEntry242 has_multiple_writers()
243 {
244 return num_writers > 1;
245 }
246 };
247
248 struct OpenDir : public Continuation {
249 Queue<CacheVC, Link_CacheVC_opendir_link> delayed_readers;
250 DLL<OpenDirEntry> bucket[OPEN_DIR_BUCKETS];
251
252 int open_write(CacheVC *c, int allow_if_writers, int max_writers);
253 int close_write(CacheVC *c);
254 OpenDirEntry *open_read(const CryptoHash *key);
255 int signal_readers(int event, Event *e);
256
257 OpenDir();
258 };
259
260 struct CacheSync : public Continuation {
261 int vol_idx = 0;
262 char *buf = nullptr;
263 size_t buflen = 0;
264 bool buf_huge = false;
265 off_t writepos = 0;
266 AIOCallbackInternal io;
267 Event *trigger = nullptr;
268 ink_hrtime start_time = 0;
269 int mainEvent(int event, Event *e);
270 void aio_write(int fd, char *b, int n, off_t o);
271
CacheSyncCacheSync272 CacheSync() : Continuation(new_ProxyMutex()) { SET_HANDLER(&CacheSync::mainEvent); }
273 };
274
275 // Global Functions
276
277 void vol_init_dir(Vol *d);
278 int dir_token_probe(const CacheKey *, Vol *, Dir *);
279 int dir_probe(const CacheKey *, Vol *, Dir *, Dir **);
280 int dir_insert(const CacheKey *key, Vol *d, Dir *to_part);
281 int dir_overwrite(const CacheKey *key, Vol *d, Dir *to_part, Dir *overwrite, bool must_overwrite = true);
282 int dir_delete(const CacheKey *key, Vol *d, Dir *del);
283 int dir_lookaside_probe(const CacheKey *key, Vol *d, Dir *result, EvacuationBlock **eblock);
284 int dir_lookaside_insert(EvacuationBlock *b, Vol *d, Dir *to);
285 int dir_lookaside_fixup(const CacheKey *key, Vol *d);
286 void dir_lookaside_cleanup(Vol *d);
287 void dir_lookaside_remove(const CacheKey *key, Vol *d);
288 void dir_free_entry(Dir *e, int s, Vol *d);
289 void dir_sync_init();
290 int check_dir(Vol *d);
291 void dir_clean_vol(Vol *d);
292 void dir_clear_range(off_t start, off_t end, Vol *d);
293 int dir_segment_accounted(int s, Vol *d, int offby = 0, int *free = nullptr, int *used = nullptr, int *empty = nullptr,
294 int *valid = nullptr, int *agg_valid = nullptr, int *avg_size = nullptr);
295 uint64_t dir_entries_used(Vol *d);
296 void sync_cache_dir_on_shutdown();
297
298 // Global Data
299
300 extern Dir empty_dir;
301
302 // Inline Functions
303
304 #define dir_in_seg(_s, _i) ((Dir *)(((char *)(_s)) + (SIZEOF_DIR * (_i))))
305
306 TS_INLINE bool
dir_compare_tag(const Dir * e,const CacheKey * key)307 dir_compare_tag(const Dir *e, const CacheKey *key)
308 {
309 return (dir_tag(e) == DIR_MASK_TAG(key->slice32(2)));
310 }
311
312 TS_INLINE Dir *
dir_from_offset(int64_t i,Dir * seg)313 dir_from_offset(int64_t i, Dir *seg)
314 {
315 #if DIR_DEPTH < 5
316 if (!i) {
317 return nullptr;
318 }
319 return dir_in_seg(seg, i);
320 #else
321 i = i + ((i - 1) / (DIR_DEPTH - 1));
322 return dir_in_seg(seg, i);
323 #endif
324 }
325 TS_INLINE Dir *
next_dir(Dir * d,Dir * seg)326 next_dir(Dir *d, Dir *seg)
327 {
328 int i = dir_next(d);
329 return dir_from_offset(i, seg);
330 }
331 TS_INLINE int64_t
dir_to_offset(const Dir * d,const Dir * seg)332 dir_to_offset(const Dir *d, const Dir *seg)
333 {
334 #if DIR_DEPTH < 5
335 return (((char *)d) - ((char *)seg)) / SIZEOF_DIR;
336 #else
337 int64_t i = (int64_t)((((char *)d) - ((char *)seg)) / SIZEOF_DIR);
338 i = i - (i / DIR_DEPTH);
339 return i;
340 #endif
341 }
342 TS_INLINE Dir *
dir_bucket(int64_t b,Dir * seg)343 dir_bucket(int64_t b, Dir *seg)
344 {
345 return dir_in_seg(seg, b * DIR_DEPTH);
346 }
347 TS_INLINE Dir *
dir_bucket_row(Dir * b,int64_t i)348 dir_bucket_row(Dir *b, int64_t i)
349 {
350 return dir_in_seg(b, i);
351 }
352