1 /*
2  * Copyright (c) 2017 Fastly, Inc.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a copy
5  * of this software and associated documentation files (the "Software"), to
6  * deal in the Software without restriction, including without limitation the
7  * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
8  * sell copies of the Software, and to permit persons to whom the Software is
9  * furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice shall be included in
12  * all copies or substantial portions of the Software.
13  *
14  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
19  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
20  * IN THE SOFTWARE.
21  */
22 #include <string.h>
23 #include <stdio.h>
24 #include <inttypes.h>
25 #include "h2o/memory.h"
26 #include "h2o/timerwheel.h"
27 
28 #define H2O_TIMERWHEEL_SLOTS_MASK (H2O_TIMERWHEEL_SLOTS_PER_WHEEL - 1)
29 
30 #ifndef H2O_TIMER_VALIDATE
31 #define H2O_TIMER_VALIDATE 0
32 #endif
33 
34 #define REPORT_CORRUPT_TIMER(ctx, e, fmt, ...)                                                                                     \
35     do {                                                                                                                           \
36         h2o_timerwheel_entry_t *_e = (e);                                                                                          \
37         h2o_error_printf("%s:%d:last_run=%" PRIu64 fmt ", timer(%p)={expire_at=%" PRIu64 ", cb=%p}\n", __FUNCTION__, __LINE__,     \
38                          (ctx)->last_run, __VA_ARGS__, _e, _e->expire_at, _e->cb);                                                 \
39     } while (0)
40 
41 #define ABORT_CORRUPT_TIMER(ctx, t, fmt, ...)                                                                                      \
42     do {                                                                                                                           \
43         REPORT_CORRUPT_TIMER(ctx, t, fmt, __VA_ARGS__);                                                                            \
44         h2o_fatal("timerwheel");                                                                                                   \
45     } while (0)
46 
47 struct st_h2o_timerwheel_t {
48     /**
49      * the last time h2o_timer_run_wheel was called
50      */
51     uint64_t last_run;
52     /**
53      * maximum ticks that can be retained safely in the structure. Objects that need to be retained longer will be re-registered at
54      * the highest wheel.
55      */
56     uint64_t max_ticks;
57     /**
58      * number of wheels and the wheel
59      */
60     size_t num_wheels;
61     h2o_linklist_t wheels[1][H2O_TIMERWHEEL_SLOTS_PER_WHEEL];
62 };
63 
h2o_timerwheel_dump(h2o_timerwheel_t * ctx)64 void h2o_timerwheel_dump(h2o_timerwheel_t *ctx)
65 {
66     size_t wheel, slot;
67 
68     h2o_error_printf("%s(%p):\n", __FUNCTION__, ctx);
69     for (wheel = 0; wheel < ctx->num_wheels; wheel++) {
70         for (slot = 0; slot < H2O_TIMERWHEEL_SLOTS_PER_WHEEL; slot++) {
71             h2o_linklist_t *anchor = &ctx->wheels[wheel][slot], *l;
72             for (l = anchor->next; l != anchor; l = l->next) {
73                 h2o_timerwheel_entry_t *e = H2O_STRUCT_FROM_MEMBER(h2o_timerwheel_entry_t, _link, l);
74                 h2o_error_printf("  - {wheel: %zu, slot: %zu, expires:%" PRIu64 ", self: %p, cb:%p}\n", wheel, slot, e->expire_at,
75                                  e, e->cb);
76             }
77         }
78     }
79 }
80 
timer_wheel(size_t num_wheels,uint64_t delta)81 static size_t timer_wheel(size_t num_wheels, uint64_t delta)
82 {
83     H2O_BUILD_ASSERT(sizeof(unsigned long long) == 8);
84 
85     if (delta == 0)
86         return 0;
87     return (63 - __builtin_clzll(delta)) / H2O_TIMERWHEEL_BITS_PER_WHEEL;
88 }
89 
90 /* calculate slot number based on the absolute expiration time */
timer_slot(size_t wheel,uint64_t expire)91 static size_t timer_slot(size_t wheel, uint64_t expire)
92 {
93     return H2O_TIMERWHEEL_SLOTS_MASK & (expire >> (wheel * H2O_TIMERWHEEL_BITS_PER_WHEEL));
94 }
95 
96 /**
97  * returned at_max is inclusive
98  */
calc_expire_for_slot(size_t num_wheels,uint64_t last_run,size_t wheel,size_t slot,uint64_t * at_min,uint64_t * at_max)99 static void calc_expire_for_slot(size_t num_wheels, uint64_t last_run, size_t wheel, size_t slot, uint64_t *at_min,
100                                  uint64_t *at_max)
101 {
102 #define SPAN(i) ((uint64_t)1 << (H2O_TIMERWHEEL_BITS_PER_WHEEL * (i))) /* returns the span of time for given wheel index */
103 
104     int adj_at_min = 0;
105 
106     *at_min = (last_run & ~(SPAN(wheel + 1) - 1)) + slot * SPAN(wheel);
107 
108     if (wheel == 0) {
109         if (*at_min < last_run)
110             adj_at_min = 1;
111     } else {
112         if (*at_min <= last_run)
113             adj_at_min = 1;
114     }
115     if (adj_at_min)
116         *at_min += SPAN(wheel + 1);
117 
118     if (wheel == num_wheels - 1) {
119         *at_max = UINT64_MAX;
120     } else {
121         *at_max = *at_min + SPAN(wheel) - 1;
122     }
123 
124 #undef SPAN
125 }
126 
validate_slot(h2o_timerwheel_t * ctx,size_t wheel,size_t slot)127 static int validate_slot(h2o_timerwheel_t *ctx, size_t wheel, size_t slot)
128 {
129     h2o_linklist_t *anchor = &ctx->wheels[wheel][slot], *link;
130     uint64_t at_min, at_max;
131     int success = 1;
132 
133     calc_expire_for_slot(ctx->num_wheels, ctx->last_run, wheel, slot, &at_min, &at_max);
134 
135     for (link = anchor->next; link != anchor; link = link->next) {
136         h2o_timerwheel_entry_t *e = H2O_STRUCT_FROM_MEMBER(h2o_timerwheel_entry_t, _link, link);
137         if (!(at_min <= e->expire_at && e->expire_at <= at_max)) {
138             REPORT_CORRUPT_TIMER(ctx, e, ", wheel=%zu, slot=%zu, expected_range=[%" PRIu64 ",%" PRIu64 "]", wheel, slot, at_min,
139                                  at_max);
140             success = 0;
141         }
142     }
143 
144     return success;
145 }
146 
h2o_timerwheel_validate(h2o_timerwheel_t * ctx)147 int h2o_timerwheel_validate(h2o_timerwheel_t *ctx)
148 {
149     size_t wheel, slot;
150     int success = 1;
151 
152     for (wheel = 0; wheel < ctx->num_wheels; ++wheel)
153         for (slot = 0; slot < H2O_TIMERWHEEL_SLOTS_PER_WHEEL; ++slot)
154             if (!validate_slot(ctx, wheel, slot))
155                 success = 0;
156 
157     return success;
158 }
159 
h2o_timerwheel_get_wake_at(h2o_timerwheel_t * ctx)160 uint64_t h2o_timerwheel_get_wake_at(h2o_timerwheel_t *ctx)
161 {
162     size_t wheel, slot;
163     uint64_t at = ctx->last_run;
164 
165     for (wheel = 0; wheel < ctx->num_wheels; ++wheel) {
166         uint64_t at_incr = (uint64_t)1 << (wheel * H2O_TIMERWHEEL_BITS_PER_WHEEL);
167         size_t slot_base = timer_slot(wheel, at);
168         /* check current wheel from slot_base */
169         for (slot = slot_base; slot < H2O_TIMERWHEEL_SLOTS_PER_WHEEL; ++slot) {
170             if (!h2o_linklist_is_empty(&ctx->wheels[wheel][slot]))
171                 goto Found;
172             at += at_incr;
173         }
174         while (1) {
175             /* handle carry */
176             if (wheel + 1 < ctx->num_wheels) {
177                 size_t wi;
178                 for (wi = wheel + 1; wi < ctx->num_wheels; ++wi) {
179                     size_t si = timer_slot(wi, at);
180                     if (!h2o_linklist_is_empty(&ctx->wheels[wi][si]))
181                         goto Found;
182                     if (si != 0)
183                         break;
184                 }
185             }
186             /* check current wheel from 0 to slot_base */
187             if (slot_base == 0)
188                 break;
189             for (slot = 0; slot < slot_base; ++slot) {
190                 if (!h2o_linklist_is_empty(&ctx->wheels[wheel][slot]))
191                     goto Found;
192                 at += at_incr;
193             }
194             at += at_incr * (H2O_TIMERWHEEL_SLOTS_PER_WHEEL - slot_base);
195             slot_base = 0;
196         }
197     }
198 
199     /* not found */
200     return UINT64_MAX;
201 Found:
202     return at;
203 }
204 
link_timer(h2o_timerwheel_t * ctx,h2o_timerwheel_entry_t * entry)205 static void link_timer(h2o_timerwheel_t *ctx, h2o_timerwheel_entry_t *entry)
206 {
207     size_t wheel, slot;
208     uint64_t wheel_abs = entry->expire_at;
209 
210     if (wheel_abs > ctx->last_run + ctx->max_ticks)
211         wheel_abs = ctx->last_run + ctx->max_ticks;
212 
213     wheel = timer_wheel(ctx->num_wheels, wheel_abs - ctx->last_run);
214     slot = timer_slot(wheel, wheel_abs);
215 
216     if (H2O_TIMER_VALIDATE) {
217         uint64_t at_min, at_max;
218         calc_expire_for_slot(ctx->num_wheels, ctx->last_run, wheel, slot, &at_min, &at_max);
219         if (!(at_min <= entry->expire_at && entry->expire_at <= at_max))
220             ABORT_CORRUPT_TIMER(ctx, entry, ", wheel=%zu, slot=%zu, at_min=%" PRIu64 ", at_max=%" PRIu64, wheel, slot, at_min,
221                                 at_max);
222     }
223 
224     h2o_linklist_insert(&ctx->wheels[wheel][slot], &entry->_link);
225 }
226 
227 /* timer wheel APIs */
228 
229 /**
230  * initializes a timerwheel
231  */
h2o_timerwheel_create(size_t num_wheels,uint64_t now)232 h2o_timerwheel_t *h2o_timerwheel_create(size_t num_wheels, uint64_t now)
233 {
234     h2o_timerwheel_t *ctx = h2o_mem_alloc(offsetof(h2o_timerwheel_t, wheels) + sizeof(ctx->wheels[0]) * num_wheels);
235     size_t i, j;
236 
237     ctx->last_run = now;
238     /* max_ticks cannot be so large that the entry will be linked once more to the same slot, see the assert in `cascade` */
239     ctx->max_ticks = ((uint64_t)1 << (H2O_TIMERWHEEL_BITS_PER_WHEEL * (num_wheels - 1))) * (H2O_TIMERWHEEL_SLOTS_PER_WHEEL - 1);
240     ctx->num_wheels = num_wheels;
241     for (i = 0; i < ctx->num_wheels; i++)
242         for (j = 0; j < H2O_TIMERWHEEL_SLOTS_PER_WHEEL; j++)
243             h2o_linklist_init_anchor(&ctx->wheels[i][j]);
244 
245     return ctx;
246 }
247 
h2o_timerwheel_destroy(h2o_timerwheel_t * ctx)248 void h2o_timerwheel_destroy(h2o_timerwheel_t *ctx)
249 {
250     size_t i, j;
251 
252     for (i = 0; i < ctx->num_wheels; ++i) {
253         for (j = 0; j < H2O_TIMERWHEEL_SLOTS_PER_WHEEL; ++j) {
254             while (!h2o_linklist_is_empty(&ctx->wheels[i][j])) {
255                 h2o_timerwheel_entry_t *entry = H2O_STRUCT_FROM_MEMBER(h2o_timerwheel_entry_t, _link, ctx->wheels[i][j].next);
256                 h2o_timerwheel_unlink(entry);
257             }
258         }
259     }
260 
261     free(ctx);
262 }
263 
264 /**
265  * cascading happens when the lower wheel wraps around and ticks the next
266  * higher wheel
267  */
cascade_one(h2o_timerwheel_t * ctx,size_t wheel,size_t slot)268 static void cascade_one(h2o_timerwheel_t *ctx, size_t wheel, size_t slot)
269 {
270     assert(wheel > 0);
271 
272     h2o_linklist_t *s = &ctx->wheels[wheel][slot];
273 
274     while (!h2o_linklist_is_empty(s)) {
275         h2o_timerwheel_entry_t *entry = H2O_STRUCT_FROM_MEMBER(h2o_timerwheel_entry_t, _link, s->next);
276         if (entry->expire_at < ctx->last_run)
277             ABORT_CORRUPT_TIMER(ctx, entry, ", wheel=%zu, slot=%zu", wheel, slot);
278         h2o_linklist_unlink(&entry->_link);
279         link_timer(ctx, entry);
280         assert(&entry->_link != s->prev); /* detect the entry reassigned to the same slot */
281     }
282 }
283 
cascade_all(h2o_timerwheel_t * ctx,size_t wheel)284 static int cascade_all(h2o_timerwheel_t *ctx, size_t wheel)
285 {
286     int cascaded = 0;
287 
288     for (; wheel < ctx->num_wheels; ++wheel) {
289         size_t slot = timer_slot(wheel, ctx->last_run);
290         if (!h2o_linklist_is_empty(&ctx->wheels[wheel][slot]))
291             cascaded = 1;
292         cascade_one(ctx, wheel, slot);
293         if (slot != 0)
294             break;
295     }
296 
297     return cascaded;
298 }
299 
h2o_timerwheel_get_expired(h2o_timerwheel_t * ctx,uint64_t now,h2o_linklist_t * expired)300 void h2o_timerwheel_get_expired(h2o_timerwheel_t *ctx, uint64_t now, h2o_linklist_t *expired)
301 {
302     size_t wheel = 0, slot, slot_start;
303 
304     /* time might rewind if the clock is reset */
305     if (now < ctx->last_run) {
306         h2o_error_printf("%s:detected rewind; last_run=%" PRIu64 ", now=%" PRIu64 "\n", __FUNCTION__, ctx->last_run, now);
307         return;
308     }
309 
310 Redo:
311     /* collect from the first slot */
312     slot_start = timer_slot(wheel, ctx->last_run);
313     for (slot = slot_start; slot < H2O_TIMERWHEEL_SLOTS_PER_WHEEL; ++slot) {
314         if (wheel == 0) {
315             h2o_linklist_insert_list(expired, &ctx->wheels[wheel][slot]);
316             if (ctx->last_run == now)
317                 goto Exit;
318             ++ctx->last_run;
319         } else {
320             if (!h2o_linklist_is_empty(&ctx->wheels[wheel][slot])) {
321                 cascade_one(ctx, wheel, slot);
322                 assert(h2o_linklist_is_empty(&ctx->wheels[wheel][slot]));
323                 wheel = 0;
324                 goto Redo;
325             }
326             ctx->last_run += 1 << (wheel * H2O_TIMERWHEEL_BITS_PER_WHEEL);
327             if (ctx->last_run > now) {
328                 ctx->last_run = now;
329                 goto Exit;
330             }
331         }
332     }
333     /* carry */
334     if (cascade_all(ctx, wheel != 0 ? wheel : 1)) {
335         wheel = 0;
336         goto Redo;
337     }
338     if (slot_start != 0 || ++wheel < ctx->num_wheels)
339         goto Redo;
340     /* all the wheels were empty, and they all belonged to the past */
341     if (ctx->last_run < now)
342         ctx->last_run = now;
343 
344 Exit:
345     assert(ctx->last_run == now);
346 }
347 
h2o_timerwheel_run(h2o_timerwheel_t * ctx,uint64_t now)348 size_t h2o_timerwheel_run(h2o_timerwheel_t *ctx, uint64_t now)
349 {
350     h2o_linklist_t expired;
351     size_t count = 0;
352 
353     h2o_linklist_init_anchor(&expired);
354     h2o_timerwheel_get_expired(ctx, now, &expired);
355     while (!h2o_linklist_is_empty(&expired)) {
356         h2o_timerwheel_entry_t *entry = H2O_STRUCT_FROM_MEMBER(h2o_timerwheel_entry_t, _link, expired.next);
357         h2o_linklist_unlink(&entry->_link);
358         entry->cb(entry);
359         ++count;
360     }
361 
362     return count;
363 }
364 
h2o_timerwheel_link_abs(h2o_timerwheel_t * ctx,h2o_timerwheel_entry_t * entry,uint64_t at)365 void h2o_timerwheel_link_abs(h2o_timerwheel_t *ctx, h2o_timerwheel_entry_t *entry, uint64_t at)
366 {
367     entry->expire_at = at < ctx->last_run ? ctx->last_run : at;
368     link_timer(ctx, entry);
369 }
370