/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2019, 2020 Jeffrey Roberson <jeff@FreeBSD.org>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 *
 */

#ifndef _SYS_SMR_H_
#define _SYS_SMR_H_

#include <sys/_smr.h>

/*
 * Safe memory reclamation.  See subr_smr.c for a description of the
 * algorithm, and smr_types.h for macros to define and access SMR-protected
 * data structures.
 *
 * Readers synchronize with smr_enter()/smr_exit() and writers may either
 * free directly to an SMR UMA zone or use smr_synchronize() or smr_wait().
 * Illustrative usage sketches appear near the end of this header.
 */

/*
 * Modular arithmetic for comparing sequence numbers that have
 * potentially wrapped.  Copied from tcp_seq.h.
 */
#define SMR_SEQ_LT(a, b)	((smr_delta_t)((a)-(b)) < 0)
#define SMR_SEQ_LEQ(a, b)	((smr_delta_t)((a)-(b)) <= 0)
#define SMR_SEQ_GT(a, b)	((smr_delta_t)((a)-(b)) > 0)
#define SMR_SEQ_GEQ(a, b)	((smr_delta_t)((a)-(b)) >= 0)
#define SMR_SEQ_DELTA(a, b)	((smr_delta_t)((a)-(b)))
#define SMR_SEQ_MIN(a, b)	(SMR_SEQ_LT((a), (b)) ? (a) : (b))
#define SMR_SEQ_MAX(a, b)	(SMR_SEQ_GT((a), (b)) ? (a) : (b))

#define SMR_SEQ_INVALID		0
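
/*
 * Worked example of the wrap-safe comparisons above (illustrative only,
 * assuming the 32-bit unsigned smr_seq_t and signed smr_delta_t types from
 * sys/_smr.h).  The unsigned subtraction wraps modulo 2^32 and the signed
 * cast recovers the ordering even after the write sequence rolls over.
 * Suppose a reader cached a = 0xfffffff0 just before the counter wrapped
 * and the writer has since advanced to b = 0x00000010:
 *
 *	(a)-(b)                == 0xffffffe0  (unsigned wraparound)
 *	(smr_delta_t)((a)-(b)) == -32
 *	SMR_SEQ_LT(a, b)       == true
 *
 * so 'a' is correctly treated as older than 'b', whereas a plain 'a < b'
 * comparison would report the opposite.
 */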

/* Shared SMR state. */
union s_wr {
	struct {
		smr_seq_t	seq;	/* Current write sequence #. */
		int		ticks;	/* tick of last update (LAZY) */
	};
	uint64_t	_pair;
};
struct smr_shared {
	const char	*s_name;	/* Name for debugging/reporting. */
	union s_wr	s_wr;		/* Write sequence */
	smr_seq_t	s_rd_seq;	/* Minimum observed read sequence. */
};
typedef struct smr_shared *smr_shared_t;

/* Per-cpu SMR state. */
struct smr {
	smr_seq_t	c_seq;		/* Current observed sequence. */
	smr_shared_t	c_shared;	/* Shared SMR state. */
	int		c_deferred;	/* Deferred advance counter. */
	int		c_limit;	/* Deferred advance limit. */
	int		c_flags;	/* SMR Configuration */
};

#define SMR_LAZY	0x0001		/* Higher latency write, fast read. */
#define SMR_DEFERRED	0x0002		/* Aggregate updates to wr_seq. */

/*
 * Return the current write sequence number.  This is not the same as the
 * current goal, which may be in the future.
 */
static inline smr_seq_t
smr_shared_current(smr_shared_t s)
{

	return (atomic_load_int(&s->s_wr.seq));
}

static inline smr_seq_t
smr_current(smr_t smr)
{

	return (smr_shared_current(zpcpu_get(smr)->c_shared));
}

/*
 * Enter a read section.
 */
static inline void
smr_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_enter(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * Store the current observed write sequence number in our
	 * per-cpu state so that it can be queried via smr_poll().
	 * Frees that are newer than this stored value will be
	 * deferred until we call smr_exit().
	 *
	 * Subsequent loads must not be re-ordered with the store.  On
	 * x86 platforms, any locked instruction will provide this
	 * guarantee, so as an optimization we use a single operation to
	 * both store the cached write sequence number and provide the
	 * requisite barrier, taking advantage of the fact that
	 * SMR_SEQ_INVALID is zero.
	 *
	 * It is possible that a long delay between loading the wr_seq
	 * and storing the c_seq could create a situation where the
	 * rd_seq advances beyond our stored c_seq.  In this situation
	 * only the observed wr_seq is stale, the fence still orders
	 * the load.  See smr_poll() for details on how this condition
	 * is detected and handled there.
	 */
#if defined(__amd64__) || defined(__i386__)
	atomic_add_acq_int(&smr->c_seq, smr_shared_current(smr->c_shared));
#else
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
	atomic_thread_fence_seq_cst();
#endif
}

/*
 * Exit a read section.
 */
static inline void
smr_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) == 0,
	    ("smr_exit(%s) lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * Clear the recorded sequence number.  This allows poll() to
	 * detect CPUs not in read sections.
	 *
	 * Use release semantics to retire any stores before the sequence
	 * number is cleared.
	 */
	atomic_store_rel_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}

/*
 * Enter a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_enter(smr_t smr)
{

	critical_enter();
	smr = zpcpu_get(smr);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_enter(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq == 0,
	    ("smr_lazy_enter(%s) does not support recursion.",
	    smr->c_shared->s_name));

	/*
	 * This needs no serialization.  If an interrupt occurs before we
	 * assign wr_seq to c_seq any speculative loads will be discarded.
	 * If we assign a stale wr_seq value due to interrupt we use the
	 * same algorithm that renders smr_enter() safe.
	 */
	atomic_store_int(&smr->c_seq, smr_shared_current(smr->c_shared));
}

/*
 * Exit a lazy smr section.  This is used for read-mostly state that
 * can tolerate a high free latency.
 */
static inline void
smr_lazy_exit(smr_t smr)
{

	smr = zpcpu_get(smr);
	CRITICAL_ASSERT(curthread);
	KASSERT((smr->c_flags & SMR_LAZY) != 0,
	    ("smr_lazy_exit(%s) non-lazy smr.", smr->c_shared->s_name));
	KASSERT(smr->c_seq != SMR_SEQ_INVALID,
	    ("smr_lazy_exit(%s) not in a smr section.", smr->c_shared->s_name));

	/*
	 * All loads/stores must be retired before the sequence becomes
	 * visible.  The fence compiles away on amd64.  Another
	 * alternative would be to omit the fence but store the exit
	 * time and wait 1 tick longer.
	 */
	atomic_thread_fence_rel();
	atomic_store_int(&smr->c_seq, SMR_SEQ_INVALID);
	critical_exit();
}

/*
 * Advances the write sequence number.  Returns the sequence number
 * required to ensure that all modifications are visible to readers.
 */
smr_seq_t smr_advance(smr_t smr);

/*
 * Returns true once the goal sequence has been reached.  If wait is true
 * this will busy loop until success.
 */
bool smr_poll(smr_t smr, smr_seq_t goal, bool wait);

/* Create a new SMR context. */
smr_t smr_create(const char *name, int limit, int flags);

/* Destroy the context. */
void smr_destroy(smr_t smr);

/*
 * Blocking wait for all readers to observe 'goal'.
 */
static inline bool
smr_wait(smr_t smr, smr_seq_t goal)
{

	return (smr_poll(smr, goal, true));
}

/*
 * smr_synchronize() advances the write sequence and returns when all
 * readers have observed it.
 *
 * If callers can cache a sequence number returned from smr_advance() and
 * poll or wait on it at a later time, there is less chance of busy
 * looping while waiting for readers.
 */
static inline void
smr_synchronize(smr_t smr)
{

	smr_wait(smr, smr_advance(smr));
}
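
/*
 * Example read section (an illustrative sketch only; 'foo_smr', 'foo_ptr',
 * struct foo, and use() are hypothetical, and the pointer accessors come
 * from smr_types.h).  Readers must not sleep between smr_enter() and
 * smr_exit() because the pair runs inside a critical section:
 *
 *	SMR_POINTER(struct foo *) foo_ptr;
 *	struct foo *fp;
 *
 *	smr_enter(foo_smr);
 *	fp = smr_entered_load(&foo_ptr, foo_smr);
 *	if (fp != NULL)
 *		use(fp);
 *	smr_exit(foo_smr);
 *
 * Any 'fp' observed here remains valid until smr_exit() because frees
 * newer than the sequence recorded by smr_enter() are deferred.
 */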
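
/*
 * Example writer (an illustrative sketch only; 'foo_smr', 'foo_ptr', 'new',
 * and foo_free() are hypothetical, and the serialized accessors come from
 * smr_types.h; writer-side stores are assumed to be serialized by a lock).
 * The writer publishes the replacement pointer, advances the write
 * sequence, and defers the free until readers have observed the new
 * sequence.  Caching the goal from smr_advance() and polling later avoids
 * the busy wait in smr_synchronize():
 *
 *	struct foo *old;
 *	smr_seq_t goal;
 *
 *	old = smr_serialized_load(&foo_ptr, true);
 *	smr_serialized_store(&foo_ptr, new, true);
 *	goal = smr_advance(foo_smr);
 *	...
 *	if (smr_poll(foo_smr, goal, false))
 *		foo_free(old);
 *
 * Alternatively, smr_synchronize(foo_smr) waits for readers before an
 * immediate free, or the object may be freed directly to an SMR UMA zone,
 * which defers reuse of the memory on its own.
 */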

/* Only at startup. */
void smr_init(void);

#endif /* _SYS_SMR_H_ */