/*	$OpenBSD: kern_smr.c,v 1.17 2024/07/08 14:46:47 mpi Exp $	*/

/*
 * Copyright (c) 2019-2020 Visa Hankala
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kthread.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/proc.h>
#include <sys/smr.h>
#include <sys/time.h>
#include <sys/tracepoint.h>
#include <sys/witness.h>

#include <machine/cpu.h>

#define SMR_PAUSE	100		/* pause between rounds in msec */

void	smr_dispatch(struct schedstate_percpu *);
void	smr_grace_wait(void);
void	smr_thread(void *);
void	smr_wakeup(void *);

struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
struct smr_entry_list	smr_deferred;
struct timeout		smr_wakeup_tmo;
unsigned int		smr_expedite;
unsigned int		smr_ndeferred;
unsigned char		smr_grace_period;

#ifdef WITNESS
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif

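/*
 * Check whether the CPU is currently running its idle process.
 */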
static inline int
smr_cpu_is_idle(struct cpu_info *ci)
{
	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
}

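/*
 * Initialize global SMR state: the system-wide deferred list,
 * the witness lock object and the wakeup timeout.
 */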
void
smr_startup(void)
{
	SIMPLEQ_INIT(&smr_deferred);
	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
}

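/*
 * Create the kernel thread that runs the deferred SMR work.
 */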
void
smr_startup_thread(void)
{
	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
		panic("could not create smr thread");
}

struct timeval smr_logintvl = { 300, 0 };

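/*
 * Main loop of the SMR thread: collect deferred entries from the
 * system-wide queue, wait for a grace period to expire, then run
 * the callbacks.  Long dispatch times are logged at a limited rate.
 */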
void
smr_thread(void *arg)
{
	struct timeval elapsed, end, loglast, start;
	struct smr_entry_list deferred;
	struct smr_entry *smr;
	unsigned long count;

	KERNEL_ASSERT_LOCKED();
	KERNEL_UNLOCK();

	memset(&loglast, 0, sizeof(loglast));
	SIMPLEQ_INIT(&deferred);

	for (;;) {
		mtx_enter(&smr_lock);
		if (smr_ndeferred == 0) {
			while (smr_ndeferred == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "bored", INFSLP);
		} else {
			if (smr_expedite == 0)
				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
		}

		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
		smr_ndeferred = 0;
		smr_expedite = 0;
		mtx_leave(&smr_lock);

		getmicrouptime(&start);

		smr_grace_wait();

		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
		WITNESS_LOCK(&smr_lock_obj, 0);

		count = 0;
		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
			smr->smr_func(smr->smr_arg);
			count++;
		}

		WITNESS_UNLOCK(&smr_lock_obj, 0);

		getmicrouptime(&end);
		timersub(&end, &start, &elapsed);
		if (elapsed.tv_sec >= 2 &&
		    ratecheck(&loglast, &smr_logintvl)) {
			printf("smr: dispatch took %ld.%06lds\n",
			    (long)elapsed.tv_sec,
			    (long)elapsed.tv_usec);
		}
		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
	}
}

/*
 * Announce the next grace period and wait until all CPUs have entered it
 * by passing through a quiescent state.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	sched_unpeg_curproc();
#endif /* MULTIPROCESSOR */
}

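/*
 * Wake up the SMR thread.  Used both as the timeout handler and for
 * expedited dispatch.
 */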
void
smr_wakeup(void *arg)
{
	TRACEPOINT(smr, wakeup, NULL);
	wakeup(&smr_ndeferred);
}

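/*
 * Enter an SMR read-side critical section.  In DIAGNOSTIC kernels,
 * the per-CPU nesting depth is tracked for assertion checks.
 */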
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}

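/*
 * Leave an SMR read-side critical section.
 */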
void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}

/*
 * Move SMR entries from the local queue to the system-wide queue.
 */
void
smr_dispatch(struct schedstate_percpu *spc)
{
	int expedite = 0, wake = 0;

	mtx_enter(&smr_lock);
	if (smr_ndeferred == 0)
		wake = 1;
	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
	smr_ndeferred += spc->spc_ndeferred;
	spc->spc_ndeferred = 0;
	smr_expedite |= spc->spc_smrexpedite;
	spc->spc_smrexpedite = 0;
	expedite = smr_expedite;
	mtx_leave(&smr_lock);

	if (expedite)
		smr_wakeup(NULL);
	else if (wake)
		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
}

/*
 * Signal that the current CPU is in a quiescent state.
 */
void
smr_idle(void)
{
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
	unsigned char smrgp;

	SMR_ASSERT_NONCRITICAL();

	if (spc->spc_ndeferred > 0)
		smr_dispatch(spc);

	/*
	 * Update this CPU's view of the system's grace period.
	 * The update must become visible after any preceding reads
	 * of SMR-protected data.
	 */
	smrgp = READ_ONCE(smr_grace_period);
	if (__predict_false(spc->spc_smrgp != smrgp)) {
		membar_exit();
		WRITE_ONCE(spc->spc_smrgp, smrgp);
	}
}

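/*
 * Queue a deferred call on the current CPU.  The entry is appended to
 * the per-CPU queue at splhigh; if the CPU is idle, the local queue is
 * dispatched to the system-wide queue right away.
 */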
void
smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
    int expedite)
{
	struct cpu_info *ci = curcpu();
	struct schedstate_percpu *spc = &ci->ci_schedstate;
	int s;

	KASSERT(smr->smr_func == NULL);

	smr->smr_func = func;
	smr->smr_arg = arg;

	s = splhigh();
	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
	spc->spc_ndeferred++;
	spc->spc_smrexpedite |= expedite;
	splx(s);
	TRACEPOINT(smr, call, func, arg, expedite);

	/*
	 * If this call was made from an interrupt context that
	 * preempted idle state, dispatch the local queue to the shared
	 * queue immediately.
	 * The entries would otherwise linger in the local queue for a
	 * long time if the CPU went to sleep without calling smr_idle().
	 */
	if (smr_cpu_is_idle(ci))
		smr_dispatch(spc);
}

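/*
 * Deferred callback used by smr_barrier_impl() to signal completion.
 */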
void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}

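/*
 * Block until a grace period has expired by queueing a callback and
 * sleeping until it runs.  Does nothing when the kernel has panicked
 * or the kernel debugger is active.
 */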
void
smr_barrier_impl(int expedite)
{
	struct cond c = COND_INITIALIZER();
	struct smr_entry smr;

	if (panicstr != NULL || db_active)
		return;

	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);

	TRACEPOINT(smr, barrier_enter, expedite);
	smr_init(&smr);
	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
	cond_wait(&c, "smrbar");
	TRACEPOINT(smr, barrier_exit, expedite);
}