xref: /openbsd/sys/kern/kern_smr.c (revision cf31dfde)
1 /*	$OpenBSD: kern_smr.c,v 1.17 2024/07/08 14:46:47 mpi Exp $	*/
2 
3 /*
4  * Copyright (c) 2019-2020 Visa Hankala
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>
20 #include <sys/systm.h>
21 #include <sys/kthread.h>
22 #include <sys/mutex.h>
23 #include <sys/percpu.h>
24 #include <sys/proc.h>
25 #include <sys/smr.h>
26 #include <sys/time.h>
27 #include <sys/tracepoint.h>
28 #include <sys/witness.h>
29 
30 #include <machine/cpu.h>
31 
32 #define SMR_PAUSE	100		/* pause between rounds in msec */
33 
34 void	smr_dispatch(struct schedstate_percpu *);
35 void	smr_grace_wait(void);
36 void	smr_thread(void *);
37 void	smr_wakeup(void *);
38 
39 struct mutex		smr_lock = MUTEX_INITIALIZER(IPL_HIGH);
40 struct smr_entry_list	smr_deferred;
41 struct timeout		smr_wakeup_tmo;
42 unsigned int		smr_expedite;
43 unsigned int		smr_ndeferred;
44 unsigned char		smr_grace_period;
45 
#ifdef WITNESS
/*
 * Pseudo lock object used only for witness(4) lock order checking.
 * smr_thread() marks it as held while it runs the deferred callbacks,
 * and smr_barrier_impl() checks the order against it before waiting.
 * It is declared as a sleepable lock of the rwlock class.
 */
static const char smr_lock_name[] = "smr";
struct lock_object smr_lock_obj = {
	.lo_name = smr_lock_name,
	.lo_flags = LO_WITNESS | LO_INITIALIZED | LO_SLEEPABLE |
	    (LO_CLASS_RWLOCK << LO_CLASSSHIFT)
};
struct lock_type smr_lock_type = {
	.lt_name = smr_lock_name
};
#endif
57 
58 static inline int
smr_cpu_is_idle(struct cpu_info * ci)59 smr_cpu_is_idle(struct cpu_info *ci)
60 {
61 	return ci->ci_curproc == ci->ci_schedstate.spc_idleproc;
62 }
63 
64 void
smr_startup(void)65 smr_startup(void)
66 {
67 	SIMPLEQ_INIT(&smr_deferred);
68 	WITNESS_INIT(&smr_lock_obj, &smr_lock_type);
69 	timeout_set(&smr_wakeup_tmo, smr_wakeup, NULL);
70 }
71 
72 void
smr_startup_thread(void)73 smr_startup_thread(void)
74 {
75 	if (kthread_create(smr_thread, NULL, NULL, "smr") != 0)
76 		panic("could not create smr thread");
77 }
78 
/*
 * Minimum interval (seconds, microseconds) between "dispatch took"
 * log messages printed by smr_thread().
 */
struct timeval smr_logintvl = { 300, 0 };
80 
81 void
smr_thread(void * arg)82 smr_thread(void *arg)
83 {
84 	struct timeval elapsed, end, loglast, start;
85 	struct smr_entry_list deferred;
86 	struct smr_entry *smr;
87 	unsigned long count;
88 
89 	KERNEL_ASSERT_LOCKED();
90 	KERNEL_UNLOCK();
91 
92 	memset(&loglast, 0, sizeof(loglast));
93 	SIMPLEQ_INIT(&deferred);
94 
95 	for (;;) {
96 		mtx_enter(&smr_lock);
97 		if (smr_ndeferred == 0) {
98 			while (smr_ndeferred == 0)
99 				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
100 				    "bored", INFSLP);
101 		} else {
102 			if (smr_expedite == 0)
103 				msleep_nsec(&smr_ndeferred, &smr_lock, PVM,
104 				    "pause", MSEC_TO_NSEC(SMR_PAUSE));
105 		}
106 
107 		SIMPLEQ_CONCAT(&deferred, &smr_deferred);
108 		smr_ndeferred = 0;
109 		smr_expedite = 0;
110 		mtx_leave(&smr_lock);
111 
112 		getmicrouptime(&start);
113 
114 		smr_grace_wait();
115 
116 		WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
117 		WITNESS_LOCK(&smr_lock_obj, 0);
118 
119 		count = 0;
120 		while ((smr = SIMPLEQ_FIRST(&deferred)) != NULL) {
121 			SIMPLEQ_REMOVE_HEAD(&deferred, smr_list);
122 			TRACEPOINT(smr, called, smr->smr_func, smr->smr_arg);
123 			smr->smr_func(smr->smr_arg);
124 			count++;
125 		}
126 
127 		WITNESS_UNLOCK(&smr_lock_obj, 0);
128 
129 		getmicrouptime(&end);
130 		timersub(&end, &start, &elapsed);
131 		if (elapsed.tv_sec >= 2 &&
132 		    ratecheck(&loglast, &smr_logintvl)) {
133 			printf("smr: dispatch took %ld.%06lds\n",
134 			    (long)elapsed.tv_sec,
135 			    (long)elapsed.tv_usec);
136 		}
137 		TRACEPOINT(smr, thread, TIMEVAL_TO_NSEC(&elapsed), count);
138 	}
139 }
140 
/*
 * Announce next grace period and wait until all CPUs have entered it
 * by crossing quiescent state.
 *
 * On kernels built without MULTIPROCESSOR this function does nothing.
 */
void
smr_grace_wait(void)
{
#ifdef MULTIPROCESSOR
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;
	unsigned char smrgp;

	/* The counter is an unsigned char and is expected to wrap. */
	smrgp = READ_ONCE(smr_grace_period) + 1;
	WRITE_ONCE(smr_grace_period, smrgp);

	/* The CPU running this thread has trivially seen the new period. */
	curcpu()->ci_schedstate.spc_smrgp = smrgp;

	CPU_INFO_FOREACH(cii, ci) {
		if (!CPU_IS_RUNNING(ci))
			continue;
		if (READ_ONCE(ci->ci_schedstate.spc_smrgp) == smrgp)
			continue;
		/*
		 * NOTE(review): sched_peg_curproc() presumably moves this
		 * thread onto `ci', which makes that CPU pass through
		 * smr_idle() on the way -- confirm against the scheduler.
		 * The assertion checks that the CPU's view got updated.
		 */
		sched_peg_curproc(ci);
		KASSERT(ci->ci_schedstate.spc_smrgp == smrgp);
	}
	sched_unpeg_curproc();
#endif /* MULTIPROCESSOR */
}
169 
170 void
smr_wakeup(void * arg)171 smr_wakeup(void *arg)
172 {
173 	TRACEPOINT(smr, wakeup, NULL);
174 	wakeup(&smr_ndeferred);
175 }
176 
/*
 * Enter an SMR read-side critical section.
 * With DIAGNOSTIC this bumps the current CPU's nesting depth counter;
 * without it the function body is empty.
 */
void
smr_read_enter(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	spc->spc_smrdepth++;
#endif
}
186 
/*
 * Leave an SMR read-side critical section.
 * With DIAGNOSTIC this asserts that the current CPU is inside a
 * critical section and decrements its nesting depth counter;
 * without it the function body is empty.
 */
void
smr_read_leave(void)
{
#ifdef DIAGNOSTIC
	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;

	KASSERT(spc->spc_smrdepth > 0);
	spc->spc_smrdepth--;
#endif
}
197 
198 /*
199  * Move SMR entries from the local queue to the system-wide queue.
200  */
201 void
smr_dispatch(struct schedstate_percpu * spc)202 smr_dispatch(struct schedstate_percpu *spc)
203 {
204 	int expedite = 0, wake = 0;
205 
206 	mtx_enter(&smr_lock);
207 	if (smr_ndeferred == 0)
208 		wake = 1;
209 	SIMPLEQ_CONCAT(&smr_deferred, &spc->spc_deferred);
210 	smr_ndeferred += spc->spc_ndeferred;
211 	spc->spc_ndeferred = 0;
212 	smr_expedite |= spc->spc_smrexpedite;
213 	spc->spc_smrexpedite = 0;
214 	expedite = smr_expedite;
215 	mtx_leave(&smr_lock);
216 
217 	if (expedite)
218 		smr_wakeup(NULL);
219 	else if (wake)
220 		timeout_add_msec(&smr_wakeup_tmo, SMR_PAUSE);
221 }
222 
223 /*
224  * Signal that the current CPU is in quiescent state.
225  */
226 void
smr_idle(void)227 smr_idle(void)
228 {
229 	struct schedstate_percpu *spc = &curcpu()->ci_schedstate;
230 	unsigned char smrgp;
231 
232 	SMR_ASSERT_NONCRITICAL();
233 
234 	if (spc->spc_ndeferred > 0)
235 		smr_dispatch(spc);
236 
237 	/*
238 	 * Update this CPU's view of the system's grace period.
239 	 * The update must become visible after any preceding reads
240 	 * of SMR-protected data.
241 	 */
242 	smrgp = READ_ONCE(smr_grace_period);
243 	if (__predict_false(spc->spc_smrgp != smrgp)) {
244 		membar_exit();
245 		WRITE_ONCE(spc->spc_smrgp, smrgp);
246 	}
247 }
248 
249 void
smr_call_impl(struct smr_entry * smr,void (* func)(void *),void * arg,int expedite)250 smr_call_impl(struct smr_entry *smr, void (*func)(void *), void *arg,
251     int expedite)
252 {
253 	struct cpu_info *ci = curcpu();
254 	struct schedstate_percpu *spc = &ci->ci_schedstate;
255 	int s;
256 
257 	KASSERT(smr->smr_func == NULL);
258 
259 	smr->smr_func = func;
260 	smr->smr_arg = arg;
261 
262 	s = splhigh();
263 	SIMPLEQ_INSERT_TAIL(&spc->spc_deferred, smr, smr_list);
264 	spc->spc_ndeferred++;
265 	spc->spc_smrexpedite |= expedite;
266 	splx(s);
267 	TRACEPOINT(smr, call, func, arg, expedite);
268 
269 	/*
270 	 * If this call was made from an interrupt context that
271 	 * preempted idle state, dispatch the local queue to the shared
272 	 * queue immediately.
273 	 * The entries would linger in the local queue long if the CPU
274 	 * went to sleep without calling smr_idle().
275 	 */
276 	if (smr_cpu_is_idle(ci))
277 		smr_dispatch(spc);
278 }
279 
/*
 * SMR callback used by smr_barrier_impl(): signal the condition
 * passed in `arg' to release the waiting caller.
 */
void
smr_barrier_func(void *arg)
{
	struct cond *c = arg;

	cond_signal(c);
}
287 
288 void
smr_barrier_impl(int expedite)289 smr_barrier_impl(int expedite)
290 {
291 	struct cond c = COND_INITIALIZER();
292 	struct smr_entry smr;
293 
294 	if (panicstr != NULL || db_active)
295 		return;
296 
297 	WITNESS_CHECKORDER(&smr_lock_obj, LOP_NEWORDER, NULL);
298 
299 	TRACEPOINT(smr, barrier_enter, expedite);
300 	smr_init(&smr);
301 	smr_call_impl(&smr, smr_barrier_func, &c, expedite);
302 	cond_wait(&c, "smrbar");
303 	TRACEPOINT(smr, barrier_exit, expedite);
304 }
305