xref: /netbsd/sys/sys/lwp.h (revision df32c3d2)
1 /*	$NetBSD: lwp.h,v 1.215 2022/04/09 23:45:37 riastradh Exp $	*/
2 
3 /*
4  * Copyright (c) 2001, 2006, 2007, 2008, 2009, 2010, 2019, 2020
5  *    The NetBSD Foundation, Inc.
6  * All rights reserved.
7  *
8  * This code is derived from software contributed to The NetBSD Foundation
9  * by Nathan J. Williams and Andrew Doran.
10  *
11  * Redistribution and use in source and binary forms, with or without
12  * modification, are permitted provided that the following conditions
13  * are met:
14  * 1. Redistributions of source code must retain the above copyright
15  *    notice, this list of conditions and the following disclaimer.
16  * 2. Redistributions in binary form must reproduce the above copyright
17  *    notice, this list of conditions and the following disclaimer in the
18  *    documentation and/or other materials provided with the distribution.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30  * POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifndef _SYS_LWP_H_
34 #define _SYS_LWP_H_
35 
36 #if defined(_KERNEL) || defined(_KMEMUSER)
37 
38 #include <sys/param.h>
39 #include <sys/time.h>
40 #include <sys/queue.h>
41 #include <sys/callout.h>
42 #include <sys/kcpuset.h>
43 #include <sys/mutex.h>
44 #include <sys/condvar.h>
45 #include <sys/signalvar.h>
46 #include <sys/sched.h>
47 #include <sys/specificdata.h>
48 #include <sys/syncobj.h>
49 #include <sys/resource.h>
50 
51 #if defined(_KERNEL)
52 struct lwp;
53 /* forward declare this for <machine/cpu.h> so it can get l_cpu. */
54 static __inline struct cpu_info *lwp_getcpu(struct lwp *);
55 #include <machine/cpu.h>		/* curcpu() and cpu_info */
56 #include <sys/atomic.h>
57 #ifdef _KERNEL_OPT
58 #include "opt_kcov.h"
59 #include "opt_kmsan.h"
60 #endif
61 #endif
62 
63 #include <machine/proc.h>		/* Machine-dependent proc substruct. */
64 
65 /*
66  * Lightweight process.  Field markings and the corresponding locks:
67  *
68  * a:	proc_lock
69  * c:	condition variable interlock, passed to cv_wait()
70  * l:	*l_mutex
71  * p:	l_proc->p_lock
72  * s:	spc_mutex, which may or may not be referenced by l_mutex
73  * S:	l_selcluster->sc_lock
74  * (:	unlocked, stable
75  * !:	unlocked, may only be reliably accessed by the LWP itself
76  *
77  * Fields are clustered together by usage (to increase the likelihood
78  * of cache hits) and by size (to reduce dead space in the structure).
79  */
80 
81 #include <sys/pcu.h>
82 
83 struct lockdebug;
84 struct sysent;
85 
86 struct lwp {	/* per-LWP kernel state; see the field-marking key earlier in this file */
87 	/* Must not be zeroed on free (the fields ahead of l_startzero). */
88 	struct cpu_info *volatile l_cpu;/* s: CPU we're on if LSONPROC */
89 	kmutex_t * volatile l_mutex;	/* l: ptr to mutex on sched state */
90 	struct turnstile *l_ts;		/* l: current turnstile */
91 	int		l_stat;		/* l: overall LWP status */
92 	int		l__reserved;	/*  : padding - reuse as needed */
93 
94 	/* Scheduling and overall state. */
95 #define	l_startzero l_runq
96 	TAILQ_ENTRY(lwp) l_runq;	/* s: run queue */
97 	union {
98 		void *	info;		/* s: scheduler-specific structure */
99 		u_int	timeslice;	/* l: time-quantum for SCHED_M2 */
100 	} l_sched;
101 	void		*l_addr;	/* l: PCB address; use lwp_getpcb() */
102 	struct mdlwp	l_md;		/* l: machine-dependent fields. */
103 	struct bintime 	l_rtime;	/* l: real time */
104 	struct bintime	l_stime;	/* l: start time (while ONPROC) */
105 	int		l_flag;		/* l: misc flag values */
106 	u_int		l_swtime;	/* l: time swapped in or out */
107 	u_int		l_rticks;	/* l: Saved start time of run */
108 	u_int		l_rticksum;	/* l: Sum of ticks spent running */
109 	u_int		l_slpticks;	/* l: Saved start time of sleep */
110 	u_int		l_slpticksum;	/* l: Sum of ticks spent sleeping */
111 	int		l_biglocks;	/* l: biglock count before sleep */
112 	int		l_class;	/* l: scheduling class */
113 	int		l_kpriority;	/* !: has kernel priority boost */
114 	pri_t		l_kpribase;	/* !: kernel priority base level */
115 	pri_t		l_priority;	/* l: scheduler priority */
116 	pri_t		l_inheritedprio;/* l: inherited priority */
117 	pri_t		l_protectprio;	/* l: for PTHREAD_PRIO_PROTECT */
118 	pri_t		l_auxprio;	/* l: max(inherit,protect) priority */
119 	int		l_protectdepth;	/* l: for PTHREAD_PRIO_PROTECT */
120 	u_int		l_cpticks;	/* (: Ticks of CPU time */
121 	psetid_t	l_psid;		/* l: assigned processor-set ID */
122 	fixpt_t		l_pctcpu;	/* p: %cpu during l_swtime */
123 	fixpt_t		l_estcpu;	/* l: cpu time for SCHED_4BSD */
124 	volatile uint64_t l_ncsw;	/* l: total context switches */
125 	volatile uint64_t l_nivcsw;	/* l: involuntary context switches */
126 	SLIST_HEAD(, turnstile) l_pi_lenders; /* l: ts lending us priority */
127 	struct cpu_info *l_target_cpu;	/* l: target CPU to migrate */
128 	struct lwpctl	*l_lwpctl;	/* p: lwpctl block kernel address */
129 	struct lcpage	*l_lcpage;	/* p: lwpctl containing page */
130 	kcpuset_t	*l_affinity;	/* l: CPU set for affinity */
131 
132 	/* Synchronisation. */
133 	struct syncobj	*l_syncobj;	/* l: sync object operations set */
134 	LIST_ENTRY(lwp) l_sleepchain;	/* l: sleep queue */
135 	wchan_t		l_wchan;	/* l: sleep address */
136 	const char	*l_wmesg;	/* l: reason for sleep */
137 	struct sleepq	*l_sleepq;	/* l: current sleep queue */
138 	callout_t	l_timeout_ch;	/* !: callout for tsleep */
139 	kcondvar_t	l_waitcv;	/* a: vfork() wait */
140 	u_int		l_slptime;	/* l: time since last blocked */
141 	bool		l_vforkwaiting;	/* a: vfork() waiting */
142 
143 	/* User-space synchronization. */
144 	uintptr_t	l_robust_head;	/* !: list of robust futexes */
145 	uint32_t	l___rsvd1;	/* reserved for future use */
146 
147 #if PCU_UNIT_COUNT > 0
148 	struct cpu_info	* volatile l_pcu_cpu[PCU_UNIT_COUNT]; /* one slot per PCU unit */
149 	uint32_t	l_pcu_valid;	/* presumably a per-unit valid bitmask -- TODO confirm */
150 #endif /* PCU_UNIT_COUNT > 0 */
151 
152 	/* Process level and global state, misc. */
153 	lwpid_t		l_lid;		/* (: LWP identifier; local to proc */
154 	LIST_ENTRY(lwp)	l_list;		/* a: entry on list of all LWPs */
155 	void		*l_ctxlink;	/* p: uc_link {get,set}context */
156 	struct proc	*l_proc;	/* p: parent process */
157 	LIST_ENTRY(lwp)	l_sibling;	/* p: entry on proc's list of LWPs */
158 	char		*l_name;	/* (: name, optional */
159 	lwpid_t		l_waiter;	/* p: first LWP waiting on us */
160 	lwpid_t 	l_waitingfor;	/* p: specific LWP we are waiting on */
161 	int		l_prflag;	/* p: process level flags */
162 	u_int		l_refcnt;	/* p: reference count on this LWP */
163 
164 	/* State of select() or poll(). */
165 	int		l_selflag;	/* S: polling state flags */
166 	int		l_selret;	/* S: return value of select/poll */
167 	SLIST_HEAD(,selinfo) l_selwait;	/* S: descriptors waited on */
168 	uintptr_t	l_selrec;	/* !: argument for selrecord() */
169 	struct selcluster *l_selcluster;/* !: associated cluster data */
170 	void *		l_selbits;	/* (: select() bit-field */
171 	size_t		l_selni;	/* (: size of a single bit-field */
172 
173 	/* Signals. */
174 	int		l_sigrestore;	/* p: need to restore old sig mask */
175 	sigset_t	l_sigwaitset;	/* p: signals being waited for */
176 	kcondvar_t	l_sigcv;	/* p: for sigsuspend() */
177 	struct ksiginfo	*l_sigwaited;	/* p: delivered signals from set */
178 	sigpend_t	*l_sigpendset;	/* p: XXX issignal()/postsig() baton */
179 	LIST_ENTRY(lwp)	l_sigwaiter;	/* p: chain on list of waiting LWPs */
180 	stack_t		l_sigstk;	/* p: sp & on stack state variable */
181 	sigset_t	l_sigmask;	/* p: signal mask */
182 	sigpend_t	l_sigpend;	/* p: signals to this LWP */
183 	sigset_t	l_sigoldmask;	/* p: mask for sigpause */
184 
185 	/* Private data. */
186 	specificdata_reference
187 		l_specdataref;		/* !: subsystem lwp-specific data */
188 	struct timespec l_ktrcsw;	/* !: for ktrace CSW trace XXX */
189 	void		*l_private;	/* !: svr4-style lwp-private data */
190 	struct lwp	*l_switchto;	/* !: mi_switch: switch to this LWP */
191 	struct kauth_cred *l_cred;	/* !: cached credentials */
192 	struct filedesc	*l_fd;		/* !: cached copy of proc::p_fd */
193 	void		*l_emuldata;	/* !: kernel lwp-private data */
194 	struct fstrans_lwp_info *l_fstrans; /* (: fstrans private data */
195 	u_short		l_shlocks;	/* !: lockdebug: shared locks held */
196 	u_short		l_exlocks;	/* !: lockdebug: excl. locks held */
197 	u_short		l_psrefs;	/* !: count of psref held */
198 	u_short		l_blcnt;	/* !: count of kernel_lock held */
199 	volatile int	l_nopreempt;	/* !: don't preempt me! */
200 	volatile u_int	l_dopreempt;	/* s: kernel preemption pending */
201 	int		l_pflag;	/* !: LWP private flags */
202 	int		l_dupfd;	/* !: side return from cloning devs XXX */
203 	const struct sysent * volatile l_sysent;/* !: currently active syscall */
204 	struct rusage	l_ru;		/* !: accounting information */
205 	uint64_t	l_pfailtime;	/* !: for kernel preemption */
206 	uintptr_t	l_pfailaddr;	/* !: for kernel preemption */
207 	uintptr_t	l_pfaillock;	/* !: for kernel preemption */
208 	_TAILQ_HEAD(,struct lockdebug,volatile) l_ld_locks;/* !: locks held by LWP */
209 	volatile void	*l_ld_wanted;	/* !: lock currently wanted by LWP */
210 	uintptr_t	l_rwcallsite;	/* !: rwlock actual callsite */
211 	int		l_tcgen;	/* !: for timecounter removal */
212 
213 	/* These are only used by 'options SYSCALL_TIMES'. */
214 	uint32_t	l_syscall_time;	/* !: time epoch for current syscall */
215 	uint64_t	*l_syscall_counter; /* !: counter for current process */
216 
217 	struct kdtrace_thread *l_dtrace; /* (: DTrace-specific data. */
218 
219 #ifdef KMSAN
220 	void		*l_kmsan; /* !: KMSAN private data. */
221 #endif /* KMSAN */
222 #ifdef KCOV
223 	void		*l_kcov; /* !: KCOV private data. */
224 #endif /* KCOV */
225 };
226 
227 /*
228  * UAREA_PCB_OFFSET: an offset of PCB structure in the uarea.  MD code may
229  * define it in <machine/proc.h>, to indicate a different uarea layout.
230  */
231 #ifndef UAREA_PCB_OFFSET
232 #define	UAREA_PCB_OFFSET	0
233 #endif
234 
235 LIST_HEAD(lwplist, lwp);		/* A list of LWPs. */
236 
237 #ifdef _KERNEL
238 extern struct lwplist	alllwp;		/* List of all LWPs. */
239 extern lwp_t		lwp0;		/* LWP for proc0. */
240 extern int		maxlwp __read_mostly;	/* max number of lwps */
241 #ifndef MAXLWP
242 #define	MAXLWP		2048	/* default; used only when not already defined */
243 #endif /* !MAXLWP */
244 #ifndef	__HAVE_CPU_MAXLWP
245 #define	cpu_maxlwp()	MAXLWP	/* fallback when __HAVE_CPU_MAXLWP is not defined */
246 #endif /* !__HAVE_CPU_MAXLWP */
247 #endif /* _KERNEL */
248 
249 #endif /* _KERNEL || _KMEMUSER */
250 
251 /*
252  * These flags are kept in l_flag, and they are modified only with the LWP
253  * locked.
254  */
255 #define	LW_IDLE		0x00000001 /* Idle lwp. */
256 #define	LW_LWPCTL	0x00000002 /* Adjust lwpctl in userret */
257 #define	LW_STIMO	0x00000040 /* Sleep timed out */
258 #define	LW_SINTR	0x00000080 /* Sleep is interruptible. */
259 #define	LW_CATCHINTR	0x00000100 /* LW_SINTR intent; see sleepq_block(). */
260 #define	LW_SYSTEM	0x00000200 /* Kernel thread */
261 #define	LW_SYSTEM_FPU	0x00000400 /* Kernel thread with vector/FP enabled */
262 #define	LW_DBGSUSPEND	0x00010000 /* Suspend by debugger */
263 #define	LW_WSUSPEND	0x00020000 /* Suspend before return to user */
264 #define	LW_BATCH	0x00040000 /* LWP tends to hog CPU */
265 #define	LW_WCORE	0x00080000 /* Stop for core dump on return to user */
266 #define	LW_WEXIT	0x00100000 /* Exit before return to user */
267 #define	LW_PENDSIG	0x01000000 /* Pending signal for us */
268 #define	LW_CANCELLED	0x02000000 /* tsleep should not sleep */
269 #define	LW_WREBOOT	0x08000000 /* System is rebooting, please suspend */
270 #define	LW_UNPARKED	0x10000000 /* Unpark op pending */
271 #define	LW_RUMP_CLEAR	0x40000000 /* Clear curlwp in RUMP scheduler */
272 #define	LW_RUMP_QEXIT	0x80000000 /* LWP should exit ASAP */
273 
274 /*
275  * The second set of flags is kept in l_pflag, and they are modified only by
276  * the LWP itself, or modified when it's known the LWP cannot be running.
277  * LP_RUNNING is typically updated with the LWP locked, but not always in
278  * the case of soft interrupt handlers.
279  */
280 #define	LP_KTRACTIVE	0x00000001 /* Executing ktrace operation */
281 #define	LP_KTRCSW	0x00000002 /* ktrace context switch marker */
282 #define	LP_KTRCSWUSER	0x00000004 /* ktrace context switch marker */
283 	/* 		0x00000008    was LP_PIDLID */
284 #define	LP_OWEUPC	0x00000010 /* Owe user profiling tick */
285 #define	LP_MPSAFE	0x00000020 /* Starts life without kernel_lock */
286 #define	LP_INTR		0x00000040 /* Soft interrupt handler */
287 #define	LP_SYSCTLWRITE	0x00000080 /* sysctl write lock held */
288 #define	LP_MUSTJOIN	0x00000100 /* Must join kthread on exit */
289 #define	LP_SINGLESTEP	0x00000400 /* Single step thread in ptrace(2) */
290 #define	LP_TIMEINTR	0x00010000 /* Time this soft interrupt */
291 #define	LP_PREEMPTING	0x00020000 /* mi_switch called involuntarily */
292 #define	LP_RUNNING	0x20000000 /* Active on a CPU */
293 #define	LP_TELEPORT	0x40000000 /* Teleport to new CPU on preempt() */
294 #define	LP_BOUND	0x80000000 /* Bound to a CPU */
295 
296 /*
297  * The third set of flags is kept in l_prflag and they are modified only
298  * with p_lock held.
299  */
300 #define	LPR_DETACHED	0x00800000 /* Won't be waited for. */
301 #define	LPR_CRMOD	0x00000100 /* Credentials modified */
302 #define	LPR_DRAINING	0x80000000 /* Draining references before exiting */
303 
304 /*
305  * Mask indicating that there is "exceptional" work to be done on return to
306  * user.
307  */
308 #define	LW_USERRET	\
309     (LW_WEXIT | LW_PENDSIG | LW_WREBOOT | LW_WSUSPEND | LW_WCORE | LW_LWPCTL)
310 
311 /*
312  * Status values.
313  *
314  * A note about LSRUN and LSONPROC: LSRUN indicates that an LWP is
315  * runnable but *not* yet running, i.e. is on a run queue.  LSONPROC
316  * indicates that the LWP is actually executing on a CPU, i.e.
317  * it is no longer on a run queue.
318  *
319  * These values are set in stone and must not be reused with future changes.
320  */
321 #define	LSIDL		1	/* Process being created by fork. */
322 #define	LSRUN		2	/* Currently runnable. */
323 #define	LSSLEEP		3	/* Sleeping on an address. */
324 #define	LSSTOP		4	/* Process debugging or suspension. */
325 #define	LSZOMB		5	/* Awaiting collection by parent. */
326 /* define	LSDEAD	6	Process is almost a zombie. (removed in 5.0) */
327 #define	LSONPROC	7	/* Process is currently on a CPU. */
328 #define	LSSUSPENDED	8	/* Not running, not signalable. */
329 
330 #if defined(_KERNEL) || defined(_KMEMUSER)
331 static __inline void *
332 lwp_getpcb(struct lwp *l)
333 {
334 
335 	return l->l_addr;
336 }
337 #endif /* _KERNEL || _KMEMUSER */
338 
339 #ifdef _KERNEL
/*
 * LWP_CACHE_CREDS(l, p): call lwp_update_creds(l) when LPR_CRMOD is set
 * in l->l_prflag; otherwise do nothing.
 *
 * `p' is not used for anything: it is evaluated once and discarded.  It
 * is parenthesized so that any expression, not only a plain identifier,
 * can be passed without breaking the (void) cast.
 */
#define	LWP_CACHE_CREDS(l, p)						\
do {									\
	(void)(p);							\
	if (__predict_false((l)->l_prflag & LPR_CRMOD))			\
		lwp_update_creds(l);					\
} while (/* CONSTCOND */ 0)
346 
347 void	lwpinit(void);
348 void	lwp0_init(void);
349 
350 void	lwp_startup(lwp_t *, lwp_t *);
351 void	startlwp(void *);
352 
353 int	lwp_locked(lwp_t *, kmutex_t *);
354 kmutex_t *lwp_setlock(lwp_t *, kmutex_t *);
355 void	lwp_unlock_to(lwp_t *, kmutex_t *);
356 int	lwp_trylock(lwp_t *);
357 void	lwp_addref(lwp_t *);
358 void	lwp_delref(lwp_t *);
359 void	lwp_delref2(lwp_t *);
360 bool	lwp_drainrefs(lwp_t *);
361 bool	lwp_alive(lwp_t *);
362 lwp_t	*lwp_find_first(proc_t *);
363 
364 int	lwp_wait(lwp_t *, lwpid_t, lwpid_t *, bool);
365 void	lwp_continue(lwp_t *);
366 void	lwp_unsleep(lwp_t *, bool);
367 void	lwp_unstop(lwp_t *);
368 void	lwp_exit(lwp_t *);
369 int	lwp_suspend(lwp_t *, lwp_t *);
370 int	lwp_create1(lwp_t *, const void *, size_t, u_long, lwpid_t *);
371 void	lwp_start(lwp_t *, int);
372 void	lwp_update_creds(lwp_t *);
373 void	lwp_migrate(lwp_t *, struct cpu_info *);
374 lwp_t *	lwp_find2(pid_t, lwpid_t);
375 lwp_t *	lwp_find(proc_t *, int);
376 void	lwp_userret(lwp_t *);
377 void	lwp_need_userret(lwp_t *);
378 void	lwp_free(lwp_t *, bool, bool);
379 uint64_t lwp_pctr(void);
380 int	lwp_setprivate(lwp_t *, void *);
381 int	do_lwp_create(lwp_t *, void *, u_long, lwp_t **, const sigset_t *,
382     const stack_t *);
383 
384 void	lwp_thread_cleanup(lwp_t *);
385 
386 void	lwpinit_specificdata(void);
387 int	lwp_specific_key_create(specificdata_key_t *, specificdata_dtor_t);
388 void	lwp_specific_key_delete(specificdata_key_t);
389 void	lwp_initspecific(lwp_t *);
390 void	lwp_finispecific(lwp_t *);
391 void	*lwp_getspecific(specificdata_key_t);
392 #if defined(_LWP_API_PRIVATE)
393 void	*_lwp_getspecific_by_lwp(lwp_t *, specificdata_key_t);
394 #endif
395 void	lwp_setspecific(specificdata_key_t, void *);
396 void	lwp_setspecific_by_lwp(lwp_t *, specificdata_key_t, void *);
397 
398 /* Syscalls. */
399 int	lwp_park(clockid_t, int, struct timespec *);
400 int	lwp_unpark(const lwpid_t *, const u_int);
401 
402 /* DDB. */
403 void	lwp_whatis(uintptr_t, void (*)(const char *, ...) __printflike(1, 2));
404 
405 /*
406  * Lock an LWP. XXX _MODULE
407  */
408 static __inline void
409 lwp_lock(lwp_t *l)
410 {
411 	kmutex_t *old = atomic_load_consume(&l->l_mutex);
412 
413 	/*
414 	 * Note: mutex_spin_enter() will have posted a read barrier.
415 	 * Re-test l->l_mutex.  If it has changed, we need to try again.
416 	 */
417 	mutex_spin_enter(old);
418 	while (__predict_false(atomic_load_relaxed(&l->l_mutex) != old)) {
419 		mutex_spin_exit(old);
420 		old = atomic_load_consume(&l->l_mutex);
421 		mutex_spin_enter(old);
422 	}
423 }
424 
425 /*
426  * Unlock an LWP. XXX _MODULE
427  */
428 static __inline void
429 lwp_unlock(lwp_t *l)
430 {
431 	mutex_spin_exit(l->l_mutex);
432 }
433 
434 static __inline void
435 lwp_changepri(lwp_t *l, pri_t pri)
436 {
437 	KASSERT(mutex_owned(l->l_mutex));
438 
439 	if (l->l_priority == pri)
440 		return;
441 
442 	(*l->l_syncobj->sobj_changepri)(l, pri);
443 	KASSERT(l->l_priority == pri);
444 }
445 
446 static __inline void
447 lwp_lendpri(lwp_t *l, pri_t pri)
448 {
449 	KASSERT(mutex_owned(l->l_mutex));
450 
451 	(*l->l_syncobj->sobj_lendpri)(l, pri);
452 	KASSERT(l->l_inheritedprio == pri);
453 }
454 
455 static __inline pri_t
456 lwp_eprio(lwp_t *l)
457 {
458 	pri_t pri;
459 
460 	pri = l->l_priority;
461 	if ((l->l_flag & LW_SYSTEM) == 0 && l->l_kpriority && pri < PRI_KERNEL)
462 		pri = (pri >> 1) + l->l_kpribase;
463 	return MAX(l->l_auxprio, pri);
464 }
465 
466 int lwp_create(lwp_t *, struct proc *, vaddr_t, int, void *, size_t,
467     void (*)(void *), void *, lwp_t **, int, const sigset_t *, const stack_t *);
468 
469 /*
470  * XXX _MODULE
471  * We should provide real stubs for the below that modules can use.
472  */
473 
474 static __inline void
475 spc_lock(struct cpu_info *ci)
476 {
477 	mutex_spin_enter(ci->ci_schedstate.spc_mutex);
478 }
479 
480 static __inline void
481 spc_unlock(struct cpu_info *ci)
482 {
483 	mutex_spin_exit(ci->ci_schedstate.spc_mutex);
484 }
485 
486 static __inline void
487 spc_dlock(struct cpu_info *ci1, struct cpu_info *ci2)
488 {
489 	struct schedstate_percpu *spc1 = &ci1->ci_schedstate;
490 	struct schedstate_percpu *spc2 = &ci2->ci_schedstate;
491 
492 	KASSERT(ci1 != ci2);
493 	if (ci1 < ci2) {
494 		mutex_spin_enter(spc1->spc_mutex);
495 		mutex_spin_enter(spc2->spc_mutex);
496 	} else {
497 		mutex_spin_enter(spc2->spc_mutex);
498 		mutex_spin_enter(spc1->spc_mutex);
499 	}
500 }
501 
502 /*
503  * Allow machine-dependent code to override curlwp in <machine/cpu.h> for
504  * its own convenience.  Otherwise, we declare it as appropriate.
505  */
506 #if !defined(curlwp)
507 #if defined(MULTIPROCESSOR)
508 #define	curlwp		curcpu()->ci_curlwp	/* Current running LWP */
509 #else
510 extern struct lwp	*curlwp;		/* Current running LWP */
511 #endif /* MULTIPROCESSOR */
512 #endif /* ! curlwp */
513 #define	curproc		(curlwp->l_proc)
514 
515 /*
516  * This provides a way for <machine/cpu.h> to get l_cpu for curlwp before
517  * struct lwp is defined.
518  */
519 static __inline struct cpu_info *
520 lwp_getcpu(struct lwp *l)
521 {
522 	return l->l_cpu;
523 }
524 
525 static __inline bool
526 CURCPU_IDLE_P(void)
527 {
528 	struct cpu_info *ci = curcpu();
529 	return ci->ci_onproc == ci->ci_data.cpu_idlelwp;
530 }
531 
532 /*
533  * Disable and re-enable preemption.  Only for low-level kernel
534  * use.  Device drivers and anything that could potentially be
535  * compiled as a module should use kpreempt_disable() and
536  * kpreempt_enable().
537  */
538 static __inline void
539 KPREEMPT_DISABLE(lwp_t *l)
540 {
541 
542 	KASSERT(l == curlwp);
543 	l->l_nopreempt++;
544 	__insn_barrier();
545 }
546 
547 static __inline void
548 KPREEMPT_ENABLE(lwp_t *l)
549 {
550 
551 	KASSERT(l == curlwp);
552 	KASSERT(l->l_nopreempt > 0);
553 	__insn_barrier();
554 	if (--l->l_nopreempt != 0)
555 		return;
556 	__insn_barrier();
557 	if (__predict_false(l->l_dopreempt))
558 		kpreempt(0);
559 	__insn_barrier();
560 }
561 
562 /* For lwp::l_dopreempt */
563 #define	DOPREEMPT_ACTIVE	0x01
564 #define	DOPREEMPT_COUNTED	0x02
565 
566 /*
567  * Prevent curlwp from migrating between CPUs between curlwp_bind and
568  * curlwp_bindx. One use case is psref(9) that has a contract that
569  * forbids migrations.
570  */
571 static __inline int
572 curlwp_bind(void)
573 {
574 	int bound;
575 
576 	bound = curlwp->l_pflag & LP_BOUND;
577 	curlwp->l_pflag |= LP_BOUND;
578 	__insn_barrier();
579 
580 	return bound;
581 }
582 
583 static __inline void
584 curlwp_bindx(int bound)
585 {
586 
587 	KASSERT(curlwp->l_pflag & LP_BOUND);
588 	__insn_barrier();
589 	curlwp->l_pflag ^= bound ^ LP_BOUND;
590 }
591 
592 #endif /* _KERNEL */
593 
594 /* Flags for _lwp_create(), as per Solaris. */
595 #define	LWP_DETACHED	0x00000040
596 #define	LWP_SUSPENDED	0x00000080
597 
598 /* Kernel-internal flags for LWP creation. */
599 	/*		0x40000000	was LWP_PIDLID */
600 #define	LWP_VFORK	0x80000000
601 
602 #endif	/* !_SYS_LWP_H_ */
603