xref: /openbsd/share/man/man9/syscall.9 (revision 89f5bfa7)
1.\"	$OpenBSD: syscall.9,v 1.12 2016/03/02 15:04:20 naddy Exp $
2.\"
3.\" Copyright (c) 2003 Michael Shalayeff
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.Dd $Mdocdate: March 2 2016 $
27.Dt SYSCALL 9
28.Os
29.Sh NAME
30.Nm syscall
31.Nd system calls overview
32.Sh DESCRIPTION
33System calls in the kernel are implemented through a set of
34switch tables for each emulation type.
35Each table is generated from the
36.Dq master
37file by
38.Pa sys/kern/makesyscalls.sh
39through the appropriate rules in the
40.Pa Makefile .
41.Pp
42The
43.Dq master
44file is a text file consisting of a list of lines for each
45system call.
46Lines may be continued by ending them with a backslash.
47Each line is a set of fields separated by whitespace:
48.Pp
49.D1 Cd number type ...
50.Pp
51Where:
52.Bl -tag -width number -compact
53.It number
54is the system call number;
55.It type
56is one of:
57.Bl -tag -width COMPAT_XXX -compact
58.It STD
59always included;
60.It OBSOL
61obsolete, not included in the system;
62.It UNIMPL
63unimplemented, not included in the system;
64.It NODEF
65included, but don't define the syscall number;
66.It NOARGS
67included, but don't define the syscall args structure;
68.It INDIR
69included, but don't define the syscall args structure,
70and allow it to be "really" varargs;
71.It COMPAT_XX
72a compatibility system call, only included if the corresponding
73option is configured for the kernel (see
74.Xr options 4 ) .
75.El
76.El
77.Pp
78The rest of the line for the STD, NODEF, NOARGS, and COMPAT_XX
79types is:
80.Pp
81.D1 Cd { pseudo-proto } [alias]
82.Pp
83.Nm pseudo-proto
84is a C-like prototype used to generate the system call argument list,
85and alias is an optional name alias for the call.
86The function in the prototype has to be defined somewhere in
87the kernel sources as it will be used as an entry point for
88the corresponding system call.
89.Pp
90For other types the rest of the line is a comment.
91.Pp
92To generate the header and code files from the
93.Dq master
94file a
95.Xr make 1
96command has to be run from the directory containing the
97.Dq master
98file.
99.Ss Usage
100The entry path from user space into a system call is machine dependent.
101Typical code to invoke a system call from the machine dependent
102sources might look like this:
103.Bd -literal -offset indent
104
105	const struct sysent *callp;
106	register_t code, args[8], rval[2];
107	struct proc *p = curproc;
108	int error, nsys;
109
110\&...
111
112/* "code" is the system call number passed from the user space */
113
114\&...
115
116if (code < 0 || code >= nsys)
117	callp += p->p_emul->e_nosys;	/* illegal */
118else
119	callp += code;
120
121/* copyin the arguments from the user space */
122\&...
123	rval[0] = 0;
124
125/* the following steps are now performed using mi_syscall() */
126#ifdef SYSCALL_DEBUG
127	scdebug_call(p, code, args);
128#endif
129#ifdef KTRACE
130	if (KTRPOINT(p, KTR_SYSCALL))
131		ktrsyscall(p, code, argsize, args);
132#endif
133#if NSYSTRACE > 0
134	if (ISSET(p->p_flag, P_SYSTRACE))
135		error = systrace_redirect(code, p, args, rval);
136	else
137#endif
138		error = (*callp->sy_call)(p, args, rval);
139
140	switch (error) {
141	case 0:
142		/* normal return */
143		\&...
144		break;
145	case ERESTART:
146		/*
147		 * adjust PC to point before the system call
148		 * in the user space so that upon return
149		 * there we reenter the kernel and repeat
150		 * the same system call
151		 */
152		\&...
153		break;
154	case EJUSTRETURN:
155		/* just return */
156		break;
157	default:
158		/*
159		 * an error returned:
160		 *	call an optional emulation errno mapping
161		 *	routine and return back to the user.
162		 */
163		if (p->p_emul->e_errno)
164			error = p->p_emul->e_errno[error];
165		\&...
166		break;
167	}
168
169/* the following steps are now performed using mi_syscall_return() */
170#ifdef SYSCALL_DEBUG
171	scdebug_ret(p, code, orig_error, rval);
172#endif
173	userret(p);
174#ifdef KTRACE
175	if (KTRPOINT(p, KTR_SYSRET))
176		ktrsysret(p, code, orig_error, rval[0]);
177#endif
178
179.Ed
180.Pp
181The
182.Dq SYSCALL_DEBUG
183parts of the code are explained in the section
184.Sx Debugging
185later in the document.
186For the
187.Dq KTRACE
188portions of the code refer to the
189.Xr ktrace 9
190document for further explanations.
191.Dq NSYSTRACE
192guards the system call tracing facility, which is explained in the
193.Xr systrace 9
194and
195.Xr systrace 4
196documents.
197.Ss Debugging
198For debugging purposes the line
199.Pp
200.D1 Cd option SYSCALL_DEBUG
201.Pp
202should be included in the kernel configuration file (see
203.Xr options 4 ) .
204This allows tracing for calls, returns, and arguments for both
205implemented and non-implemented system calls.
206A global integer variable
207.Dq scdebug
208contains a mask for the desired logging events:
209.Pp
210.Bl -tag -width SCDEBUG_SHOWARGS__ -compact
211.It SCDEBUG_CALLS
212(0x0001) show calls;
213.It SCDEBUG_RETURNS
214(0x0002) show returns;
215.It SCDEBUG_ALL
216(0x0004) show even syscalls that are implemented;
217.It SCDEBUG_SHOWARGS
218(0x0008) show arguments to calls.
219.El
220.Pp
221Use
222.Xr ddb 4
223to set
224.Dq scdebug
225to the desired value.
226.Sh CODE REFERENCES
227.Bl -tag -width sys/kern/syscalls.master -compact
228.It Pa sys/kern/makesyscalls.sh
229a
230.Xr sh 1
231script for generating C files out of the syscall master file;
232.It Pa sys/kern/syscalls.conf
233a configuration file for the shell script above;
234.It Pa sys/kern/syscalls.master
235master file describing names and numbers for the system calls;
236.It Pa sys/kern/syscalls.c
237system call names lists;
238.It Pa sys/kern/init_sysent.c
239system call switch tables;
240.It Pa sys/sys/syscallargs.h
241system call argument lists;
242.It Pa sys/sys/syscall.h
243system call numbers;
244.It Pa sys/sys/syscall_mi.h
245machine-independent syscall entry and return handling.
246.El
247.Sh SEE ALSO
248.Xr ktrace 2 ,
249.Xr syscall 2 ,
250.Xr systrace 4 ,
251.Xr ktrace 9 ,
252.Xr sysctl_int 9 ,
253.Xr systrace 9
254.Sh HISTORY
255The
256.Nm
257manual page first appeared in
258.Ox 3.4 .
259