xref: /openbsd/share/man/man9/syscall.9 (revision b1d599a8)
1.\"	$OpenBSD: syscall.9,v 1.13 2016/04/25 19:24:42 tedu Exp $
2.\"
3.\" Copyright (c) 2003 Michael Shalayeff
4.\"
5.\" Redistribution and use in source and binary forms, with or without
6.\" modification, are permitted provided that the following conditions
7.\" are met:
8.\" 1. Redistributions of source code must retain the above copyright
9.\"    notice, this list of conditions and the following disclaimer.
10.\" 2. Redistributions in binary form must reproduce the above copyright
11.\"    notice, this list of conditions and the following disclaimer in the
12.\"    documentation and/or other materials provided with the distribution.
13.\"
14.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
15.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
18.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20.\" OR SERVICES; LOSS OF MIND, USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24.\" SUCH DAMAGE.
25.\"
26.Dd $Mdocdate: April 25 2016 $
27.Dt SYSCALL 9
28.Os
29.Sh NAME
30.Nm syscall
31.Nd system calls overview
32.Sh DESCRIPTION
33System calls in the kernel are implemented through a set of
34switch tables for each emulation type.
35Each table is generated from the
36.Dq master
37file by
38.Pa sys/kern/makesyscalls.sh
39through the appropriate rules in the
40.Pa Makefile .
41.Pp
42The
43.Dq master
44file is a text file consisting of a list of lines for each
45system call.
46Lines may be continued by ending them with a backslash.
47Each line is a set of fields separated by whitespace:
48.Pp
49.D1 Cd number type ...
50.Pp
51Where:
52.Bl -tag -width number -compact
53.It number
54is the system call number;
55.It type
56is one of:
57.Bl -tag -width COMPAT_XXX -compact
58.It STD
59always included;
60.It OBSOL
61obsolete, not included in the system;
62.It UNIMPL
63unimplemented, not included in the system;
64.It NODEF
65included, but don't define the syscall number;
66.It NOARGS
67included, but don't define the syscall args structure;
68.It INDIR
69included, but don't define the syscall args structure,
70and allow it to be "really" varargs;
71.It COMPAT_XX
72a compatibility system call, only included if the corresponding
73option is configured for the kernel (see
74.Xr options 4 ) .
75.El
76.El
77.Pp
78The rest of the line for the STD, NODEF, NOARGS, and COMPAT_XX
79types is:
80.Pp
81.D1 Cd { pseudo-proto } [alias]
82.Pp
83.Nm pseudo-proto
84is a C-like prototype used to generate the system call argument list,
85and alias is an optional name alias for the call.
86The function in the prototype has to be defined somewhere in
87the kernel sources as it will be used as an entry point for
88the corresponding system call.
89.Pp
90For other types the rest of the line is a comment.
91.Pp
92To generate the header and code files from the
93.Dq master
94file a
95.Xr make 1
96command has to be run from the directory containing the
97.Dq master
98file.
99.Ss Usage
100Entry from the user space for the system call is machine dependent.
101Typical code to invoke a system call from the machine dependent
102sources might look like this:
103.Bd -literal -offset indent
104
105	const struct sysent *callp;
106	register_t code, args[8], rval[2];
107	struct proc *p = curproc;
108	int error, nsys;
109
110\&...
111
112/* "code" is the system call number passed from the user space */
113
114\&...
115
116if (code < 0 || code >= nsys)
117	callp += p->p_emul->e_nosys;	/* illegal */
118else
119	callp += code;
120
121/* copyin the arguments from the user space */
122\&...
123	rval[0] = 0;
124
125/* the following steps are now performed using mi_syscall() */
126#ifdef SYSCALL_DEBUG
127	scdebug_call(p, code, args);
128#endif
129#ifdef KTRACE
130	if (KTRPOINT(p, KTR_SYSCALL))
131		ktrsyscall(p, code, argsize, args);
132#endif
133	error = (*callp->sy_call)(p, args, rval);
134
135	switch (error) {
136	case 0:
137		/* normal return */
138		\&...
139		break;
140	case ERESTART:
141		/*
142		 * adjust PC to point before the system call
143		 * in the user space so that upon return
144		 * there we reenter the kernel and repeat
145		 * the same system call
146		 */
147		\&...
148		break;
149	case EJUSTRETURN:
150		/* just return */
151		break;
152	default:
153		/*
154		 * an error returned:
155		 *	call an optional emulation errno mapping
156		 *	routine and return back to the user.
157		 */
158		if (p->p_emul->e_errno)
159			error = p->p_emul->e_errno[error];
160		\&...
161		break;
162	}
163
164/* the following steps are now performed using mi_syscall_return() */
165#ifdef SYSCALL_DEBUG
166	scdebug_ret(p, code, orig_error, rval);
167#endif
168	userret(p);
169#ifdef KTRACE
170	if (KTRPOINT(p, KTR_SYSRET))
171		ktrsysret(p, code, orig_error, rval[0]);
172#endif
173
174.Ed
175.Pp
176The
177.Dq SYSCALL_DEBUG
178parts of the code are explained in the section
179.Sx Debugging
180later in the document.
181For the
182.Dq KTRACE
183portions of the code refer to the
184.Xr ktrace 9
185document for further explanations.
186.Ss Debugging
187For debugging purposes the line
188.Pp
189.D1 Cd option SYSCALL_DEBUG
190.Pp
191should be included in the kernel configuration file (see
192.Xr options 4 ) .
193This allows tracing for calls, returns, and arguments for both
194implemented and non-implemented system calls.
195A global integer variable
196.Dq scdebug
197contains a mask for the desired logging events:
198.Pp
199.Bl -tag -width SCDEBUG_SHOWARGS__ -compact
200.It SCDEBUG_CALLS
201(0x0001) show calls;
202.It SCDEBUG_RETURNS
203(0x0002) show returns;
204.It SCDEBUG_ALL
205(0x0004) show even syscalls that are implemented;
206.It SCDEBUG_SHOWARGS
207(0x0008) show arguments to calls.
208.El
209.Pp
210Use
211.Xr ddb 4
212to set
213.Dq scdebug
214to the desired value.
215.Sh CODE REFERENCES
216.Bl -tag -width sys/kern/syscalls.master -compact
217.It Pa sys/kern/makesyscalls.sh
218a
219.Xr sh 1
220script for generating C files out of the syscall master file;
221.It Pa sys/kern/syscalls.conf
222a configuration file for the shell script above;
223.It Pa sys/kern/syscalls.master
224master files describing names and numbers for the system calls;
225.It Pa sys/kern/syscalls.c
226system call names lists;
227.It Pa sys/kern/init_sysent.c
228system call switch tables;
229.It Pa sys/sys/syscallargs.h
230system call argument lists;
231.It Pa sys/sys/syscall.h
232system call numbers;
233.It Pa sys/sys/syscall_mi.h
234machine-independent syscall entry and return handling.
235.El
236.Sh SEE ALSO
237.Xr ktrace 2 ,
238.Xr syscall 2 ,
239.Xr ktrace 9 ,
240.Xr sysctl_int 9
241.Sh HISTORY
242The
243.Nm
244manual page first appeared in
245.Ox 3.4 .
246