xref: /freebsd/share/man/man9/vnet.9 (revision 4b9d6057)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\"
4.\" This documentation was written by CK Software GmbH under sponsorship from
5.\" the FreeBSD Foundation.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\"
16.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26.\" SUCH DAMAGE.
27.\"
28.Dd December 10, 2020
29.Dt VNET 9
30.Os
31.Sh NAME
32.Nm VNET
33.Nd "network subsystem virtualization infrastructure"
34.Sh SYNOPSIS
35.Cd "options VIMAGE"
36.Cd "options VNET_DEBUG"
37.Pp
38.In net/vnet.h
39.\"------------------------------------------------------------
40.Ss "Constants and Global Variables"
41.\"
42.Dv VNET_SETNAME
43.\"	"set_vnet"
44.Dv VNET_SYMPREFIX
45.\"	"vnet_entry_"
46.Vt extern struct vnet *vnet0;
47.\"------------------------------------------------------------
48.Ss "Variable Declaration"
49.Fo VNET
50.Fa "name"
51.Fc
52.\"
53.Fo VNET_NAME
54.Fa "name"
55.Fc
56.\"
57.Fo VNET_DECLARE
58.Fa "type" "name"
59.Fc
60.\"
61.Fo VNET_DEFINE
62.Fa "type" "name"
63.Fc
64.\"
65.Fo VNET_DEFINE_STATIC
66.Fa "type" "name"
67.Fc
68.\"
69.Bd -literal
70#define	V_name	VNET(name)
71.Ed
72.\" ------------------------------------------------------------
73.Ss "Virtual Instance Selection"
74.\"
75.Fo CRED_TO_VNET
76.Fa "struct ucred *"
77.Fc
78.\"
79.Fo TD_TO_VNET
80.Fa "struct thread *"
81.Fc
82.\"
83.Fo P_TO_VNET
84.Fa "struct proc *"
85.Fc
86.\"
87.Fo IS_DEFAULT_VNET
88.Fa "struct vnet *"
89.Fc
90.\"
91.Fo VNET_ASSERT
92.Fa exp msg
93.Fc
94.\"
95.Fo CURVNET_SET
96.Fa "struct vnet *"
97.Fc
98.\"
99.Fo CURVNET_SET_QUIET
100.Fa "struct vnet *"
101.Fc
102.\"
103.Fn CURVNET_RESTORE
104.\"
105.Fo VNET_ITERATOR_DECL
106.Fa "struct vnet *"
107.Fc
108.\"
109.Fo VNET_FOREACH
110.Fa "struct vnet *"
111.Fc
112.\" ------------------------------------------------------------
113.Ss "Locking"
114.\"
115.Fn VNET_LIST_RLOCK
116.Fn VNET_LIST_RUNLOCK
117.Fn VNET_LIST_RLOCK_NOSLEEP
118.Fn VNET_LIST_RUNLOCK_NOSLEEP
119.\" ------------------------------------------------------------
120.Ss "Startup and Teardown Functions"
121.\"
122.Ft "struct vnet *"
123.Fo vnet_alloc
124.Fa void
125.Fc
126.\"
127.Ft void
128.Fo vnet_destroy
129.Fa "struct vnet *"
130.Fc
131.\"
132.Fo VNET_SYSINIT
133.Fa ident
134.Fa "enum sysinit_sub_id subsystem"
135.Fa "enum sysinit_elem_order order"
136.Fa "sysinit_cfunc_t func"
137.Fa "const void *arg"
138.Fc
139.\"
140.Fo VNET_SYSUNINIT
141.Fa ident
142.Fa "enum sysinit_sub_id subsystem"
143.Fa "enum sysinit_elem_order order"
144.Fa "sysinit_cfunc_t func"
145.Fa "const void *arg"
146.Fc
147.\" ------------------------------------------------------------
148.Ss "Eventhandlers"
149.\"
150.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
151.Fa "const char *name"
152.Fa "void *func"
153.Fa "void *arg"
154.Fa "int priority"
155.Fc
156.\"
157.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
158.Fa "eventhandler_tag tag"
159.Fa "const char *name"
160.Fa "void *func"
161.Fa "void *arg"
162.Fa "int priority"
163.Fc
164.\" ------------------------------------------------------------
165.Ss "Sysctl Handling"
166.Fo SYSCTL_VNET_INT
167.Fa parent nbr name access ptr val descr
168.Fc
169.Fo SYSCTL_VNET_PROC
170.Fa parent nbr name access ptr arg handler fmt descr
171.Fc
172.Fo SYSCTL_VNET_STRING
173.Fa parent nbr name access arg len descr
174.Fc
175.Fo SYSCTL_VNET_STRUCT
176.Fa parent nbr name access ptr type descr
177.Fc
178.Fo SYSCTL_VNET_UINT
179.Fa parent nbr name access ptr val descr
180.Fc
181.Fo VNET_SYSCTL_ARG
182.Fa req arg1
183.Fc
184.\" ------------------------------------------------------------
185.Sh DESCRIPTION
186.Nm
187is the name of a technique to virtualize the network stack.
188The basic idea is to change global resources, most notably variables, into
189per-network-stack resources and have functions, sysctls, eventhandlers,
190etc. access and handle them in the context of the correct instance.
191Each (virtual) network stack is attached to a
192.Em prison ,
193with
194.Vt vnet0
195being the unrestricted default network stack of the base system.
196.Pp
197The global defines for
198.Dv VNET_SETNAME
199and
200.Dv VNET_SYMPREFIX
201are shared with
202.Xr kvm 3
203to access internals for debugging reasons.
204.\" ------------------------------------------------------------
205.Ss "Variable Declaration"
206.\"
207Variables are virtualized by using the
208.Fn VNET_DEFINE
209macro rather than writing them out as
210.Em type name .
211One can still use static initialization, e.g.,
212.Pp
213.Dl Li VNET_DEFINE(int, foo) = 1;
214.Pp
215Variables declared with the static keyword can use the
216.Fn VNET_DEFINE_STATIC
217macro, e.g.,
218.Pp
219.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars);
220.Pp
221Static initialization is not possible when the virtualized variable
222would need to be referenced, e.g., with
223.Dq TAILQ_HEAD_INITIALIZER() .
224In that case a
225.Fn VNET_SYSINIT
226based initialization function must be used.
227.Pp
228External variables have to be declared using the
229.Fn VNET_DECLARE
230macro.
231In either case the convention is to define another macro,
232that is then used throughout the implementation to access that variable.
233The variable name is usually prefixed by
234.Em V_
235to express that it is virtualized.
236The
237.Fn VNET
238macro will then translate accesses to that variable to the copy of the
239currently selected instance (see the
240.Sx "Virtual Instance Selection"
241section):
242.Pp
243.Dl Li #define	V_name	VNET(name)
244.Pp
245.Em NOTE:
246Do not confuse this with the convention used by
247.Xr VFS 9 .
248.Pp
249The
250.Fn VNET_NAME
251macro returns the offset within the memory region of the virtual network
252stack instance.
253It is usually only used with
254.Fn SYSCTL_VNET_*
255macros.
256.\" ------------------------------------------------------------
257.Ss "Virtual Instance Selection"
258.\"
259There are three different places where the current virtual
260network stack pointer is stored and can be taken from:
261.Bl -enum -offset indent
262.It
263a
264.Em prison :
265.Dl "(struct prison *)->pr_vnet"
266.Pp
267For convenience the following macros are provided:
268.Bd -literal -compact -offset indent
269.Fn CRED_TO_VNET "struct ucred *"
270.Fn TD_TO_VNET "struct thread *"
271.Fn P_TO_VNET "struct proc *"
272.Ed
273.It
274a
275.Em socket :
276.Dl "(struct socket *)->so_vnet"
277.It
278an
279.Em interface :
280.Dl "(struct ifnet *)->if_vnet"
281.El
282.Pp
283.\"
284In addition the currently active instance is cached in
285.Dq "curthread->td_vnet"
286which is usually only accessed through the
287.Dv curvnet
288macro.
289.Pp
290.\"
291To set the correct context of the current virtual network instance, use the
292.Fn CURVNET_SET
293or
294.Fn CURVNET_SET_QUIET
295macros.
296The
297.Fn CURVNET_SET_QUIET
298version will not record vnet recursions in case the kernel was compiled
299with
300.Cd "options VNET_DEBUG"
301and should thus only be used in well known cases, where recursion is
302unavoidable.
303Both macros will save the previous state on the stack and it must be restored
304with the
305.Fn CURVNET_RESTORE
306macro.
307.Pp
308.Em NOTE:
309As the previous state is saved on the stack, you cannot have multiple
310.Fn CURVNET_SET
311calls in the same block.
312.Pp
313.Em NOTE:
314As the previous state is saved on the stack, a
315.Fn CURVNET_RESTORE
316call has to be in the same block as the
317.Fn CURVNET_SET
318call or in a subblock with the same idea of the saved instances as the
319outer block.
320.Pp
321.Em NOTE:
322As each macro is a set of operations and, as previously explained, cannot
323be put into its own block when defined, one cannot conditionally set
324the current vnet context.
325The following will
326.Em not
327work:
328.Bd -literal -offset indent
329if (condition)
330	CURVNET_SET(vnet);
331.Ed
332.Pp
333nor would this work:
334.Bd -literal -offset indent
335if (condition) {
336	CURVNET_SET(vnet);
337}
338CURVNET_RESTORE();
339.Ed
340.Pp
341.\"
342Sometimes one needs to loop over all virtual instances, for example to update
343virtual from global state, to run a function from a
344.Xr callout 9
345for each instance, etc.
346For those cases the
347.Fn VNET_ITERATOR_DECL
348and
349.Fn VNET_FOREACH
350macros are provided.
351The former macro defines the variable that iterates over the loop,
352and the latter loops over all of the virtual network stack instances.
353See
354.Sx "Locking"
355for how to safely traverse the list of all virtual instances.
356.Pp
357.\"
358The
359.Fn IS_DEFAULT_VNET
360macro provides a safe way to check whether the currently active instance is the
361unrestricted default network stack of the base system
362.Pq Vt vnet0 .
363.Pp
364.\"
365The
366.Fn VNET_ASSERT
367macro provides a way to conditionally add assertions that are only active with
368.Cd "options VIMAGE"
369compiled in and either
370.Cd "options VNET_DEBUG"
371or
372.Cd "options INVARIANTS"
373enabled as well.
374It uses the same semantics as
375.Xr KASSERT 9 .
376.\" ------------------------------------------------------------
377.Ss "Locking"
378.\"
379For public access to the list of virtual network stack instances,
380e.g., by the
381.Fn VNET_FOREACH
382macro, read locks are provided.
383Macros are used to abstract from the actual type of the locks.
384If a caller may sleep while traversing the list, it must use the
385.Fn VNET_LIST_RLOCK
386and
387.Fn VNET_LIST_RUNLOCK
388macros.
389Otherwise, the caller can use
390.Fn VNET_LIST_RLOCK_NOSLEEP
391and
392.Fn VNET_LIST_RUNLOCK_NOSLEEP .
393.\" ------------------------------------------------------------
394.Ss "Startup and Teardown Functions"
395.\"
396To start or tear down a virtual network stack instance the internal
397functions
398.Fn vnet_alloc
399and
400.Fn vnet_destroy
401are provided and called from the jail framework.
402They run the publicly provided methods to handle network stack
403startup and teardown.
404.Pp
405For public control, the system startup interface has been enhanced
406to not only handle a system boot but to also handle a virtual
407network stack startup and teardown.
408To the base system the
409.Fn VNET_SYSINIT
410and
411.Fn VNET_SYSUNINIT
412macros look exactly as if there were no virtual network stack.
413In fact, if
414.Cd "options VIMAGE"
415is not compiled in they are compiled to the standard
416.Fn SYSINIT
417macros.
418In addition to that they are run for each virtual network stack
419when starting or, in reverse order, when shutting down.
420.\" ------------------------------------------------------------
421.Ss "Eventhandlers"
422.\"
423Eventhandlers can be handled in two ways:
424.Pp
425.Bl -enum -offset indent -compact
426.It
427save the
428.Em tags
429returned in each virtual instance and properly free the eventhandlers
430on teardown using those, or
431.It
432use one eventhandler that will iterate over all virtual network
433stack instances.
434.El
435.Pp
436For the first case one can just use the normal
437.Xr EVENTHANDLER 9
438functions, while for the second case the
439.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
440and
441.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
442macros are provided.
443These differ in that
444.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
445takes an extra first argument that will carry the
446.Fa "tag"
447upon return.
448Eventhandlers registered with either of these will not run
449.Fa func
450directly but
451.Fa func
452will be called from an internal iterator function for each vnet.
453Both macros can only be used for eventhandlers that do not take
454additional arguments, as the variadic arguments from an
455.Xr EVENTHANDLER_INVOKE 9
456call will be ignored.
457.\" ------------------------------------------------------------
458.Ss "Sysctl Handling"
459.\"
460A
461.Xr sysctl 9
462can be virtualized by using one of the
463.Fn SYSCTL_VNET_*
464macros.
465.Pp
466They take the same arguments as the standard
467.Xr sysctl 9
468functions, with the only difference being that the
469.Fa ptr
470argument has to be passed as
471.Ql &VNET_NAME(foo)
472instead of
473.Ql &foo
474so that the variable can be selected from the correct memory
475region of the virtual network stack instance of the caller.
476.Pp
477For the very rare case where a sysctl handler function would want to
478handle
479.Fa arg1
480itself, the
481.Fn VNET_SYSCTL_ARG req arg1
482macro is provided, which will translate the
483.Fa arg1
484argument to the correct memory address in the virtual network stack
485context of the caller.
486.\" ------------------------------------------------------------
487.Sh SEE ALSO
488.Xr jail 2 ,
489.Xr kvm 3 ,
490.Xr EVENTHANDLER 9 ,
491.\" .Xr pcpu 9 ,
492.Xr KASSERT 9 ,
493.Xr sysctl 9
494.\" .Xr SYSINIT 9
495.Pp
496Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel,
497USENIX ATC'03, June 2003, Boston
498.Sh HISTORY
499The virtual network stack implementation first appeared in
500.Fx 8.0 .
501.Sh AUTHORS
502.An -nosplit
503The
504.Nm
505framework was designed and implemented at the University of Zagreb by
506.An Marko Zec
507under sponsorship of the FreeBSD Foundation and NLnet Foundation,
508and later extended and refined by
509.An Bjoern A. Zeeb
510(also under FreeBSD Foundation sponsorship), and
511.An Robert Watson .
512.Pp
513This manual page was written by
514.An Bjoern A. Zeeb, CK Software GmbH,
515under sponsorship from the FreeBSD Foundation.
516