xref: /freebsd/share/man/man9/vnet.9 (revision 271171e0)
1.\"-
2.\" Copyright (c) 2010 The FreeBSD Foundation
3.\"
4.\" This documentation was written by CK Software GmbH under sponsorship from
5.\" the FreeBSD Foundation.
6.\"
7.\" Redistribution and use in source and binary forms, with or without
8.\" modification, are permitted provided that the following conditions
9.\" are met:
10.\" 1. Redistributions of source code must retain the above copyright
11.\"    notice, this list of conditions and the following disclaimer.
12.\" 2. Redistributions in binary form must reproduce the above copyright
13.\"    notice, this list of conditions and the following disclaimer in the
14.\"    documentation and/or other materials provided with the distribution.
15.\"
16.\" THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
17.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19.\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
20.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26.\" SUCH DAMAGE.
27.\"
28.\" $FreeBSD$
29.\"
30.Dd December 10, 2020
31.Dt VNET 9
32.Os
33.Sh NAME
34.Nm VNET
35.Nd "network subsystem virtualization infrastructure"
36.Sh SYNOPSIS
37.Cd "options VIMAGE"
38.Cd "options VNET_DEBUG"
39.Pp
40.In net/vnet.h
41.\"------------------------------------------------------------
42.Ss "Constants and Global Variables"
43.\"
44.Dv VNET_SETNAME
45.\"	"set_vnet"
46.Dv VNET_SYMPREFIX
47.\"	"vnet_entry_"
48.Vt extern struct vnet *vnet0;
49.\"------------------------------------------------------------
50.Ss "Variable Declaration"
51.Fo VNET
52.Fa "name"
53.Fc
54.\"
55.Fo VNET_NAME
56.Fa "name"
57.Fc
58.\"
59.Fo VNET_DECLARE
60.Fa "type" "name"
61.Fc
62.\"
63.Fo VNET_DEFINE
64.Fa "type" "name"
65.Fc
66.\"
67.Fo VNET_DEFINE_STATIC
68.Fa "type" "name"
69.Fc
70.\"
71.Bd -literal
72#define	V_name	VNET(name)
73.Ed
74.\" ------------------------------------------------------------
75.Ss "Virtual Instance Selection"
76.\"
77.Fo CRED_TO_VNET
78.Fa "struct ucred *"
79.Fc
80.\"
81.Fo TD_TO_VNET
82.Fa "struct thread *"
83.Fc
84.\"
85.Fo P_TO_VNET
86.Fa "struct proc *"
87.Fc
88.\"
89.Fo IS_DEFAULT_VNET
90.Fa "struct vnet *"
91.Fc
92.\"
93.Fo VNET_ASSERT
94.Fa exp msg
95.Fc
96.\"
97.Fo CURVNET_SET
98.Fa "struct vnet *"
99.Fc
100.\"
101.Fo CURVNET_SET_QUIET
102.Fa "struct vnet *"
103.Fc
104.\"
105.Fn CURVNET_RESTORE
106.\"
107.Fo VNET_ITERATOR_DECL
108.Fa "struct vnet *"
109.Fc
110.\"
111.Fo VNET_FOREACH
112.Fa "struct vnet *"
113.Fc
114.\" ------------------------------------------------------------
115.Ss "Locking"
116.\"
117.Fn VNET_LIST_RLOCK
118.Fn VNET_LIST_RUNLOCK
119.Fn VNET_LIST_RLOCK_NOSLEEP
120.Fn VNET_LIST_RUNLOCK_NOSLEEP
121.\" ------------------------------------------------------------
122.Ss "Startup and Teardown Functions"
123.\"
124.Ft "struct vnet *"
125.Fo vnet_alloc
126.Fa void
127.Fc
128.\"
129.Ft void
130.Fo vnet_destroy
131.Fa "struct vnet *"
132.Fc
133.\"
134.Fo VNET_SYSINIT
135.Fa ident
136.Fa "enum sysinit_sub_id subsystem"
137.Fa "enum sysinit_elem_order order"
138.Fa "sysinit_cfunc_t func"
139.Fa "const void *arg"
140.Fc
141.\"
142.Fo VNET_SYSUNINIT
143.Fa ident
144.Fa "enum sysinit_sub_id subsystem"
145.Fa "enum sysinit_elem_order order"
146.Fa "sysinit_cfunc_t func"
147.Fa "const void *arg"
148.Fc
149.\" ------------------------------------------------------------
150.Ss "Eventhandlers"
151.\"
152.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER
153.Fa "const char *name"
154.Fa "void *func"
155.Fa "void *arg"
156.Fa "int priority"
157.Fc
158.\"
159.Fo VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
160.Fa "eventhandler_tag tag"
161.Fa "const char *name"
162.Fa "void *func"
163.Fa "void *arg"
164.Fa "int priority"
165.Fc
166.\" ------------------------------------------------------------
167.Ss "Sysctl Handling"
168.Fo SYSCTL_VNET_INT
169.Fa parent nbr name access ptr val descr
170.Fc
171.Fo SYSCTL_VNET_PROC
172.Fa parent nbr name access ptr arg handler fmt descr
173.Fc
174.Fo SYSCTL_VNET_STRING
175.Fa parent nbr name access arg len descr
176.Fc
177.Fo SYSCTL_VNET_STRUCT
178.Fa parent nbr name access ptr type descr
179.Fc
180.Fo SYSCTL_VNET_UINT
181.Fa parent nbr name access ptr val descr
182.Fc
183.Fo VNET_SYSCTL_ARG
184.Fa req arg1
185.Fc
186.\" ------------------------------------------------------------
187.Sh DESCRIPTION
188.Nm
189is the name of a technique to virtualize the network stack.
190The basic idea is to change global resources, most notably variables, into
191per-network-stack resources and have functions, sysctls, eventhandlers,
192etc. access and handle them in the context of the correct instance.
193Each (virtual) network stack is attached to a
194.Em prison ,
195with
196.Vt vnet0
197being the unrestricted default network stack of the base system.
198.Pp
199The global defines for
200.Dv VNET_SETNAME
201and
202.Dv VNET_SYMPREFIX
203are shared with
204.Xr kvm 3
205to access internals for debugging reasons.
206.\" ------------------------------------------------------------
207.Ss "Variable Declaration"
208.\"
209Variables are virtualized by using the
210.Fn VNET_DEFINE
211macro rather than writing them out as
212.Em type name .
213One can still use static initialization, e.g.,
214.Pp
215.Dl Li VNET_DEFINE(int, foo) = 1;
216.Pp
217Variables declared with the static keyword can use the
218.Fn VNET_DEFINE_STATIC
219macro, e.g.,
220.Pp
221.Dl Li VNET_DEFINE_STATIC(SLIST_HEAD(, bar), bars);
222.Pp
223Static initialization is not possible when the virtualized variable
224would need to be referenced, e.g., with
225.Dq TAILQ_HEAD_INITIALIZER() .
226In that case a
227.Fn VNET_SYSINIT
228based initialization function must be used.
229.Pp
230External variables have to be declared using the
231.Fn VNET_DECLARE
232macro.
233In either case the convention is to define another macro,
234that is then used throughout the implementation to access that variable.
235The variable name is usually prefixed by
236.Em V_
237to express that it is virtualized.
238The
239.Fn VNET
240macro will then translate accesses to that variable to the copy of the
241currently selected instance (see the
242.Sx "Virtual Instance Selection"
243section):
244.Pp
245.Dl Li #define	V_name	VNET(name)
246.Pp
247.Em NOTE:
248Do not confuse this with the convention used by
249.Xr VFS 9 .
250.Pp
251The
252.Fn VNET_NAME
253macro returns the offset within the memory region of the virtual network
254stack instance.
255It is usually only used with
256.Fn SYSCTL_VNET_*
257macros.
258.\" ------------------------------------------------------------
259.Ss "Virtual Instance Selection"
260.\"
261There are three different places where the current virtual
262network stack pointer is stored and can be taken from:
263.Bl -enum -offset indent
264.It
265a
266.Em prison :
267.Dl "(struct prison *)->pr_vnet"
268.Pp
269For convenience the following macros are provided:
270.Bd -literal -compact -offset indent
271.Fn CRED_TO_VNET "struct ucred *"
272.Fn TD_TO_VNET "struct thread *"
273.Fn P_TO_VNET "struct proc *"
274.Ed
275.It
276a
277.Em socket :
278.Dl "(struct socket *)->so_vnet"
279.It
280an
281.Em interface :
282.Dl "(struct ifnet *)->if_vnet"
283.El
284.Pp
285.\"
286In addition the currently active instance is cached in
287.Dq "curthread->td_vnet"
288which is usually only accessed through the
289.Dv curvnet
290macro.
291.Pp
292.\"
293To set the correct context of the current virtual network instance, use the
294.Fn CURVNET_SET
295or
296.Fn CURVNET_SET_QUIET
297macros.
298The
299.Fn CURVNET_SET_QUIET
300version will not record vnet recursions in case the kernel was compiled
301with
302.Cd "options VNET_DEBUG"
303and should thus only be used in well known cases, where recursion is
304unavoidable.
305Both macros will save the previous state on the stack and it must be restored
306with the
307.Fn CURVNET_RESTORE
308macro.
309.Pp
310.Em NOTE:
311As the previous state is saved on the stack, you cannot have multiple
312.Fn CURVNET_SET
313calls in the same block.
314.Pp
315.Em NOTE:
316As the previous state is saved on the stack, a
317.Fn CURVNET_RESTORE
318call has to be in the same block as the
319.Fn CURVNET_SET
320call or in a subblock with the same idea of the saved instances as the
321outer block.
322.Pp
323.Em NOTE:
324As each macro is a set of operations and, as previously explained, cannot
325be put into its own block when defined, one cannot conditionally set
326the current vnet context.
327The following will
328.Em not
329work:
330.Bd -literal -offset indent
331if (condition)
332	CURVNET_SET(vnet);
333.Ed
334.Pp
335nor would this work:
336.Bd -literal -offset indent
337if (condition) {
338	CURVNET_SET(vnet);
339}
340CURVNET_RESTORE();
341.Ed
342.Pp
343.\"
344Sometimes one needs to loop over all virtual instances, for example to update
345virtual from global state, to run a function from a
346.Xr callout 9
347for each instance, etc.
348For those cases the
349.Fn VNET_ITERATOR_DECL
350and
351.Fn VNET_FOREACH
352macros are provided.
353The former macro defines the variable that iterates over the loop,
354and the latter loops over all of the virtual network stack instances.
355See
356.Sx "Locking"
357for how to safely traverse the list of all virtual instances.
358.Pp
359.\"
360The
361.Fn IS_DEFAULT_VNET
362macro provides a safe way to check whether the currently active instance is the
363unrestricted default network stack of the base system
364.Pq Vt vnet0 .
365.Pp
366.\"
367The
368.Fn VNET_ASSERT
369macro provides a way to conditionally add assertions that are only active with
370.Cd "options VIMAGE"
371compiled in and either
372.Cd "options VNET_DEBUG"
373or
374.Cd "options INVARIANTS"
375enabled as well.
376It uses the same semantics as
377.Xr KASSERT 9 .
378.\" ------------------------------------------------------------
379.Ss "Locking"
380.\"
381For public access to the list of virtual network stack instances,
382e.g., by the
383.Fn VNET_FOREACH
384macro, read locks are provided.
385Macros are used to abstract from the actual type of the locks.
386If a caller may sleep while traversing the list, it must use the
387.Fn VNET_LIST_RLOCK
388and
389.Fn VNET_LIST_RUNLOCK
390macros.
391Otherwise, the caller can use
392.Fn VNET_LIST_RLOCK_NOSLEEP
393and
394.Fn VNET_LIST_RUNLOCK_NOSLEEP .
395.\" ------------------------------------------------------------
396.Ss "Startup and Teardown Functions"
397.\"
398To start or tear down a virtual network stack instance the internal
399functions
400.Fn vnet_alloc
401and
402.Fn vnet_destroy
403are provided and called from the jail framework.
404They run the publicly provided methods to handle network stack
405startup and teardown.
406.Pp
407For public control, the system startup interface has been enhanced
408to not only handle a system boot but to also handle a virtual
409network stack startup and teardown.
410To the base system the
411.Fn VNET_SYSINIT
412and
413.Fn VNET_SYSUNINIT
414macros look exactly as if there were no virtual network stack.
415In fact, if
416.Cd "options VIMAGE"
417is not compiled in they are compiled to the standard
418.Fn SYSINIT
419macros.
420In addition to that they are run for each virtual network stack
421when starting or, in reverse order, when shutting down.
422.\" ------------------------------------------------------------
423.Ss "Eventhandlers"
424.\"
425Eventhandlers can be handled in two ways:
426.Pp
427.Bl -enum -offset indent -compact
428.It
429save the
430.Em tags
431returned in each virtual instance and properly free the eventhandlers
432on teardown using those, or
433.It
434use one eventhandler that will iterate over all virtual network
435stack instances.
436.El
437.Pp
438For the first case one can just use the normal
439.Xr EVENTHANDLER 9
440functions, while for the second case the
441.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER
442and
443.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
444macros are provided.
445These differ in that
446.Fn VNET_GLOBAL_EVENTHANDLER_REGISTER_TAG
447takes an extra first argument that will carry the
448.Fa "tag"
449upon return.
450Eventhandlers registered with either of these will not run
451.Fa func
452directly but
453.Fa func
454will be called from an internal iterator function for each vnet.
455Both macros can only be used for eventhandlers that do not take
456additional arguments, as the variadic arguments from an
457.Xr EVENTHANDLER_INVOKE 9
458call will be ignored.
459.\" ------------------------------------------------------------
460.Ss "Sysctl Handling"
461.\"
462A
463.Xr sysctl 9
464can be virtualized by using one of the
465.Fn SYSCTL_VNET_*
466macros.
467.Pp
468They take the same arguments as the standard
469.Xr sysctl 9
470functions, with the only difference that the
471.Fa ptr
472argument has to be passed as
473.Ql &VNET_NAME(foo)
474instead of
475.Ql &foo
476so that the variable can be selected from the correct memory
477region of the virtual network stack instance of the caller.
478.Pp
479For the very rare case where a sysctl handler function would want to
480handle
481.Fa arg1
482itself, the
483.Fn VNET_SYSCTL_ARG req arg1
484macro is provided that will translate the
485.Fa arg1
486argument to the correct memory address in the virtual network stack
487context of the caller.
488.\" ------------------------------------------------------------
489.Sh SEE ALSO
490.Xr jail 2 ,
491.Xr kvm 3 ,
492.Xr EVENTHANDLER 9 ,
493.\" .Xr pcpu 9 ,
494.Xr KASSERT 9 ,
495.Xr sysctl 9
496.\" .Xr SYSINIT 9
497.Pp
498Marko Zec, Implementing a Clonable Network Stack in the FreeBSD Kernel,
499USENIX ATC'03, June 2003, Boston
500.Sh HISTORY
501The virtual network stack implementation first appeared in
502.Fx 8.0 .
503.Sh AUTHORS
504.An -nosplit
505The
506.Nm
507framework was designed and implemented at the University of Zagreb by
508.An Marko Zec
509under sponsorship of the FreeBSD Foundation and NLnet Foundation,
510and later extended and refined by
511.An Bjoern A. Zeeb
512(also under FreeBSD Foundation sponsorship), and
513.An Robert Watson .
514.Pp
515This manual page was written by
516.An Bjoern A. Zeeb, CK Software GmbH,
517under sponsorship from the FreeBSD Foundation.
518