xref: /openbsd/usr.sbin/vmd/priv.c (revision 6f40fd34)
1 /*	$OpenBSD: priv.c,v 1.9 2017/05/04 08:26:06 reyk Exp $	*/
2 
3 /*
4  * Copyright (c) 2016 Reyk Floeter <reyk@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/param.h>	/* nitems */
20 #include <sys/queue.h>
21 #include <sys/stat.h>
22 #include <sys/socket.h>
23 #include <sys/un.h>
24 #include <sys/ioctl.h>
25 #include <sys/tree.h>
26 
27 #include <net/if.h>
28 #include <netinet/in.h>
29 #include <netinet/if_ether.h>
30 #include <net/if_bridge.h>
31 
32 #include <arpa/inet.h>
33 
34 #include <errno.h>
35 #include <event.h>
36 #include <fcntl.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <unistd.h>
41 #include <signal.h>
42 #include <ctype.h>
43 
44 #include "proc.h"
45 #include "vmd.h"
46 
47 int	 priv_dispatch_parent(int, struct privsep_proc *, struct imsg *);
48 void	 priv_run(struct privsep *, struct privsep_proc *, void *);
49 
50 static struct privsep_proc procs[] = {
51 	{ "parent",	PROC_PARENT,	priv_dispatch_parent }
52 };
53 
54 void
55 priv(struct privsep *ps, struct privsep_proc *p)
56 {
57 	proc_run(ps, p, procs, nitems(procs), priv_run, NULL);
58 }
59 
60 void
61 priv_run(struct privsep *ps, struct privsep_proc *p, void *arg)
62 {
63 	struct vmd		*env = ps->ps_env;
64 
65 	/*
66 	 * no pledge(2) in the "priv" process:
67 	 * write ioctls are not permitted by pledge.
68 	 */
69 
70 	/* Open our own socket for generic interface ioctls */
71 	if ((env->vmd_fd = socket(AF_INET, SOCK_DGRAM, 0)) == -1)
72 		fatal("socket");
73 }
74 
75 int
76 priv_dispatch_parent(int fd, struct privsep_proc *p, struct imsg *imsg)
77 {
78 	const char		*desct[] = { "tap", "switch", "bridge", NULL };
79 	struct privsep		*ps = p->p_ps;
80 	struct vmop_ifreq	 vfr;
81 	struct vmd		*env = ps->ps_env;
82 	struct ifreq		 ifr;
83 	struct ifbreq		 ifbr;
84 	struct ifgroupreq	 ifgr;
85 	struct ifaliasreq	 ifra;
86 	char			 type[IF_NAMESIZE];
87 
88 	switch (imsg->hdr.type) {
89 	case IMSG_VMDOP_PRIV_IFDESCR:
90 	case IMSG_VMDOP_PRIV_IFCREATE:
91 	case IMSG_VMDOP_PRIV_IFRDOMAIN:
92 	case IMSG_VMDOP_PRIV_IFADD:
93 	case IMSG_VMDOP_PRIV_IFUP:
94 	case IMSG_VMDOP_PRIV_IFDOWN:
95 	case IMSG_VMDOP_PRIV_IFGROUP:
96 	case IMSG_VMDOP_PRIV_IFADDR:
97 		IMSG_SIZE_CHECK(imsg, &vfr);
98 		memcpy(&vfr, imsg->data, sizeof(vfr));
99 
100 		/* We should not get malicious requests from the parent */
101 		if (priv_getiftype(vfr.vfr_name, type, NULL) == -1 ||
102 		    priv_findname(type, desct) == -1)
103 			fatalx("%s: rejected priv operation on interface: %s",
104 			    __func__, vfr.vfr_name);
105 		break;
106 	case IMSG_VMDOP_CONFIG:
107 	case IMSG_CTL_RESET:
108 		break;
109 	default:
110 		return (-1);
111 	}
112 
113 	switch (imsg->hdr.type) {
114 	case IMSG_VMDOP_PRIV_IFDESCR:
115 		/* Set the interface description */
116 		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
117 		ifr.ifr_data = (caddr_t)vfr.vfr_value;
118 		if (ioctl(env->vmd_fd, SIOCSIFDESCR, &ifr) < 0)
119 			log_warn("SIOCSIFDESCR");
120 		break;
121 	case IMSG_VMDOP_PRIV_IFCREATE:
122 		/* Create the bridge if it doesn't exist */
123 		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
124 		if (ioctl(env->vmd_fd, SIOCIFCREATE, &ifr) < 0 &&
125 		    errno != EEXIST)
126 			log_warn("SIOCIFCREATE");
127 		break;
128 	case IMSG_VMDOP_PRIV_IFRDOMAIN:
129 		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
130 		ifr.ifr_rdomainid = vfr.vfr_id;
131 		if (ioctl(env->vmd_fd, SIOCSIFRDOMAIN, &ifr) < 0)
132 			log_warn("SIOCSIFRDOMAIN");
133 		break;
134 	case IMSG_VMDOP_PRIV_IFADD:
135 		if (priv_getiftype(vfr.vfr_value, type, NULL) == -1)
136 			fatalx("%s: rejected to add interface: %s",
137 			    __func__, vfr.vfr_value);
138 
139 		/* Attach the device to the bridge */
140 		strlcpy(ifbr.ifbr_name, vfr.vfr_name,
141 		    sizeof(ifbr.ifbr_name));
142 		strlcpy(ifbr.ifbr_ifsname, vfr.vfr_value,
143 		    sizeof(ifbr.ifbr_ifsname));
144 		if (ioctl(env->vmd_fd, SIOCBRDGADD, &ifbr) < 0 &&
145 		    errno != EEXIST)
146 			log_warn("SIOCBRDGADD");
147 		break;
148 	case IMSG_VMDOP_PRIV_IFUP:
149 	case IMSG_VMDOP_PRIV_IFDOWN:
150 		/* Set the interface status */
151 		strlcpy(ifr.ifr_name, vfr.vfr_name, sizeof(ifr.ifr_name));
152 		if (ioctl(env->vmd_fd, SIOCGIFFLAGS, &ifr) < 0) {
153 			log_warn("SIOCGIFFLAGS");
154 			break;
155 		}
156 		if (imsg->hdr.type == IMSG_VMDOP_PRIV_IFUP)
157 			ifr.ifr_flags |= IFF_UP;
158 		else
159 			ifr.ifr_flags &= ~IFF_UP;
160 		if (ioctl(env->vmd_fd, SIOCSIFFLAGS, &ifr) < 0)
161 			log_warn("SIOCSIFFLAGS");
162 		break;
163 	case IMSG_VMDOP_PRIV_IFGROUP:
164 		if (priv_validgroup(vfr.vfr_value) == -1)
165 			fatalx("%s: invalid group name", __func__);
166 
167 		if (strlcpy(ifgr.ifgr_name, vfr.vfr_name,
168 		    sizeof(ifgr.ifgr_name)) >= sizeof(ifgr.ifgr_name) ||
169 		    strlcpy(ifgr.ifgr_group, vfr.vfr_value,
170 		    sizeof(ifgr.ifgr_group)) >= sizeof(ifgr.ifgr_group))
171 			fatalx("%s: group name too long", __func__);
172 
173 		if (ioctl(env->vmd_fd, SIOCAIFGROUP, &ifgr) < 0 &&
174 		    errno != EEXIST)
175 			log_warn("SIOCAIFGROUP");
176 		break;
177 	case IMSG_VMDOP_PRIV_IFADDR:
178 		memset(&ifra, 0, sizeof(ifra));
179 
180 		/* Set the interface address */
181 		strlcpy(ifra.ifra_name, vfr.vfr_name, sizeof(ifra.ifra_name));
182 
183 		memcpy(&ifra.ifra_addr, &vfr.vfr_ifra.ifra_addr,
184 		    sizeof(ifra.ifra_addr));
185 		ifra.ifra_addr.sa_family = AF_INET;
186 		ifra.ifra_addr.sa_len = sizeof(struct sockaddr_in);
187 
188 		memcpy(&ifra.ifra_mask, &vfr.vfr_ifra.ifra_mask,
189 		    sizeof(ifra.ifra_mask));
190 		ifra.ifra_mask.sa_family = AF_INET;
191 		ifra.ifra_mask.sa_len = sizeof(struct sockaddr_in);
192 
193 		if (ioctl(env->vmd_fd, SIOCAIFADDR, &ifra) < 0)
194 			log_warn("SIOCAIFADDR");
195 		break;
196 	case IMSG_VMDOP_CONFIG:
197 		config_getconfig(env, imsg);
198 		break;
199 	case IMSG_CTL_RESET:
200 		config_getreset(env, imsg);
201 		break;
202 	default:
203 		return (-1);
204 	}
205 
206 	return (0);
207 }
208 
/*
 * Split an interface name of the form <type><unit>, e.g. "tap0".
 *
 * The leading non-digit part is copied to `type' and NUL-terminated; it
 * has to be shorter than IF_NAMESIZE - 1.  The remainder is parsed with
 * strtonum(3) and stored in `*unitptr' unless `unitptr' is NULL.
 *
 * Returns 0 on success and -1 if the type part is empty or too long,
 * if no unit follows it, or if strtonum(3) rejects the unit.
 */
int
priv_getiftype(char *ifname, char *type, unsigned int *unitptr)
{
	const char	*numerr = NULL;
	const char	*digits;
	size_t		 typelen;
	unsigned int	 num;

	/* Extract the name part */
	typelen = strcspn(ifname, "0123456789");
	if (typelen == 0)
		return (-1);		/* name starts with a digit or is empty */
	digits = ifname + typelen;
	if (*digits == '\0')
		return (-1);		/* no unit number at all */
	if (typelen >= (IF_NAMESIZE - 1))
		return (-1);
	memcpy(type, ifname, typelen);
	type[typelen] = 0;

	/* Now parse the unit (we don't strictly validate the format here) */
	num = strtonum(digits, 0, UINT_MAX, &numerr);
	if (numerr != NULL)
		return (-1);
	if (unitptr != NULL)
		*unitptr = num;

	return (0);
}
232 
/*
 * Look up `name' in the NULL-terminated string array `names'.
 * Returns 0 if an entry compares equal to `name' and -1 otherwise.
 */
int
priv_findname(const char *name, const char **names)
{
	const char	**np;

	for (np = names; *np != NULL; np++)
		if (strcmp(*np, name) == 0)
			return (0);

	return (-1);
}
245 
/*
 * Check whether `name' is acceptable as an interface group name.
 *
 * Returns -1 if the name does not fit into IF_NAMESIZE bytes including
 * the terminating NUL, or if its last character is a digit; returns 0
 * otherwise.  The empty string is accepted here.
 */
int
priv_validgroup(const char *name)
{
	const size_t	 len = strlen(name);

	if (len >= IF_NAMESIZE)
		return (-1);
	/*
	 * Group can not end with a digit.  The cast is required: passing a
	 * negative plain char to isdigit(3) is undefined behavior.
	 */
	if (len > 0 && isdigit((unsigned char)name[len - 1]))
		return (-1);
	return (0);
}
256 
257 /*
258  * Called from the process peer
259  */
260 
261 int
262 vm_priv_ifconfig(struct privsep *ps, struct vmd_vm *vm)
263 {
264 	struct vmd		*env = ps->ps_env;
265 	struct vm_create_params	*vcp = &vm->vm_params.vmc_params;
266 	struct vmd_if		*vif;
267 	struct vmd_switch	*vsw;
268 	unsigned int		 i;
269 	struct vmop_ifreq	 vfr, vfbr;
270 	struct sockaddr_in	*sin4;
271 
272 	for (i = 0; i < VMM_MAX_NICS_PER_VM; i++) {
273 		vif = &vm->vm_ifs[i];
274 
275 		if (vif->vif_name == NULL)
276 			break;
277 
278 		if (strlcpy(vfr.vfr_name, vif->vif_name,
279 		    sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name))
280 			return (-1);
281 
282 		/* Set non-default rdomain */
283 		if (vif->vif_flags & VMIFF_RDOMAIN) {
284 			vfr.vfr_id = vif->vif_rdomain;
285 
286 			log_debug("%s: interface %s rdomain %u", __func__,
287 			    vfr.vfr_name, vfr.vfr_id);
288 
289 			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
290 			    &vfr, sizeof(vfr));
291 		}
292 
293 		/* Description can be truncated */
294 		(void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value),
295 		    "vm%u-if%u-%s", vm->vm_vmid, i, vcp->vcp_name);
296 
297 		log_debug("%s: interface %s description %s", __func__,
298 		    vfr.vfr_name, vfr.vfr_value);
299 
300 		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR,
301 		    &vfr, sizeof(vfr));
302 
303 		/* Add interface to bridge/switch */
304 		if ((vsw = switch_getbyname(vif->vif_switch)) != NULL) {
305 			memset(&vfbr, 0, sizeof(vfbr));
306 
307 			if (strlcpy(vfbr.vfr_name, vsw->sw_ifname,
308 			    sizeof(vfbr.vfr_name)) >= sizeof(vfbr.vfr_name))
309 				return (-1);
310 			if (strlcpy(vfbr.vfr_value, vif->vif_name,
311 			    sizeof(vfbr.vfr_value)) >= sizeof(vfbr.vfr_value))
312 				return (-1);
313 			vfbr.vfr_id = vsw->sw_rdomain;
314 
315 			log_debug("%s: interface %s add %s", __func__,
316 			    vfbr.vfr_name, vfbr.vfr_value);
317 
318 			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFCREATE,
319 			    &vfbr, sizeof(vfbr));
320 			if (vsw->sw_flags & VMIFF_RDOMAIN)
321 				proc_compose(ps,
322 				    PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
323 				    &vfbr, sizeof(vfbr));
324 			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD,
325 			    &vfbr, sizeof(vfbr));
326 		} else if (vif->vif_switch != NULL)
327 			log_warnx("switch %s not found", vif->vif_switch);
328 
329 		/* First group is defined per-interface */
330 		if (vif->vif_group) {
331 			if (strlcpy(vfr.vfr_value, vif->vif_group,
332 			    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
333 				return (-1);
334 
335 			log_debug("%s: interface %s group %s", __func__,
336 			    vfr.vfr_name, vfr.vfr_value);
337 
338 			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP,
339 			    &vfr, sizeof(vfr));
340 		}
341 
342 		/* The second group is defined per-switch */
343 		if (vsw != NULL && vsw->sw_group != NULL) {
344 			if (strlcpy(vfr.vfr_value, vsw->sw_group,
345 			    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
346 				return (-1);
347 
348 			log_debug("%s: interface %s group %s switch %s",
349 			    __func__, vfr.vfr_name, vfr.vfr_value,
350 			    vsw->sw_name);
351 
352 			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFGROUP,
353 			    &vfr, sizeof(vfr));
354 		}
355 
356 		/* Set the new interface status to up or down */
357 		proc_compose(ps, PROC_PRIV, (vif->vif_flags & VMIFF_UP) ?
358 		    IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
359 		    &vfr, sizeof(vfr));
360 
361 		if (vm->vm_params.vmc_ifflags[i] & VMIFF_LOCAL) {
362 			sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_mask;
363 			sin4->sin_family = AF_INET;
364 			sin4->sin_len = sizeof(*sin4);
365 			sin4->sin_addr.s_addr = htonl(0xfffffffe);
366 
367 			sin4 = (struct sockaddr_in *)&vfr.vfr_ifra.ifra_addr;
368 			sin4->sin_family = AF_INET;
369 			sin4->sin_len = sizeof(*sin4);
370 			if ((sin4->sin_addr.s_addr =
371 			    vm_priv_addr(&env->vmd_cfg.cfg_localprefix,
372 			    vm->vm_vmid, i, 0)) == 0)
373 				return (-1);
374 
375 			log_debug("%s: interface %s address %s/31",
376 			    __func__, vfr.vfr_name,
377 			    inet_ntoa(sin4->sin_addr));
378 
379 			proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADDR,
380 			    &vfr, sizeof(vfr));
381 		}
382 	}
383 
384 	return (0);
385 }
386 
387 int
388 vm_priv_brconfig(struct privsep *ps, struct vmd_switch *vsw)
389 {
390 	struct vmd_if		*vif;
391 	struct vmop_ifreq	 vfr;
392 
393 	memset(&vfr, 0, sizeof(vfr));
394 
395 	if (strlcpy(vfr.vfr_name, vsw->sw_ifname,
396 	    sizeof(vfr.vfr_name)) >= sizeof(vfr.vfr_name))
397 		return (-1);
398 	vfr.vfr_id = vsw->sw_rdomain;
399 
400 	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFCREATE,
401 	    &vfr, sizeof(vfr));
402 
403 	/* Set non-default rdomain */
404 	if (vsw->sw_flags & VMIFF_RDOMAIN)
405 		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFRDOMAIN,
406 		    &vfr, sizeof(vfr));
407 
408 	/* Description can be truncated */
409 	(void)snprintf(vfr.vfr_value, sizeof(vfr.vfr_value),
410 	    "switch%u-%s", vsw->sw_id, vsw->sw_name);
411 
412 	log_debug("%s: interface %s description %s", __func__,
413 	    vfr.vfr_name, vfr.vfr_value);
414 
415 	proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFDESCR,
416 	    &vfr, sizeof(vfr));
417 
418 	TAILQ_FOREACH(vif, &vsw->sw_ifs, vif_entry) {
419 		if (strlcpy(vfr.vfr_value, vif->vif_name,
420 		    sizeof(vfr.vfr_value)) >= sizeof(vfr.vfr_value))
421 			return (-1);
422 
423 		log_debug("%s: interface %s add %s", __func__,
424 		    vfr.vfr_name, vfr.vfr_value);
425 
426 		proc_compose(ps, PROC_PRIV, IMSG_VMDOP_PRIV_IFADD,
427 		    &vfr, sizeof(vfr));
428 	}
429 
430 	/* Set the new interface status to up or down */
431 	proc_compose(ps, PROC_PRIV, (vsw->sw_flags & VMIFF_UP) ?
432 	    IMSG_VMDOP_PRIV_IFUP : IMSG_VMDOP_PRIV_IFDOWN,
433 	    &vfr, sizeof(vfr));
434 
435 	vsw->sw_running = 1;
436 	return (0);
437 }
438 
439 uint32_t
440 vm_priv_addr(struct address *h, uint32_t vmid, int idx, int isvm)
441 {
442 	in_addr_t		prefix, mask, addr;
443 
444 	/*
445 	 * 1. Set the address prefix and mask, 100.64.0.0/10 by default.
446 	 */
447 	if (h->ss.ss_family != AF_INET ||
448 	    h->prefixlen < 0 || h->prefixlen > 32)
449 		fatal("local prefix");
450 	prefix = ss2sin(&h->ss)->sin_addr.s_addr;
451 	mask = prefixlen2mask(h->prefixlen);
452 
453 	/* 2. Encode the VM ID as a per-VM subnet range N, 10.64.N.0/24. */
454 	addr = vmid << 8;
455 
456 	/*
457 	 * 3. Assign a /31 subnet M per VM interface, 10.64.N.M/31.
458 	 * Each subnet contains exactly two IP addresses; skip the
459 	 * first subnet to avoid a gateway address ending with .0.
460 	 */
461 	addr |= (idx + 1) * 2;
462 
463 	/* 4. Use the first address for the gateway, the second for the VM. */
464 	if (isvm)
465 		addr++;
466 
467 	/* 5. Convert to network byte order and add the prefix. */
468 	addr = htonl(addr) | prefix;
469 
470 	/*
471 	 * Validate the results:
472 	 * - the address should not exceed the prefix (eg. VM ID to high).
473 	 * - up to 126 interfaces can be encoded per VM.
474 	 */
475 	if (prefix != (addr & mask) || idx >= 0x7f) {
476 		log_warnx("%s: dhcp address range exceeded,"
477 		    " vm id %u interface %d", __func__, vmid, idx);
478 		return (0);
479 	}
480 
481 	return (addr);
482 }
483