1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: head/sys/dev/virtio/balloon/virtio_balloon.c 326255 2017-11-27 14:52:40Z pfg $
29  */
30 
31 /*
32  * Copyright (c) 2018 The DragonFly Project.  All rights reserved.
33  *
34  * This code is derived from software contributed to The DragonFly Project
35  * by Diederik de Groot <info@talon.nl>
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  *
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in
45  *    the documentation and/or other materials provided with the
46  *    distribution.
47  * 3. Neither the name of The DragonFly Project nor the names of its
48  *    contributors may be used to endorse or promote products derived
49  *    from this software without specific, prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
54  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
55  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
56  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
57  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
58  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
59  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
60  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
61  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 /* Driver for VirtIO memory balloon devices. */
66 
67 #include <sys/cdefs.h>
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/endian.h>
72 #include <sys/kthread.h>
73 #include <sys/malloc.h>
74 #include <sys/module.h>
75 #include <sys/sglist.h>
76 #include <sys/sysctl.h>
77 #include <sys/lock.h>
78 #include <sys/queue.h>
79 
80 #include <vm/vm.h>
81 #include <vm/vm_page.h>
82 #include <sys/bus.h>
83 #include <sys/rman.h>
84 
85 #include <dev/virtual/virtio/virtio/virtio.h>
86 #include <dev/virtual/virtio/virtio/virtqueue.h>
87 #include <dev/virtual/virtio/balloon/virtio_balloon.h>
88 
/*
 * Per-device driver state.  The serializer (vtballoon_slz) is handed to
 * virtio_setup_intr() in attach and is held around the sleep/wakeup
 * handshakes between the interrupt handlers and the balloon thread.
 */
struct vtballoon_softc {
	device_t		 vtballoon_dev;
	struct lwkt_serialize    vtballoon_slz;
	uint64_t		 vtballoon_features;	/* result of feature negotiation */
	uint32_t		 vtballoon_flags;
#define VTBALLOON_FLAG_DETACH	 0x01	/* set by detach; makes the thread exit */

	struct virtqueue	*vtballoon_inflate_vq;
	struct virtqueue	*vtballoon_deflate_vq;

	uint32_t		 vtballoon_desired_npages;	/* last num_pages read from device config */
	uint32_t		 vtballoon_current_npages;	/* pages currently held by the balloon */
	TAILQ_HEAD(,vm_page)	 vtballoon_pages;	/* the pages held by the balloon */

	struct thread		*vtballoon_td;		/* balloon worker thread */
	uint32_t		*vtballoon_page_frames;	/* per-request page frame number array */
	int			 vtballoon_pagereq;	/* flags passed to vm_page_alloc() */
	int			 vtballoon_timeout;	/* timeout for the next zsleep() in vtballoon_sleep() */
	int			 vtballoon_nintr;	/* number of interrupts allocated */
	int			 vtballoon_debug;	/* bitmask of the levels below */
#define VTBALLOON_INFO     	 0x01
#define VTBALLOON_ERROR    	 0x02
#define VTBALLOON_DEBUG    	 0x04
#define VTBALLOON_TRACE    	 0x08

	struct virtqueue	*vtballoon_stats_vq;
	struct vtballoon_stat	 vtballoon_stats[VTBALLOON_S_NR];	/* buffer handed to the stats virtqueue */
	bool			 vtballoon_update_stats;	/* set by the stats interrupt, cleared by the thread */
};
118 
/*
 * Human-readable names for the balloon feature bits; registered with
 * virtio_set_feature_desc() in vtballoon_attach().  The table is
 * terminated by a { 0, NULL } entry.
 */
static struct virtio_feature_desc vtballoon_feature_desc[] = {
	{ VIRTIO_BALLOON_F_MUST_TELL_HOST,	"MustTellHost"		},
	{ VIRTIO_BALLOON_F_STATS_VQ,		"StatsVq"		},
	{ VIRTIO_BALLOON_F_DEFLATE_ON_OOM,	"DeflateOnOutOfMemory"	},
	{ 0, NULL }
};
125 
/*
 * Conditional debug print: emits the message through device_printf(),
 * prefixed with the calling function name and line number, only when
 * one of the bits in _level is set in the softc's vtballoon_debug mask.
 * _msg must be a string literal (it is concatenated with the prefix).
 */
#define vtballoon_dprintf(_sc, _level, _msg, _args ...) do {	    \
	if ((_sc)->vtballoon_debug & (_level))			  \
		device_printf((_sc)->vtballoon_dev, "%s:%d: "_msg,      \
		  __FUNCTION__, __LINE__, ##_args);		     \
} while (0)
131 
/*
 * Forward declarations for the file-local driver functions, grouped as:
 * device methods, interrupt/virtqueue setup, interrupt handlers,
 * inflate/deflate helpers, page helpers, and the worker thread plus its
 * tunable/sysctl setup.
 */
static int		vtballoon_probe(device_t);
static int		vtballoon_attach(device_t);
static int		vtballoon_detach(device_t);

static int		vtballoon_alloc_intrs(struct vtballoon_softc *sc);

static void		vtballoon_negotiate_features(struct vtballoon_softc *);
static int		vtballoon_alloc_virtqueues(struct vtballoon_softc *);

static void 		vtballoon_config_change_intr(void *);

static void		vtballoon_update_stats(struct vtballoon_softc *sc);
static void		vtballoon_stats_vq_intr(void *);

static void		vtballoon_inflate_vq_intr(void *);
static void		vtballoon_deflate_vq_intr(void *);
static void		vtballoon_inflate(struct vtballoon_softc *, int);
static void		vtballoon_deflate(struct vtballoon_softc *, int);

static void		vtballoon_send_page_frames(struct vtballoon_softc *,
			    struct virtqueue *, int);

static void		vtballoon_pop(struct vtballoon_softc *);
static void		vtballoon_stop(struct vtballoon_softc *);

static vm_page_t	vtballoon_alloc_page(struct vtballoon_softc *);
static void		vtballoon_free_page(struct vtballoon_softc *, vm_page_t);

static int		vtballoon_sleep(struct vtballoon_softc *);
static void		vtballoon_thread(void *);
static void		vtballoon_get_tunables(struct vtballoon_softc *);
static void		vtballoon_add_sysctl(struct vtballoon_softc *);
164 
165 /*
166  * Features desired/implemented by this driver.
167  * VIRTIO_BALLOON_F_STATS_VQ | VIRTIO_BALLOON_F_MUST_TELL_HOST
168  */
169 #define VTBALLOON_FEATURES		VIRTIO_BALLOON_F_STATS_VQ
170 
/*
 * Timeout between retries when the balloon needs inflating.
 * The expansion is parenthesized so it behaves as a single value when
 * used inside a larger expression.
 */
#define VTBALLOON_LOWMEM_TIMEOUT	(hz * 100)
173 
174 /* vm_page_alloc flags */
175 #define VTBALLOON_REGULAR_ALLOC		VM_ALLOC_NORMAL
176 #define VTBALLOON_LOWMEM_ALLOC		VM_ALLOC_SYSTEM
177 
178 /*
179  * Maximum number of pages we'll request to inflate or deflate
180  * the balloon in one virtqueue request. Both Linux and NetBSD
181  * have settled on 256, doing up to 1MB at a time.
182  */
183 #define VTBALLOON_PAGES_PER_REQUEST	256
184 
/*
 * Default Debug Level
 * VTBALLOON_INFO | VTBALLOON_ERROR | VTBALLOON_DEBUG | VTBALLOON_TRACE
 *
 * Parenthesized so the mask stays intact when combined with other
 * operators (e.g. '&', which binds tighter than '|').
 */
#define VTBALLOON_DEFAULT_DEBUG_LEVEL   (VTBALLOON_INFO | VTBALLOON_ERROR)
190 
191 /*
192  * Maximum number of interrupts to request
193  */
194 #define VTBALLOON_MAX_INTERRUPTS	4
195 
/* Must be able to fit all page frames in one page (segment). */
197 CTASSERT(VTBALLOON_PAGES_PER_REQUEST * sizeof(uint32_t) <= PAGE_SIZE);
198 
/*
 * Serializer helpers.  VTBALLOON_SLZ() yields a pointer to the softc's
 * serializer; the ENTER/EXIT macros acquire and release it.  The macros
 * use their own argument (not whatever variable named 'sc' happens to
 * be in scope) and expand to a single expression without a trailing
 * semicolon, so callers write "VTBALLOON_ENTER_SLZ(sc);" as usual.
 */
#define VTBALLOON_SLZ(_sc)		(&(_sc)->vtballoon_slz)
#define VTBALLOON_ENTER_SLZ(_sc)	lwkt_serialize_enter(VTBALLOON_SLZ(_sc))
#define VTBALLOON_EXIT_SLZ(_sc)		lwkt_serialize_exit(VTBALLOON_SLZ(_sc))
202 
/* Device method table: probe, attach and detach entry points. */
static device_method_t vtballoon_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtballoon_probe),
	DEVMETHOD(device_attach,	vtballoon_attach),
	DEVMETHOD(device_detach,	vtballoon_detach),

	DEVMETHOD_END
};

/* Driver description: name, method table and per-device softc size. */
static driver_t vtballoon_driver = {
	"vtballoon",
	vtballoon_methods,
	sizeof(struct vtballoon_softc)
};
static devclass_t vtballoon_devclass;

/*
 * Register the driver under the virtio_pci parent and declare the
 * module version and its dependency on the virtio module.
 */
DRIVER_MODULE(virtio_balloon, virtio_pci, vtballoon_driver,
    vtballoon_devclass, NULL, NULL);
MODULE_VERSION(virtio_balloon, 1);
MODULE_DEPEND(virtio_balloon, virtio, 1, 1, 1);
223 
224 static int
vtballoon_probe(device_t dev)225 vtballoon_probe(device_t dev)
226 {
227 	struct vtballoon_softc *sc = device_get_softc(dev);
228 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
229 	if (virtio_get_device_type(dev) != VIRTIO_ID_BALLOON)
230 		return (ENXIO);
231 
232 	device_set_desc(dev, "VirtIO Balloon Adapter");
233 
234 	return (BUS_PROBE_DEFAULT);
235 }
236 
/*
 * One "handler <-> interrupt" binding used by vtballoon_attach().
 * The irq and idx members are passed as the second and third arguments
 * of virtio_bind_intr(); attach uses idx -1 for the config-change
 * handler and 0..2 for the inflate/deflate/stats handlers
 * (presumably the virtqueue index -- confirm against virtio_bind_intr).
 */
struct irqmap {
	int irq;
	int idx;
	driver_intr_t *handler;
	const char * handler_name;	/* only used in error messages */
};
243 
244 static int
vtballoon_attach(device_t dev)245 vtballoon_attach(device_t dev)
246 {
247 	struct vtballoon_softc *sc;
248 	int error, i;
249 
250 	sc = device_get_softc(dev);
251 	sc->vtballoon_dev = dev;
252 	sc->vtballoon_debug = VTBALLOON_DEFAULT_DEBUG_LEVEL;
253 
254 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
255 
256 	lwkt_serialize_init(VTBALLOON_SLZ(sc));
257 	TAILQ_INIT(&sc->vtballoon_pages);
258 
259 	vtballoon_get_tunables(sc);
260 	vtballoon_add_sysctl(sc);
261 
262 	virtio_set_feature_desc(dev, vtballoon_feature_desc);
263 	vtballoon_negotiate_features(sc);
264 
265 	sc->vtballoon_page_frames = contigmalloc(VTBALLOON_PAGES_PER_REQUEST *
266 	    sizeof(uint32_t), M_DEVBUF, M_NOWAIT | M_ZERO, 0, BUS_SPACE_MAXADDR, 16, 0);
267 	if (sc->vtballoon_page_frames == NULL) {
268 		error = ENOMEM;
269 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot allocate page frame request array (error:%d)\n", error);
270 		goto fail;
271 	}
272 	error = vtballoon_alloc_intrs(sc);
273 	if (error) {
274 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot allocate interrupts (error:%d)\n", error);
275 		goto fail;
276 	}
277 
278 	error = vtballoon_alloc_virtqueues(sc);
279 	if (error) {
280 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot allocate virtqueues (error:%d)\n", error);
281 		goto fail;
282 	}
283 
284 	int nrhandlers = virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ) ? 4 : 3;
285 	struct irqmap info[4];
286 
287 	/* Possible "Virtqueue <-> IRQ" configurations */
288 	switch (sc->vtballoon_nintr) {
289 	case 1:
290 		info[2] = (struct irqmap){0, -1, vtballoon_config_change_intr, "config"};
291 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
292 		info[1] = (struct irqmap){0, 1, vtballoon_deflate_vq_intr, "deflate"};
293 		info[3] = (struct irqmap){0, 2, vtballoon_stats_vq_intr, "stats"};
294 		break;
295 	case 2:
296 		info[2] = (struct irqmap){1, -1, vtballoon_config_change_intr, "config"};
297 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
298 		info[1] = (struct irqmap){0, 1, vtballoon_deflate_vq_intr, "deflate"};
299 		info[3] = (struct irqmap){0, 2, vtballoon_stats_vq_intr, "stats"};
300 		break;
301 	case 3:
302 		info[2] = (struct irqmap){2, -1, vtballoon_config_change_intr, "config"};
303 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
304 		info[1] = (struct irqmap){1, 1, vtballoon_deflate_vq_intr, "deflate"};
305 		info[3] = (struct irqmap){2, 2, vtballoon_stats_vq_intr, "stats"};
306 		break;
307 	case 4:
308 		info[2] = (struct irqmap){3, -1, vtballoon_config_change_intr, "config"};
309 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
310 		info[1] = (struct irqmap){1, 1, vtballoon_deflate_vq_intr, "deflate"};
311 		info[3] = (struct irqmap){2, 2, vtballoon_stats_vq_intr, "stats"};
312 		break;
313 	default:
314 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "Invalid interrupt vector count: %d\n", sc->vtballoon_nintr);
315 		goto fail;
316 	}
317 	for (i = 0; i < nrhandlers; i++) {
318 		error = virtio_bind_intr(sc->vtballoon_dev, info[i].irq, info[i].idx,
319 		    info[i].handler, sc);
320 		if (error) {
321 			vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot bind virtqueue '%s' handler to IRQ:%d/%d\n",
322 				info[i].handler_name, info[i].irq, sc->vtballoon_nintr);
323 			goto fail;
324 		}
325 	}
326 
327 	for (i = 0; i < sc->vtballoon_nintr; i++) {
328 		error = virtio_setup_intr(dev, i, VTBALLOON_SLZ(sc));
329 		if (error) {
330 			vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot setup virtqueue interrupt:%d (error:%d)\n", i, error);
331 			goto fail;
332 		}
333 	}
334 
335 	error = kthread_create(vtballoon_thread, sc, &sc->vtballoon_td, "virtio_balloon");
336 	if (error) {
337 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot create balloon kthread (error:%d)\n", error);
338 		goto fail;
339 	}
340 
341 	virtqueue_enable_intr(sc->vtballoon_inflate_vq);
342 	virtqueue_enable_intr(sc->vtballoon_deflate_vq);
343 
344 	if (virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ)) {
345 		virtqueue_enable_intr(sc->vtballoon_stats_vq);
346 #if 0		/* enabling this causes a panic, on asserting ASSERT_SERIALIZED(sc) in vtballoon_update_stats */
347 		/*
348 		 * Prime this stats virtqueue with one buffer so the hypervisor can
349 		 * use it to signal us later.
350 		 */
351 		VTBALLOON_ENTER_SLZ(sc);
352 		vtballoon_update_stats(sc);
353 		VTBALLOON_EXIT_SLZ(sc);
354 #endif
355 	}
356 
357 fail:
358 	if (error)
359 		vtballoon_detach(dev);
360 
361 	return (error);
362 }
363 
/*
 * Detach method (also used by vtballoon_attach() to undo a partial
 * attach).  Stops the balloon thread, disables the serializer handler,
 * tears down the interrupts, returns all ballooned pages and stops the
 * device when it was fully attached, and frees the page-frame array.
 * Always returns 0.
 */
static int
vtballoon_detach(device_t dev)
{
	struct vtballoon_softc *sc;
	int i;

	sc = device_get_softc(dev);
	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");

	if (sc->vtballoon_td != NULL) {
		VTBALLOON_ENTER_SLZ(sc);
		/* vtballoon_sleep() checks this flag and makes the thread exit. */
		sc->vtballoon_flags |= VTBALLOON_FLAG_DETACH;

		/* drain */
		wakeup_one(sc);
		/*
		 * Sleep on the thread pointer with no timeout; presumably
		 * the thread's exit wakes this channel -- TODO confirm.
		 */
		zsleep(sc->vtballoon_td, VTBALLOON_SLZ(sc), 0, "vtbdth", 0);
		VTBALLOON_EXIT_SLZ(sc);
		sc->vtballoon_td = NULL;
	}

	lwkt_serialize_handler_disable(VTBALLOON_SLZ(sc));

	for (i = 0; i < sc->vtballoon_nintr; i++)
		virtio_teardown_intr(dev, i);

	/*
	 * NOTE(review): vtballoon_pop() ends up in
	 * vtballoon_send_page_frames(), which sleeps until an interrupt
	 * handler calls wakeup_one(sc) if the host has not answered yet.
	 * The interrupts were torn down just above -- confirm that the
	 * deflate requests issued here can still complete.
	 */
	if (device_is_attached(dev)) {
		vtballoon_pop(sc);
		vtballoon_stop(sc);
	}

	if (sc->vtballoon_page_frames != NULL) {
		contigfree(sc->vtballoon_page_frames, VTBALLOON_PAGES_PER_REQUEST *
			sizeof(uint32_t), M_DEVBUF);
		sc->vtballoon_page_frames = NULL;
	}
	return (0);
}
401 
402 static void
vtballoon_negotiate_features(struct vtballoon_softc * sc)403 vtballoon_negotiate_features(struct vtballoon_softc *sc)
404 {
405 	device_t dev;
406 	uint64_t features;
407 
408 	dev = sc->vtballoon_dev;
409 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
410 	features = virtio_negotiate_features(dev, VTBALLOON_FEATURES);
411 	sc->vtballoon_features = features;
412 }
413 
vtballoon_alloc_intrs(struct vtballoon_softc * sc)414 static int vtballoon_alloc_intrs(struct vtballoon_softc *sc)
415 {
416 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
417 	int cnt, error;
418 	int intrcount = virtio_intr_count(sc->vtballoon_dev);
419 	int use_config = 1;
420 
421 	intrcount = imin(intrcount, VTBALLOON_MAX_INTERRUPTS);
422 	if (intrcount < 1)
423 		return (ENXIO);
424 
425 	cnt = intrcount;
426 	error = virtio_intr_alloc(sc->vtballoon_dev, &cnt, use_config, NULL);
427 	if (error != 0) {
428 		virtio_intr_release(sc->vtballoon_dev);
429 		return (error);
430 	}
431 	sc->vtballoon_nintr = cnt;
432 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "%d Interrupts Allocated\n", sc->vtballoon_nintr);
433 	return (0);
434 }
435 
436 static int
vtballoon_alloc_virtqueues(struct vtballoon_softc * sc)437 vtballoon_alloc_virtqueues(struct vtballoon_softc *sc)
438 {
439 	device_t dev;
440 	struct vq_alloc_info vq_info[3];
441 	int nvqs;
442 
443 	dev = sc->vtballoon_dev;
444 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
445 	nvqs = 2;
446 
447 	VQ_ALLOC_INFO_INIT(&vq_info[0], 0, &sc->vtballoon_inflate_vq,
448 		"%s inflate", device_get_nameunit(dev));
449 
450 	VQ_ALLOC_INFO_INIT(&vq_info[1], 0, &sc->vtballoon_deflate_vq,
451 		"%s deflate", device_get_nameunit(dev));
452 
453 	if (virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ)) {
454 		VQ_ALLOC_INFO_INIT(&vq_info[2], 0, &sc->vtballoon_stats_vq,
455 			"%s stats", device_get_nameunit(dev));
456 		nvqs = 3;
457 	}
458 	return (virtio_alloc_virtqueues(dev, nvqs, vq_info));
459 }
460 
461 static void
vtballoon_config_change_intr(void * arg)462 vtballoon_config_change_intr(void *arg)
463 {
464 	struct vtballoon_softc *sc = arg;
465 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
466 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
467 	wakeup_one(sc);
468 }
469 
470 static inline void
vtballoon_update_stat(struct vtballoon_softc * sc,int idx,uint16_t tag,uint64_t val)471 vtballoon_update_stat(struct vtballoon_softc *sc, int idx,
472 	uint16_t tag, uint64_t val)
473 {
474 	KASSERT(idx >= VTBALLOON_S_NR, ("Stats index out of bounds"));
475 	/*
476 	 * XXX: Required for endianess in the future
477 	 * sc->vtballoon_stats[idx].tag = virtio_is_little_endian(sc->vtballoon_dev) ? le16toh(tag) : tag;
478 	 * sc->vtballoon_stats[idx].val = virtio_is_little_endian(sc->vtballoon_dev) ? le64toh(val) : val;
479 	 * at the moment virtio balloon is always little endian.
480 	 *
481 	 */
482 	sc->vtballoon_stats[idx].tag = le16toh(tag);
483 	sc->vtballoon_stats[idx].val = le64toh(val);
484 
485 }
486 
487 /*
488  * collect guest side statistics
489  *
490  * XXX: am i using the correct memory and pagefault values
491  */
collect_balloon_stats(struct vtballoon_softc * sc)492 static unsigned int collect_balloon_stats(struct vtballoon_softc *sc)
493 {
494 	#define pages_to_bytes(x) ((uint64_t)(x) << PAGE_SHIFT)
495 	unsigned int idx = 0;
496 	struct vmtotal total;
497 	struct vmmeter vmm;
498 	struct vmstats vms;
499 	size_t vmt_size = sizeof(total);
500 	size_t vmm_size = sizeof(vmm);
501 	size_t vms_size = sizeof(vms);
502 
503 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Updating Stats Buffer\n");
504 	if (!kernel_sysctlbyname("vm.vmtotal", &total, &vmt_size, NULL, 0, NULL)) {
505 		/* Total amount of free memory )*/
506 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MEMFREE,
507 					pages_to_bytes(total.t_rm - total.t_arm));
508 		/* Total amount of memory */
509 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MEMTOT,
510 					pages_to_bytes(total.t_rm));
511 		/* Available memory as in /proc	*/
512 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_AVAIL,
513 					pages_to_bytes(total.t_arm));
514 	}
515 	if (!kernel_sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0, NULL)) {
516 		/* Disk caches */
517 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_CACHES,
518 					pages_to_bytes(vms.v_cache_count));
519 	}
520 	if (!kernel_sysctlbyname("vm.vmmeter", &vmm, &vmm_size, NULL, 0, NULL)) {
521 		/* Amount of memory swapped in */
522 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_SWAP_IN,
523 					pages_to_bytes(vmm.v_swappgsin));
524 		/* Amount of memory swapped out */
525 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_SWAP_OUT,
526 					pages_to_bytes(vmm.v_swappgsout));
527 		/* Number of major faults */
528 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MAJFLT,
529 					vmm.v_vm_faults);
530 		/* Number of minor faults */
531 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MINFLT,
532 					vmm.v_intrans);
533 	}
534 
535 	if (sc->vtballoon_debug & VTBALLOON_TRACE)  {
536 		static const char *vt_balloon_names[]=VTBALLOON_S_NAMES;
537 		int i;
538 		for (i=0; i < idx; i++) {
539 			kprintf("\t%s = %lu\n", vt_balloon_names[sc->vtballoon_stats[i].tag], sc->vtballoon_stats[i].val);
540 		}
541 	}
542 
543 	return idx;
544 }
545 
546 static void
vtballoon_update_stats(struct vtballoon_softc * sc)547 vtballoon_update_stats(struct vtballoon_softc *sc)
548 {
549 	struct virtqueue *vq = sc->vtballoon_stats_vq;
550 
551 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
552 
553 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Stats Requested\n");
554 
555 	struct sglist sg;
556 	struct sglist_seg segs[1];
557 	unsigned int num_stats;
558 	int error;
559 
560 	num_stats = collect_balloon_stats(sc);
561 
562 	sglist_init(&sg, 1, segs);
563 	error = sglist_append(&sg, sc->vtballoon_stats, sizeof(sc->vtballoon_stats[0]) * num_stats);
564 	KASSERT(error == 0, ("error adding page frames to sglist"));
565 
566 	error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
567 	KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
568 	virtqueue_notify(sc->vtballoon_stats_vq, NULL);
569 }
570 
571 /*
572  * While most virtqueues communicate guest-initiated requests to the hypervisor,
573  * the stats queue operates in reverse.  The driver(host) initializes the virtqueue
574  * with a single buffer. From that point forward, all conversations consist of
575  * a hypervisor request (a call to this function) which directs us to refill
576  * the virtqueue with a fresh stats buffer. Since stats collection can sleep,
577  * we delegate the job to the vtballoon_thread which will do the actual stats
578  * collecting work.
579  */
580 static void
vtballoon_stats_vq_intr(void * arg)581 vtballoon_stats_vq_intr(void *arg)
582 {
583 	struct vtballoon_softc *sc = arg;
584 	struct virtqueue *vq = sc->vtballoon_stats_vq;
585 
586 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
587 	if (sc->vtballoon_update_stats || !virtqueue_pending(vq))
588 		return;
589 
590 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Ballooon Stats Requested\n");
591 	sc->vtballoon_update_stats = true;
592 	wakeup_one(sc);
593 	virtqueue_dequeue(vq, NULL);
594 }
595 
596 static void
vtballoon_inflate_vq_intr(void * arg)597 vtballoon_inflate_vq_intr(void *arg)
598 {
599 	struct vtballoon_softc *sc = arg;
600 	struct virtqueue *vq = sc->vtballoon_inflate_vq;
601 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
602 	if (!virtqueue_pending(vq))
603 		return;
604 	wakeup_one(sc);
605 }
606 
607 static void
vtballoon_deflate_vq_intr(void * arg)608 vtballoon_deflate_vq_intr(void *arg)
609 {
610 	struct vtballoon_softc *sc = arg;
611 	struct virtqueue *vq = sc->vtballoon_deflate_vq;
612 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
613 	if (!virtqueue_pending(vq))
614 		return;
615 	wakeup_one(sc);
616 }
617 
618 static void
vtballoon_inflate(struct vtballoon_softc * sc,int npages)619 vtballoon_inflate(struct vtballoon_softc *sc, int npages)
620 {
621 	struct virtqueue *vq;
622 
623 	vm_page_t m;
624 	int i;
625 
626 	vq = sc->vtballoon_inflate_vq;
627 
628 	if (npages > VTBALLOON_PAGES_PER_REQUEST)
629 		npages = VTBALLOON_PAGES_PER_REQUEST;
630 
631 	for (i = 0; i < npages; i++) {
632 		if ((m = vtballoon_alloc_page(sc)) == NULL) {
633 			/* First allocate usign VTBALLOON_REGULAR_ALLOC and fall back to VTBALLOON_LOWMEM_ALLOC
634 			 * when the guest is under severe memory pressure. Quickly decrease the
635 			 * allocation rate, allowing the system to swap out pages.
636 			 */
637 			sc->vtballoon_pagereq = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT;
638 			sc->vtballoon_timeout = VTBALLOON_LOWMEM_TIMEOUT;
639 			break;
640 		}
641 
642 		sc->vtballoon_page_frames[i] =
643 		    VM_PAGE_TO_PHYS(m) >> VIRTIO_BALLOON_PFN_SHIFT;
644 
645 		KASSERT(m->queue == PQ_NONE,
646 		    ("%s: allocated page %p on queue", __func__, m));
647 		TAILQ_INSERT_TAIL(&sc->vtballoon_pages, m, pageq);
648 	}
649 
650 	if (i > 0)
651 		vtballoon_send_page_frames(sc, vq, i);
652 }
653 
654 static void
vtballoon_deflate(struct vtballoon_softc * sc,int npages)655 vtballoon_deflate(struct vtballoon_softc *sc, int npages)
656 {
657 	TAILQ_HEAD(, vm_page) free_pages;
658 	struct virtqueue *vq;
659 	vm_page_t m;
660 	int i;
661 
662 	vq = sc->vtballoon_deflate_vq;
663 	TAILQ_INIT(&free_pages);
664 
665 	if (npages > VTBALLOON_PAGES_PER_REQUEST)
666 		npages = VTBALLOON_PAGES_PER_REQUEST;
667 
668 	for (i = 0; i < npages; i++) {
669 		m = TAILQ_FIRST(&sc->vtballoon_pages);
670 		KASSERT(m != NULL, ("%s: no more pages to deflate", __func__));
671 
672 		sc->vtballoon_page_frames[i] =
673 		    VM_PAGE_TO_PHYS(m) >> VIRTIO_BALLOON_PFN_SHIFT;
674 
675 		TAILQ_REMOVE(&sc->vtballoon_pages, m, pageq);
676 		TAILQ_INSERT_TAIL(&free_pages, m, pageq);
677 	}
678 
679 	if (i > 0) {
680 		/*
681 		 * Note that if virtio VIRTIO_BALLOON_F_MUST_TELL_HOST
682 		 * feature is true, we *have* to tell host first
683 		 * before freeing the pages.
684 		 */
685 		vtballoon_send_page_frames(sc, vq, i);
686 
687 		while ((m = TAILQ_FIRST(&free_pages)) != NULL) {
688 			TAILQ_REMOVE(&free_pages, m, pageq);
689 			vtballoon_free_page(sc, m);
690 		}
691 	}
692 
693 	KASSERT((TAILQ_EMPTY(&sc->vtballoon_pages) &&
694 	    sc->vtballoon_current_npages == 0) ||
695 	    (!TAILQ_EMPTY(&sc->vtballoon_pages) &&
696 	    sc->vtballoon_current_npages != 0),
697 	    ("%s: bogus page count %d", __func__,
698 	    sc->vtballoon_current_npages));
699 }
700 
/*
 * Send the first 'npages' entries of sc->vtballoon_page_frames to the
 * host on virtqueue 'vq' (inflate or deflate) and wait until the host
 * has consumed the request.
 */
static void
vtballoon_send_page_frames(struct vtballoon_softc *sc, struct virtqueue *vq,
    int npages)
{
	struct sglist sg;
	struct sglist_seg segs[1];
	void *c;
	int error;

	sglist_init(&sg, 1, segs);

	error = sglist_append(&sg, sc->vtballoon_page_frames,
	    npages * sizeof(uint32_t));
	KASSERT(error == 0, ("error adding page frames to sglist"));

	/* The virtqueue pointer doubles as the request cookie (checked below). */
	error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
	KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
	virtqueue_notify(vq, NULL);

	/*
	 * Inflate and deflate operations are done synchronously. The
	 * interrupt handler will wake us up.
	 */
	VTBALLOON_ENTER_SLZ(sc);
	/* Poll for the completion; sleep (no timeout) while nothing is returned. */
	while ((c = virtqueue_dequeue(vq, NULL)) == NULL) {
		zsleep(sc, VTBALLOON_SLZ(sc), 0, "vtbspf", 0);
	}
	VTBALLOON_EXIT_SLZ(sc);

	KASSERT(c == vq, ("unexpected balloon operation response"));
}
732 
733 static void
vtballoon_pop(struct vtballoon_softc * sc)734 vtballoon_pop(struct vtballoon_softc *sc)
735 {
736 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Popping\n");
737 
738 	while (!TAILQ_EMPTY(&sc->vtballoon_pages))
739 		vtballoon_deflate(sc, sc->vtballoon_current_npages);
740 }
741 
742 static void
vtballoon_stop(struct vtballoon_softc * sc)743 vtballoon_stop(struct vtballoon_softc *sc)
744 {
745 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Stopping\n");
746 
747 	virtqueue_disable_intr(sc->vtballoon_inflate_vq);
748 	virtqueue_disable_intr(sc->vtballoon_deflate_vq);
749 /*
750 	if (virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ)) {
751 		virtqueue_disable_intr(sc->vtballoon_stats_vq);
752 	}
753 */
754 	virtio_stop(sc->vtballoon_dev);
755 }
756 
757 static vm_page_t
vtballoon_alloc_page(struct vtballoon_softc * sc)758 vtballoon_alloc_page(struct vtballoon_softc *sc)
759 {
760 	vm_page_t m;
761 
762 	m = vm_page_alloc(NULL, 0, sc->vtballoon_pagereq);
763 	if (m != NULL)
764 		sc->vtballoon_current_npages++;
765 
766 	return (m);
767 }
768 
/*
 * Release a page previously taken by the balloon and decrement the
 * balloon page counter.
 */
static void
vtballoon_free_page(struct vtballoon_softc *sc, vm_page_t m)
{
	vm_page_free_toq(m);
	sc->vtballoon_current_npages--;
}
775 
776 static uint32_t
vtballoon_desired_size(struct vtballoon_softc * sc)777 vtballoon_desired_size(struct vtballoon_softc *sc)
778 {
779 	uint32_t desired;
780 
781 	desired = virtio_read_dev_config_4(sc->vtballoon_dev,
782 	    offsetof(struct virtio_balloon_config, num_pages));
783 
784 	return (le32toh(desired));
785 }
786 
787 static void
vtballoon_update_size(struct vtballoon_softc * sc)788 vtballoon_update_size(struct vtballoon_softc *sc)
789 {
790 	virtio_write_dev_config_4(sc->vtballoon_dev,
791 	    offsetof(struct virtio_balloon_config, actual),
792 	    htole32(sc->vtballoon_current_npages));
793 }
794 
/*
 * Block the balloon thread until there is work to do.
 *
 * Returns 1 when the driver is detaching (the thread must exit) and 0
 * when the balloon has to be resized or a stats update was requested.
 * On return sc->vtballoon_desired_npages holds the size last read from
 * the device configuration.
 */
static int
vtballoon_sleep(struct vtballoon_softc *sc)
{
	int rc, timeout;
	uint32_t current, desired;

	rc = 0;
	current = sc->vtballoon_current_npages;
	/* Every pass starts out with the regular allocation flags again. */
	sc->vtballoon_pagereq = VM_ALLOC_NORMAL | VM_ALLOC_INTERRUPT;

	VTBALLOON_ENTER_SLZ(sc);
	for (;;) {
		if (sc->vtballoon_flags & VTBALLOON_FLAG_DETACH) {
			rc = 1;
			break;
		}

		desired = vtballoon_desired_size(sc);
		if (desired != sc->vtballoon_desired_npages)
			vtballoon_dprintf(sc, VTBALLOON_DEBUG, "balloon %s %d -> %d (4K pages)\n",
				desired < sc->vtballoon_desired_npages ? "deflating" : "inflating",
				current, desired);

		sc->vtballoon_desired_npages = desired;

		/*
		 * If given, use non-zero timeout on the first time through
		 * the loop. On subsequent times, timeout will be zero so
		 * we will reevaluate the desired size of the balloon and
		 * break out to retry if needed.
		 */
		timeout = sc->vtballoon_timeout;
		sc->vtballoon_timeout = 0;

		/*
		 * Deflating is always done right away; inflating only when
		 * no back-off timeout is armed; a pending stats request
		 * also ends the wait.
		 */
		if (current > desired)
			break;
		else if (current < desired && timeout == 0)
			break;
		else if (sc->vtballoon_update_stats)
			break;
		else if (!timeout)
			vtballoon_dprintf(sc, VTBALLOON_TRACE, "balloon %d (4K pages) reached\n", current);

		/* Woken by the interrupt handlers, by detach, or by the timeout. */
		zsleep(sc, VTBALLOON_SLZ(sc), 0, "vtbslp", timeout);
	}
	VTBALLOON_EXIT_SLZ(sc);

	return (rc);
}
844 
845 static void
vtballoon_thread(void * arg)846 vtballoon_thread(void *arg)
847 {
848 	struct vtballoon_softc *sc = arg;
849 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Thread started.\n");
850 
851 	uint32_t current, desired;
852 	for (;;) {
853 		if (vtballoon_sleep(sc) != 0)
854 			break;
855 
856 		current = sc->vtballoon_current_npages;
857 		desired = sc->vtballoon_desired_npages;
858 
859 		if (desired != current) {
860 			if (desired > current)
861 				vtballoon_inflate(sc, desired - current);
862 			else
863 				vtballoon_deflate(sc, current - desired);
864 
865 			vtballoon_update_size(sc);
866 		}
867 		if (sc->vtballoon_update_stats) {
868 			vtballoon_update_stats(sc);
869 			sc->vtballoon_update_stats = false;
870 		}
871 	}
872 
873 	kthread_exit();
874 }
875 
876 static void
vtballoon_get_tunables(struct vtballoon_softc * sc)877 vtballoon_get_tunables(struct vtballoon_softc *sc)
878 {
879 	char tmpstr[64];
880 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
881 
882 	TUNABLE_INT_FETCH("hw.vtballoon.debug_level", &sc->vtballoon_debug);
883 
884 	ksnprintf(tmpstr, sizeof(tmpstr), "dev.vtballoon.%d.debug_level",
885 	    device_get_unit(sc->vtballoon_dev));
886 	TUNABLE_INT_FETCH(tmpstr, &sc->vtballoon_debug);
887 }
888 
889 static void
vtballoon_add_sysctl(struct vtballoon_softc * sc)890 vtballoon_add_sysctl(struct vtballoon_softc *sc)
891 {
892 	device_t dev;
893 	struct sysctl_ctx_list *ctx;
894 	struct sysctl_oid *tree;
895 	struct sysctl_oid_list *child;
896 
897 	dev = sc->vtballoon_dev;
898 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
899 
900 	ctx = device_get_sysctl_ctx(dev);
901 	tree = device_get_sysctl_tree(dev);
902 	child = SYSCTL_CHILDREN(tree);
903 
904 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug_level",
905 	    CTLFLAG_RW, &sc->vtballoon_debug, 0,
906 	    "Debug level");
907 
908 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "desired",
909 	    CTLFLAG_RD, &sc->vtballoon_desired_npages, sizeof(uint32_t),
910 	    "Desired balloon size in pages");
911 
912 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "current",
913 	    CTLFLAG_RD, &sc->vtballoon_current_npages, sizeof(uint32_t),
914 	    "Current balloon size in pages");
915 }
916