1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice unmodified, this list of conditions, and the following
12  *    disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD: head/sys/dev/virtio/balloon/virtio_balloon.c 326255 2017-11-27 14:52:40Z pfg $
29  */
30 
31 /*
32  * Copyright (c) 2018 The DragonFly Project.  All rights reserved.
33  *
34  * This code is derived from software contributed to The DragonFly Project
35  * by Diederik de Groot <info@talon.nl>
36  *
37  * Redistribution and use in source and binary forms, with or without
38  * modification, are permitted provided that the following conditions
39  * are met:
40  *
41  * 1. Redistributions of source code must retain the above copyright
42  *    notice, this list of conditions and the following disclaimer.
43  * 2. Redistributions in binary form must reproduce the above copyright
44  *    notice, this list of conditions and the following disclaimer in
45  *    the documentation and/or other materials provided with the
46  *    distribution.
47  * 3. Neither the name of The DragonFly Project nor the names of its
48  *    contributors may be used to endorse or promote products derived
49  *    from this software without specific, prior written permission.
50  *
51  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
52  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
53  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
54  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
55  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
56  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
57  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
58  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
59  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
60  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
61  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
62  * SUCH DAMAGE.
63  */
64 
65 /* Driver for VirtIO memory balloon devices. */
66 
67 #include <sys/cdefs.h>
68 #include <sys/param.h>
69 #include <sys/systm.h>
70 #include <sys/kernel.h>
71 #include <sys/endian.h>
72 #include <sys/kthread.h>
73 #include <sys/malloc.h>
74 #include <sys/module.h>
75 #include <sys/sglist.h>
76 #include <sys/sysctl.h>
77 #include <sys/lock.h>
78 #include <sys/mutex.h>
79 #include <sys/queue.h>
80 
81 #include <vm/vm.h>
82 #include <vm/vm_page.h>
83 #include <sys/bus.h>
84 #include <sys/rman.h>
85 
86 #include <dev/virtual/virtio/virtio/virtio.h>
87 #include <dev/virtual/virtio/virtio/virtqueue.h>
88 #include <dev/virtual/virtio/balloon/virtio_balloon.h>
89 
90 struct vtballoon_softc {
91 	device_t		 vtballoon_dev;
92 	struct lwkt_serialize    vtballoon_slz;
93 	uint64_t		 vtballoon_features;
94 	uint32_t		 vtballoon_flags;
95 #define VTBALLOON_FLAG_DETACH	 0x01
96 
97 	struct virtqueue	*vtballoon_inflate_vq;
98 	struct virtqueue	*vtballoon_deflate_vq;
99 
100 	uint32_t		 vtballoon_desired_npages;
101 	uint32_t		 vtballoon_current_npages;
102 	TAILQ_HEAD(,vm_page)	 vtballoon_pages;
103 
104 	struct thread		*vtballoon_td;
105 	uint32_t		*vtballoon_page_frames;
106 	int			 vtballoon_pagereq;
107 	int			 vtballoon_timeout;
108 	int			 vtballoon_nintr;
109 	int			 vtballoon_debug;
110 #define VTBALLOON_INFO     	 0x01
111 #define VTBALLOON_ERROR    	 0x02
112 #define VTBALLOON_DEBUG    	 0x04
113 #define VTBALLOON_TRACE    	 0x08
114 
115 	struct virtqueue	*vtballoon_stats_vq;
116 	struct vtballoon_stat	 vtballoon_stats[VTBALLOON_S_NR];
117 	bool			 vtballoon_update_stats;
118 };
119 
120 static struct virtio_feature_desc vtballoon_feature_desc[] = {
121 	{ VIRTIO_BALLOON_F_MUST_TELL_HOST,	"MustTellHost"		},
122 	{ VIRTIO_BALLOON_F_STATS_VQ,		"StatsVq"		},
123 	{ VIRTIO_BALLOON_F_DEFLATE_ON_OOM,	"DeflateOnOutOfMemory"	},
124 	{ 0, NULL }
125 };
126 
/*
 * Conditional debug print.  The message is emitted through device_printf()
 * on the softc's device, prefixed with the calling function's name and the
 * source line, but only when at least one bit of _level is set in the
 * softc's vtballoon_debug mask.
 */
#define vtballoon_dprintf(_sc, _level, _msg, ...) do {			\
	if (((_sc)->vtballoon_debug & (_level)) != 0) {			\
		device_printf((_sc)->vtballoon_dev, "%s:%d: " _msg,	\
		    __func__, __LINE__, ##__VA_ARGS__);			\
	}								\
} while (0)
132 
133 static int		vtballoon_probe(device_t);
134 static int		vtballoon_attach(device_t);
135 static int		vtballoon_detach(device_t);
136 
137 static int		vtballoon_alloc_intrs(struct vtballoon_softc *sc);
138 
139 static void		vtballoon_negotiate_features(struct vtballoon_softc *);
140 static int		vtballoon_alloc_virtqueues(struct vtballoon_softc *);
141 
142 static void 		vtballoon_config_change_intr(void *);
143 
144 static void		vtballoon_update_stats(struct vtballoon_softc *sc);
145 static void		vtballoon_stats_vq_intr(void *);
146 
147 static void		vtballoon_inflate_vq_intr(void *);
148 static void		vtballoon_deflate_vq_intr(void *);
149 static void		vtballoon_inflate(struct vtballoon_softc *, int);
150 static void		vtballoon_deflate(struct vtballoon_softc *, int);
151 
152 static void		vtballoon_send_page_frames(struct vtballoon_softc *,
153 			    struct virtqueue *, int);
154 
155 static void		vtballoon_pop(struct vtballoon_softc *);
156 static void		vtballoon_stop(struct vtballoon_softc *);
157 
158 static vm_page_t	vtballoon_alloc_page(struct vtballoon_softc *);
159 static void		vtballoon_free_page(struct vtballoon_softc *, vm_page_t);
160 
161 static int		vtballoon_sleep(struct vtballoon_softc *);
162 static void		vtballoon_thread(void *);
163 static void		vtballoon_get_tunables(struct vtballoon_softc *);
164 static void		vtballoon_add_sysctl(struct vtballoon_softc *);
165 
/*
 * Features requested from the host during negotiation.  Only the stats
 * virtqueue is requested at the moment; candidates are
 * VIRTIO_BALLOON_F_STATS_VQ | VIRTIO_BALLOON_F_MUST_TELL_HOST.
 */
#define VTBALLOON_FEATURES		VIRTIO_BALLOON_F_STATS_VQ

/*
 * Timeout (in ticks) between retries when the balloon needs inflating.
 * Parenthesized so the macro stays a single operand when it is used inside
 * a larger expression.
 */
#define VTBALLOON_LOWMEM_TIMEOUT	(hz * 100)

/* vm_page_alloc flags */
#define VTBALLOON_REGULAR_ALLOC		VM_ALLOC_NORMAL
#define VTBALLOON_LOWMEM_ALLOC		VM_ALLOC_SYSTEM

/*
 * Maximum number of pages we'll request to inflate or deflate
 * the balloon in one virtqueue request. Both Linux and NetBSD
 * have settled on 256, doing up to 1MB at a time.
 */
#define VTBALLOON_PAGES_PER_REQUEST	256

/*
 * Default debug level; any combination of
 * VTBALLOON_INFO | VTBALLOON_ERROR | VTBALLOON_DEBUG | VTBALLOON_TRACE.
 * Parenthesized so that masking the macro (e.g. "LEVEL & bit") applies to
 * the whole set rather than to its last member only.
 */
#define VTBALLOON_DEFAULT_DEBUG_LEVEL   (VTBALLOON_INFO | VTBALLOON_ERROR)

/*
 * Maximum number of interrupts to request
 */
#define VTBALLOON_MAX_INTERRUPTS	4
196 
/* Must be able to fit all page frames in one page (segment). */
198 CTASSERT(VTBALLOON_PAGES_PER_REQUEST * sizeof(uint32_t) <= PAGE_SIZE);
199 
/*
 * Serializer helpers.
 *
 * VTBALLOON_SLZ() yields a pointer to the softc's serializer; the ENTER and
 * EXIT forms acquire and release it.  All three use their own argument (not
 * a variable that happens to be called "sc"), are fully parenthesized, and
 * expand to a plain expression without a trailing semicolon so they can be
 * used as the sole statement of an if/else branch.
 */
#define VTBALLOON_SLZ(_sc)		(&(_sc)->vtballoon_slz)
#define VTBALLOON_ENTER_SLZ(_sc)	lwkt_serialize_enter(VTBALLOON_SLZ(_sc))
#define VTBALLOON_EXIT_SLZ(_sc)		lwkt_serialize_exit(VTBALLOON_SLZ(_sc))
203 
204 static device_method_t vtballoon_methods[] = {
205 	/* Device methods. */
206 	DEVMETHOD(device_probe,		vtballoon_probe),
207 	DEVMETHOD(device_attach,	vtballoon_attach),
208 	DEVMETHOD(device_detach,	vtballoon_detach),
209 
210 	DEVMETHOD_END
211 };
212 
213 static driver_t vtballoon_driver = {
214 	"vtballoon",
215 	vtballoon_methods,
216 	sizeof(struct vtballoon_softc)
217 };
218 static devclass_t vtballoon_devclass;
219 
220 DRIVER_MODULE(virtio_balloon, virtio_pci, vtballoon_driver,
221     vtballoon_devclass, NULL, NULL);
222 MODULE_VERSION(virtio_balloon, 1);
223 MODULE_DEPEND(virtio_balloon, virtio, 1, 1, 1);
224 
225 static int
226 vtballoon_probe(device_t dev)
227 {
228 	struct vtballoon_softc *sc = device_get_softc(dev);
229 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
230 	if (virtio_get_device_type(dev) != VIRTIO_ID_BALLOON)
231 		return (ENXIO);
232 
233 	device_set_desc(dev, "VirtIO Balloon Adapter");
234 
235 	return (BUS_PROBE_DEFAULT);
236 }
237 
238 struct irqmap {
239 	int irq;
240 	int idx;
241 	driver_intr_t *handler;
242 	const char * handler_name;
243 };
244 
245 static int
246 vtballoon_attach(device_t dev)
247 {
248 	struct vtballoon_softc *sc;
249 	int error, i;
250 
251 	sc = device_get_softc(dev);
252 	sc->vtballoon_dev = dev;
253 	sc->vtballoon_debug = VTBALLOON_DEFAULT_DEBUG_LEVEL;
254 
255 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
256 
257 	lwkt_serialize_init(VTBALLOON_SLZ(sc));
258 	TAILQ_INIT(&sc->vtballoon_pages);
259 
260 	vtballoon_get_tunables(sc);
261 	vtballoon_add_sysctl(sc);
262 
263 	virtio_set_feature_desc(dev, vtballoon_feature_desc);
264 	vtballoon_negotiate_features(sc);
265 
266 	sc->vtballoon_page_frames = contigmalloc(VTBALLOON_PAGES_PER_REQUEST *
267 	    sizeof(uint32_t), M_DEVBUF, M_NOWAIT | M_ZERO, 0, BUS_SPACE_MAXADDR, 16, 0);
268 	if (sc->vtballoon_page_frames == NULL) {
269 		error = ENOMEM;
270 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot allocate page frame request array (error:%d)\n", error);
271 		goto fail;
272 	}
273 	error = vtballoon_alloc_intrs(sc);
274 	if (error) {
275 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot allocate interrupts (error:%d)\n", error);
276 		goto fail;
277 	}
278 
279 	error = vtballoon_alloc_virtqueues(sc);
280 	if (error) {
281 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot allocate virtqueues (error:%d)\n", error);
282 		goto fail;
283 	}
284 
285 	int nrhandlers = virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ) ? 4 : 3;
286 	struct irqmap info[4];
287 
288 	/* Possible "Virtqueue <-> IRQ" configurations */
289 	switch (sc->vtballoon_nintr) {
290 	case 1:
291 		info[2] = (struct irqmap){0, -1, vtballoon_config_change_intr, "config"};
292 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
293 		info[1] = (struct irqmap){0, 1, vtballoon_deflate_vq_intr, "deflate"};
294 		info[3] = (struct irqmap){0, 2, vtballoon_stats_vq_intr, "stats"};
295 		break;
296 	case 2:
297 		info[2] = (struct irqmap){1, -1, vtballoon_config_change_intr, "config"};
298 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
299 		info[1] = (struct irqmap){0, 1, vtballoon_deflate_vq_intr, "deflate"};
300 		info[3] = (struct irqmap){0, 2, vtballoon_stats_vq_intr, "stats"};
301 		break;
302 	case 3:
303 		info[2] = (struct irqmap){2, -1, vtballoon_config_change_intr, "config"};
304 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
305 		info[1] = (struct irqmap){1, 1, vtballoon_deflate_vq_intr, "deflate"};
306 		info[3] = (struct irqmap){2, 2, vtballoon_stats_vq_intr, "stats"};
307 		break;
308 	case 4:
309 		info[2] = (struct irqmap){3, -1, vtballoon_config_change_intr, "config"};
310 		info[0] = (struct irqmap){0, 0, vtballoon_inflate_vq_intr, "inflate"};
311 		info[1] = (struct irqmap){1, 1, vtballoon_deflate_vq_intr, "deflate"};
312 		info[3] = (struct irqmap){2, 2, vtballoon_stats_vq_intr, "stats"};
313 		break;
314 	default:
315 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "Invalid interrupt vector count: %d\n", sc->vtballoon_nintr);
316 		goto fail;
317 	}
318 	for (i = 0; i < nrhandlers; i++) {
319 		error = virtio_bind_intr(sc->vtballoon_dev, info[i].irq, info[i].idx,
320 		    info[i].handler, sc);
321 		if (error) {
322 			vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot bind virtqueue '%s' handler to IRQ:%d/%d\n",
323 				info[i].handler_name, info[i].irq, sc->vtballoon_nintr);
324 			goto fail;
325 		}
326 	}
327 
328 	for (i = 0; i < sc->vtballoon_nintr; i++) {
329 		error = virtio_setup_intr(dev, i, VTBALLOON_SLZ(sc));
330 		if (error) {
331 			vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot setup virtqueue interrupt:%d (error:%d)\n", i, error);
332 			goto fail;
333 		}
334 	}
335 
336 	error = kthread_create(vtballoon_thread, sc, &sc->vtballoon_td, "virtio_balloon");
337 	if (error) {
338 		vtballoon_dprintf(sc, VTBALLOON_ERROR, "cannot create balloon kthread (error:%d)\n", error);
339 		goto fail;
340 	}
341 
342 	virtqueue_enable_intr(sc->vtballoon_inflate_vq);
343 	virtqueue_enable_intr(sc->vtballoon_deflate_vq);
344 
345 	if (virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ)) {
346 		virtqueue_enable_intr(sc->vtballoon_stats_vq);
347 #if 0		/* enabling this causes a panic, on asserting ASSERT_SERIALIZED(sc) in vtballoon_update_stats */
348 		/*
349 		 * Prime this stats virtqueue with one buffer so the hypervisor can
350 		 * use it to signal us later.
351 		 */
352 		VTBALLOON_ENTER_SLZ(sc);
353 		vtballoon_update_stats(sc);
354 		VTBALLOON_EXIT_SLZ(sc);
355 #endif
356 	}
357 
358 fail:
359 	if (error)
360 		vtballoon_detach(dev);
361 
362 	return (error);
363 }
364 
365 static int
366 vtballoon_detach(device_t dev)
367 {
368 	struct vtballoon_softc *sc;
369 	int i;
370 
371 	sc = device_get_softc(dev);
372 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
373 
374 	if (sc->vtballoon_td != NULL) {
375 		VTBALLOON_ENTER_SLZ(sc);
376 		sc->vtballoon_flags |= VTBALLOON_FLAG_DETACH;
377 
378 		/* drain */
379 		wakeup_one(sc);
380 		zsleep(sc->vtballoon_td, VTBALLOON_SLZ(sc), 0, "vtbdth", 0);
381 		VTBALLOON_EXIT_SLZ(sc);
382 		sc->vtballoon_td = NULL;
383 	}
384 
385 	lwkt_serialize_handler_disable(VTBALLOON_SLZ(sc));
386 
387 	for (i = 0; i < sc->vtballoon_nintr; i++)
388 		virtio_teardown_intr(dev, i);
389 
390 	if (device_is_attached(dev)) {
391 		vtballoon_pop(sc);
392 		vtballoon_stop(sc);
393 	}
394 
395 	if (sc->vtballoon_page_frames != NULL) {
396 		contigfree(sc->vtballoon_page_frames, VTBALLOON_PAGES_PER_REQUEST *
397 			sizeof(uint32_t), M_DEVBUF);
398 		sc->vtballoon_page_frames = NULL;
399 	}
400 	return (0);
401 }
402 
403 static void
404 vtballoon_negotiate_features(struct vtballoon_softc *sc)
405 {
406 	device_t dev;
407 	uint64_t features;
408 
409 	dev = sc->vtballoon_dev;
410 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
411 	features = virtio_negotiate_features(dev, VTBALLOON_FEATURES);
412 	sc->vtballoon_features = features;
413 }
414 
415 static int vtballoon_alloc_intrs(struct vtballoon_softc *sc)
416 {
417 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
418 	int cnt, error;
419 	int intrcount = virtio_intr_count(sc->vtballoon_dev);
420 	int use_config = 1;
421 
422 	intrcount = imin(intrcount, VTBALLOON_MAX_INTERRUPTS);
423 	if (intrcount < 1)
424 		return (ENXIO);
425 
426 	cnt = intrcount;
427 	error = virtio_intr_alloc(sc->vtballoon_dev, &cnt, use_config, NULL);
428 	if (error != 0) {
429 		virtio_intr_release(sc->vtballoon_dev);
430 		return (error);
431 	}
432 	sc->vtballoon_nintr = cnt;
433 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "%d Interrupts Allocated\n", sc->vtballoon_nintr);
434 	return (0);
435 }
436 
437 static int
438 vtballoon_alloc_virtqueues(struct vtballoon_softc *sc)
439 {
440 	device_t dev;
441 	struct vq_alloc_info vq_info[3];
442 	int nvqs;
443 
444 	dev = sc->vtballoon_dev;
445 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
446 	nvqs = 2;
447 
448 	VQ_ALLOC_INFO_INIT(&vq_info[0], 0, &sc->vtballoon_inflate_vq,
449 		"%s inflate", device_get_nameunit(dev));
450 
451 	VQ_ALLOC_INFO_INIT(&vq_info[1], 0, &sc->vtballoon_deflate_vq,
452 		"%s deflate", device_get_nameunit(dev));
453 
454 	if (virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ)) {
455 		VQ_ALLOC_INFO_INIT(&vq_info[2], 0, &sc->vtballoon_stats_vq,
456 			"%s stats", device_get_nameunit(dev));
457 		nvqs = 3;
458 	}
459 	return (virtio_alloc_virtqueues(dev, nvqs, vq_info));
460 }
461 
462 static void
463 vtballoon_config_change_intr(void *arg)
464 {
465 	struct vtballoon_softc *sc = arg;
466 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
467 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
468 	wakeup_one(sc);
469 }
470 
471 static inline void
472 vtballoon_update_stat(struct vtballoon_softc *sc, int idx,
473 	uint16_t tag, uint64_t val)
474 {
475 	KASSERT(idx >= VTBALLOON_S_NR, ("Stats index out of bounds"));
476 	/*
477 	 * XXX: Required for endianess in the future
478 	 * sc->vtballoon_stats[idx].tag = virtio_is_little_endian(sc->vtballoon_dev) ? le16toh(tag) : tag;
479 	 * sc->vtballoon_stats[idx].val = virtio_is_little_endian(sc->vtballoon_dev) ? le64toh(val) : val;
480 	 * at the moment virtio balloon is always little endian.
481 	 *
482 	 */
483 	sc->vtballoon_stats[idx].tag = le16toh(tag);
484 	sc->vtballoon_stats[idx].val = le64toh(val);
485 
486 }
487 
488 /*
489  * collect guest side statistics
490  *
491  * XXX: am i using the correct memory and pagefault values
492  */
493 static unsigned int collect_balloon_stats(struct vtballoon_softc *sc)
494 {
495 	#define pages_to_bytes(x) ((uint64_t)(x) << PAGE_SHIFT)
496 	unsigned int idx = 0;
497 	struct vmtotal total;
498 	struct vmmeter vmm;
499 	struct vmstats vms;
500 	size_t vmt_size = sizeof(total);
501 	size_t vmm_size = sizeof(vmm);
502 	size_t vms_size = sizeof(vms);
503 
504 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Updating Stats Buffer\n");
505 	if (!kernel_sysctlbyname("vm.vmtotal", &total, &vmt_size, NULL, 0, NULL)) {
506 		/* Total amount of free memory )*/
507 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MEMFREE,
508 					pages_to_bytes(total.t_rm - total.t_arm));
509 		/* Total amount of memory */
510 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MEMTOT,
511 					pages_to_bytes(total.t_rm));
512 		/* Available memory as in /proc	*/
513 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_AVAIL,
514 					pages_to_bytes(total.t_arm));
515 	}
516 	if (!kernel_sysctlbyname("vm.vmstats", &vms, &vms_size, NULL, 0, NULL)) {
517 		/* Disk caches */
518 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_CACHES,
519 					pages_to_bytes(vms.v_cache_count));
520 	}
521 	if (!kernel_sysctlbyname("vm.vmmeter", &vmm, &vmm_size, NULL, 0, NULL)) {
522 		/* Amount of memory swapped in */
523 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_SWAP_IN,
524 					pages_to_bytes(vmm.v_swappgsin));
525 		/* Amount of memory swapped out */
526 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_SWAP_OUT,
527 					pages_to_bytes(vmm.v_swappgsout));
528 		/* Number of major faults */
529 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MAJFLT,
530 					vmm.v_vm_faults);
531 		/* Number of minor faults */
532 		vtballoon_update_stat(sc, idx++, VTBALLOON_S_MINFLT,
533 					vmm.v_intrans);
534 	}
535 
536 	if (sc->vtballoon_debug & VTBALLOON_TRACE)  {
537 		static const char *vt_balloon_names[]=VTBALLOON_S_NAMES;
538 		int i;
539 		for (i=0; i < idx; i++) {
540 			kprintf("\t%s = %lu\n", vt_balloon_names[sc->vtballoon_stats[i].tag], sc->vtballoon_stats[i].val);
541 		}
542 	}
543 
544 	return idx;
545 }
546 
547 static void
548 vtballoon_update_stats(struct vtballoon_softc *sc)
549 {
550 	struct virtqueue *vq = sc->vtballoon_stats_vq;
551 
552 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
553 
554 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Stats Requested\n");
555 
556 	struct sglist sg;
557 	struct sglist_seg segs[1];
558 	unsigned int num_stats;
559 	int error;
560 
561 	num_stats = collect_balloon_stats(sc);
562 
563 	sglist_init(&sg, 1, segs);
564 	error = sglist_append(&sg, sc->vtballoon_stats, sizeof(sc->vtballoon_stats[0]) * num_stats);
565 	KASSERT(error == 0, ("error adding page frames to sglist"));
566 
567 	error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
568 	KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
569 	virtqueue_notify(sc->vtballoon_stats_vq, NULL);
570 }
571 
572 /*
573  * While most virtqueues communicate guest-initiated requests to the hypervisor,
574  * the stats queue operates in reverse.  The driver(host) initializes the virtqueue
575  * with a single buffer. From that point forward, all conversations consist of
576  * a hypervisor request (a call to this function) which directs us to refill
577  * the virtqueue with a fresh stats buffer. Since stats collection can sleep,
578  * we delegate the job to the vtballoon_thread which will do the actual stats
579  * collecting work.
580  */
581 static void
582 vtballoon_stats_vq_intr(void *arg)
583 {
584 	struct vtballoon_softc *sc = arg;
585 	struct virtqueue *vq = sc->vtballoon_stats_vq;
586 
587 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
588 	if (sc->vtballoon_update_stats || !virtqueue_pending(vq))
589 		return;
590 
591 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Ballooon Stats Requested\n");
592 	sc->vtballoon_update_stats = true;
593 	wakeup_one(sc);
594 	virtqueue_dequeue(vq, NULL);
595 }
596 
597 static void
598 vtballoon_inflate_vq_intr(void *arg)
599 {
600 	struct vtballoon_softc *sc = arg;
601 	struct virtqueue *vq = sc->vtballoon_inflate_vq;
602 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
603 	if (!virtqueue_pending(vq))
604 		return;
605 	wakeup_one(sc);
606 }
607 
608 static void
609 vtballoon_deflate_vq_intr(void *arg)
610 {
611 	struct vtballoon_softc *sc = arg;
612 	struct virtqueue *vq = sc->vtballoon_deflate_vq;
613 	ASSERT_SERIALIZED(VTBALLOON_SLZ(sc));
614 	if (!virtqueue_pending(vq))
615 		return;
616 	wakeup_one(sc);
617 }
618 
619 static void
620 vtballoon_inflate(struct vtballoon_softc *sc, int npages)
621 {
622 	struct virtqueue *vq;
623 
624 	vm_page_t m;
625 	int i;
626 
627 	vq = sc->vtballoon_inflate_vq;
628 
629 	if (npages > VTBALLOON_PAGES_PER_REQUEST)
630 		npages = VTBALLOON_PAGES_PER_REQUEST;
631 
632 	for (i = 0; i < npages; i++) {
633 		if ((m = vtballoon_alloc_page(sc)) == NULL) {
634 			/* First allocate usign VTBALLOON_REGULAR_ALLOC and fall back to VTBALLOON_LOWMEM_ALLOC
635 			 * when the guest is under severe memory pressure. Quickly decrease the
636 			 * allocation rate, allowing the system to swap out pages.
637 			 */
638 			sc->vtballoon_pagereq = VM_ALLOC_SYSTEM | VM_ALLOC_INTERRUPT;
639 			sc->vtballoon_timeout = VTBALLOON_LOWMEM_TIMEOUT;
640 			break;
641 		}
642 
643 		sc->vtballoon_page_frames[i] =
644 		    VM_PAGE_TO_PHYS(m) >> VIRTIO_BALLOON_PFN_SHIFT;
645 
646 		KASSERT(m->queue == PQ_NONE,
647 		    ("%s: allocated page %p on queue", __func__, m));
648 		TAILQ_INSERT_TAIL(&sc->vtballoon_pages, m, pageq);
649 	}
650 
651 	if (i > 0)
652 		vtballoon_send_page_frames(sc, vq, i);
653 }
654 
655 static void
656 vtballoon_deflate(struct vtballoon_softc *sc, int npages)
657 {
658 	TAILQ_HEAD(, vm_page) free_pages;
659 	struct virtqueue *vq;
660 	vm_page_t m;
661 	int i;
662 
663 	vq = sc->vtballoon_deflate_vq;
664 	TAILQ_INIT(&free_pages);
665 
666 	if (npages > VTBALLOON_PAGES_PER_REQUEST)
667 		npages = VTBALLOON_PAGES_PER_REQUEST;
668 
669 	for (i = 0; i < npages; i++) {
670 		m = TAILQ_FIRST(&sc->vtballoon_pages);
671 		KASSERT(m != NULL, ("%s: no more pages to deflate", __func__));
672 
673 		sc->vtballoon_page_frames[i] =
674 		    VM_PAGE_TO_PHYS(m) >> VIRTIO_BALLOON_PFN_SHIFT;
675 
676 		TAILQ_REMOVE(&sc->vtballoon_pages, m, pageq);
677 		TAILQ_INSERT_TAIL(&free_pages, m, pageq);
678 	}
679 
680 	if (i > 0) {
681 		/*
682 		 * Note that if virtio VIRTIO_BALLOON_F_MUST_TELL_HOST
683 		 * feature is true, we *have* to tell host first
684 		 * before freeing the pages.
685 		 */
686 		vtballoon_send_page_frames(sc, vq, i);
687 
688 		while ((m = TAILQ_FIRST(&free_pages)) != NULL) {
689 			TAILQ_REMOVE(&free_pages, m, pageq);
690 			vtballoon_free_page(sc, m);
691 		}
692 	}
693 
694 	KASSERT((TAILQ_EMPTY(&sc->vtballoon_pages) &&
695 	    sc->vtballoon_current_npages == 0) ||
696 	    (!TAILQ_EMPTY(&sc->vtballoon_pages) &&
697 	    sc->vtballoon_current_npages != 0),
698 	    ("%s: bogus page count %d", __func__,
699 	    sc->vtballoon_current_npages));
700 }
701 
702 static void
703 vtballoon_send_page_frames(struct vtballoon_softc *sc, struct virtqueue *vq,
704     int npages)
705 {
706 	struct sglist sg;
707 	struct sglist_seg segs[1];
708 	void *c;
709 	int error;
710 
711 	sglist_init(&sg, 1, segs);
712 
713 	error = sglist_append(&sg, sc->vtballoon_page_frames,
714 	    npages * sizeof(uint32_t));
715 	KASSERT(error == 0, ("error adding page frames to sglist"));
716 
717 	error = virtqueue_enqueue(vq, vq, &sg, 1, 0);
718 	KASSERT(error == 0, ("error enqueuing page frames to virtqueue"));
719 	virtqueue_notify(vq, NULL);
720 
721 	/*
722 	 * Inflate and deflate operations are done synchronously. The
723 	 * interrupt handler will wake us up.
724 	 */
725 	VTBALLOON_ENTER_SLZ(sc);
726 	while ((c = virtqueue_dequeue(vq, NULL)) == NULL) {
727 		zsleep(sc, VTBALLOON_SLZ(sc), 0, "vtbspf", 0);
728 	}
729 	VTBALLOON_EXIT_SLZ(sc);
730 
731 	KASSERT(c == vq, ("unexpected balloon operation response"));
732 }
733 
734 static void
735 vtballoon_pop(struct vtballoon_softc *sc)
736 {
737 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Popping\n");
738 
739 	while (!TAILQ_EMPTY(&sc->vtballoon_pages))
740 		vtballoon_deflate(sc, sc->vtballoon_current_npages);
741 }
742 
743 static void
744 vtballoon_stop(struct vtballoon_softc *sc)
745 {
746 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Stopping\n");
747 
748 	virtqueue_disable_intr(sc->vtballoon_inflate_vq);
749 	virtqueue_disable_intr(sc->vtballoon_deflate_vq);
750 /*
751 	if (virtio_with_feature(sc->vtballoon_dev, VIRTIO_BALLOON_F_STATS_VQ)) {
752 		virtqueue_disable_intr(sc->vtballoon_stats_vq);
753 	}
754 */
755 	virtio_stop(sc->vtballoon_dev);
756 }
757 
758 static vm_page_t
759 vtballoon_alloc_page(struct vtballoon_softc *sc)
760 {
761 	vm_page_t m;
762 
763 	m = vm_page_alloc(NULL, 0, sc->vtballoon_pagereq);
764 	if (m != NULL)
765 		sc->vtballoon_current_npages++;
766 
767 	return (m);
768 }
769 
770 static void
771 vtballoon_free_page(struct vtballoon_softc *sc, vm_page_t m)
772 {
773 	vm_page_free_toq(m);
774 	sc->vtballoon_current_npages--;
775 }
776 
777 static uint32_t
778 vtballoon_desired_size(struct vtballoon_softc *sc)
779 {
780 	uint32_t desired;
781 
782 	desired = virtio_read_dev_config_4(sc->vtballoon_dev,
783 	    offsetof(struct virtio_balloon_config, num_pages));
784 
785 	return (le32toh(desired));
786 }
787 
788 static void
789 vtballoon_update_size(struct vtballoon_softc *sc)
790 {
791 	virtio_write_dev_config_4(sc->vtballoon_dev,
792 	    offsetof(struct virtio_balloon_config, actual),
793 	    htole32(sc->vtballoon_current_npages));
794 }
795 
796 static int
797 vtballoon_sleep(struct vtballoon_softc *sc)
798 {
799 	int rc, timeout;
800 	uint32_t current, desired;
801 
802 	rc = 0;
803 	current = sc->vtballoon_current_npages;
804 	sc->vtballoon_pagereq = VM_ALLOC_NORMAL | VM_ALLOC_INTERRUPT;
805 
806 	VTBALLOON_ENTER_SLZ(sc);
807 	for (;;) {
808 		if (sc->vtballoon_flags & VTBALLOON_FLAG_DETACH) {
809 			rc = 1;
810 			break;
811 		}
812 
813 		desired = vtballoon_desired_size(sc);
814 		if (desired != sc->vtballoon_desired_npages)
815 			vtballoon_dprintf(sc, VTBALLOON_DEBUG, "balloon %s %d -> %d (4K pages)\n",
816 				desired < sc->vtballoon_desired_npages ? "deflating" : "inflating",
817 				current, desired);
818 
819 		sc->vtballoon_desired_npages = desired;
820 
821 		/*
822 		 * If given, use non-zero timeout on the first time through
823 		 * the loop. On subsequent times, timeout will be zero so
824 		 * we will reevaluate the desired size of the balloon and
825 		 * break out to retry if needed.
826 		 */
827 		timeout = sc->vtballoon_timeout;
828 		sc->vtballoon_timeout = 0;
829 
830 		if (current > desired)
831 			break;
832 		else if (current < desired && timeout == 0)
833 			break;
834 		else if (sc->vtballoon_update_stats)
835 			break;
836 		else if (!timeout)
837 			vtballoon_dprintf(sc, VTBALLOON_TRACE, "balloon %d (4K pages) reached\n", current);
838 
839 		zsleep(sc, VTBALLOON_SLZ(sc), 0, "vtbslp", timeout);
840 	}
841 	VTBALLOON_EXIT_SLZ(sc);
842 
843 	return (rc);
844 }
845 
846 static void
847 vtballoon_thread(void *arg)
848 {
849 	struct vtballoon_softc *sc = arg;
850 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "Thread started.\n");
851 
852 	uint32_t current, desired;
853 	for (;;) {
854 		if (vtballoon_sleep(sc) != 0)
855 			break;
856 
857 		current = sc->vtballoon_current_npages;
858 		desired = sc->vtballoon_desired_npages;
859 
860 		if (desired != current) {
861 			if (desired > current)
862 				vtballoon_inflate(sc, desired - current);
863 			else
864 				vtballoon_deflate(sc, current - desired);
865 
866 			vtballoon_update_size(sc);
867 		}
868 		if (sc->vtballoon_update_stats) {
869 			vtballoon_update_stats(sc);
870 			sc->vtballoon_update_stats = false;
871 		}
872 	}
873 
874 	kthread_exit();
875 }
876 
877 static void
878 vtballoon_get_tunables(struct vtballoon_softc *sc)
879 {
880 	char tmpstr[64];
881 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
882 
883 	TUNABLE_INT_FETCH("hw.vtballoon.debug_level", &sc->vtballoon_debug);
884 
885 	ksnprintf(tmpstr, sizeof(tmpstr), "dev.vtballoon.%d.debug_level",
886 	    device_get_unit(sc->vtballoon_dev));
887 	TUNABLE_INT_FETCH(tmpstr, &sc->vtballoon_debug);
888 }
889 
890 static void
891 vtballoon_add_sysctl(struct vtballoon_softc *sc)
892 {
893 	device_t dev;
894 	struct sysctl_ctx_list *ctx;
895 	struct sysctl_oid *tree;
896 	struct sysctl_oid_list *child;
897 
898 	dev = sc->vtballoon_dev;
899 	vtballoon_dprintf(sc, VTBALLOON_TRACE, "\n");
900 
901 	ctx = device_get_sysctl_ctx(dev);
902 	tree = device_get_sysctl_tree(dev);
903 	child = SYSCTL_CHILDREN(tree);
904 
905 	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "debug_level",
906 	    CTLFLAG_RW, &sc->vtballoon_debug, 0,
907 	    "Debug level");
908 
909 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "desired",
910 	    CTLFLAG_RD, &sc->vtballoon_desired_npages, sizeof(uint32_t),
911 	    "Desired balloon size in pages");
912 
913 	SYSCTL_ADD_UINT(ctx, child, OID_AUTO, "current",
914 	    CTLFLAG_RD, &sc->vtballoon_current_npages, sizeof(uint32_t),
915 	    "Current balloon size in pages");
916 }
917