xref: /openbsd/usr.sbin/vmd/pci.c (revision a4052f0f)
1 /*	$OpenBSD: pci.c,v 1.35 2024/10/02 17:05:56 dv Exp $	*/
2 
3 /*
4  * Copyright (c) 2015 Mike Larkin <mlarkin@openbsd.org>
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  */
18 
19 #include <sys/types.h>
20 
21 #include <dev/pci/pcireg.h>
22 #include <dev/pci/pcidevs.h>
23 #include <dev/vmm/vmm.h>
24 
25 #include <string.h>
26 #include <unistd.h>
27 
28 #include "vmd.h"
29 #include "pci.h"
30 #include "atomicio.h"
31 
32 struct pci pci;
33 
34 extern struct vmd_vm current_vm;
35 extern char *__progname;
36 
37 /* PIC IRQs, assigned to devices in order */
38 const uint8_t pci_pic_irqs[PCI_MAX_PIC_IRQS] = {3, 5, 6, 7, 9, 10, 11, 12,
39     14, 15};
40 
41 /*
42  * pci_add_bar
43  *
44  * Adds a BAR for the PCI device 'id'. On access, 'barfn' will be
45  * called, and passed 'cookie' as an identifier.
46  *
47  * BARs are fixed size, meaning all I/O BARs requested have the
48  * same size and all MMIO BARs have the same size.
49  *
50  * Parameters:
51  *  id: PCI device to add the BAR to (local count, eg if id == 4,
52  *      this BAR is to be added to the VM's 5th PCI device)
53  *  type: type of the BAR to add (PCI_MAPREG_TYPE_xxx)
54  *  barfn: callback function invoked on BAR access
55  *  cookie: cookie passed to barfn on access
56  *
57  * Returns 0 if the BAR was added successfully, 1 otherwise.
58  */
59 int
pci_add_bar(uint8_t id,uint32_t type,void * barfn,void * cookie)60 pci_add_bar(uint8_t id, uint32_t type, void *barfn, void *cookie)
61 {
62 	uint8_t bar_reg_idx, bar_ct;
63 
64 	/* Check id */
65 	if (id >= pci.pci_dev_ct)
66 		return (1);
67 
68 	/* Can only add PCI_MAX_BARS BARs to any device */
69 	bar_ct = pci.pci_devices[id].pd_bar_ct;
70 	if (bar_ct >= PCI_MAX_BARS)
71 		return (1);
72 
73 	/* Compute BAR address and add */
74 	bar_reg_idx = (PCI_MAPREG_START + (bar_ct * 4)) / 4;
75 	if (type == PCI_MAPREG_TYPE_MEM) {
76 		if (pci.pci_next_mmio_bar >= PCI_MMIO_BAR_END)
77 			return (1);
78 
79 		pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
80 		    PCI_MAPREG_MEM_ADDR(pci.pci_next_mmio_bar);
81 		pci.pci_next_mmio_bar += VM_PCI_MMIO_BAR_SIZE;
82 		pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
83 		pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
84 		pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_MMIO;
85 		pci.pci_devices[id].pd_barsize[bar_ct] = VM_PCI_MMIO_BAR_SIZE;
86 		pci.pci_devices[id].pd_bar_ct++;
87 	}
88 #ifdef __amd64__
89 	else if (type == PCI_MAPREG_TYPE_IO) {
90 		if (pci.pci_next_io_bar >= VM_PCI_IO_BAR_END)
91 			return (1);
92 
93 		pci.pci_devices[id].pd_cfg_space[bar_reg_idx] =
94 		    PCI_MAPREG_IO_ADDR(pci.pci_next_io_bar) |
95 		    PCI_MAPREG_TYPE_IO;
96 		pci.pci_next_io_bar += VM_PCI_IO_BAR_SIZE;
97 		pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
98 		pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
99 		DPRINTF("%s: adding pci bar cookie for dev %d bar %d = %p",
100 		    __progname, id, bar_ct, cookie);
101 		pci.pci_devices[id].pd_bartype[bar_ct] = PCI_BAR_TYPE_IO;
102 		pci.pci_devices[id].pd_barsize[bar_ct] = VM_PCI_IO_BAR_SIZE;
103 		pci.pci_devices[id].pd_bar_ct++;
104 	}
105 #endif /* __amd64__ */
106 
107 	return (0);
108 }
109 
110 int
pci_set_bar_fn(uint8_t id,uint8_t bar_ct,void * barfn,void * cookie)111 pci_set_bar_fn(uint8_t id, uint8_t bar_ct, void *barfn, void *cookie)
112 {
113 	/* Check id */
114 	if (id >= pci.pci_dev_ct)
115 		return (1);
116 
117 	if (bar_ct >= PCI_MAX_BARS)
118 		return (1);
119 
120 	pci.pci_devices[id].pd_barfunc[bar_ct] = barfn;
121 	pci.pci_devices[id].pd_bar_cookie[bar_ct] = cookie;
122 
123 	return (0);
124 }
125 
126 /*
127  * pci_get_dev_irq
128  *
129  * Returns the IRQ for the specified PCI device
130  *
131  * Parameters:
132  *  id: PCI device id to return IRQ for
133  *
134  * Return values:
135  *  The IRQ for the device, or 0xff if no device IRQ assigned
136  */
137 uint8_t
pci_get_dev_irq(uint8_t id)138 pci_get_dev_irq(uint8_t id)
139 {
140 	if (pci.pci_devices[id].pd_int)
141 		return pci.pci_devices[id].pd_irq;
142 	else
143 		return 0xFF;
144 }
145 
146 /*
147  * pci_add_device
148  *
149  * Adds a PCI device to the guest VM defined by the supplied parameters.
150  *
151  * Parameters:
152  *  id: the new PCI device ID (0 .. PCI_CONFIG_MAX_DEV)
153  *  vid: PCI VID of the new device
154  *  pid: PCI PID of the new device
155  *  class: PCI 'class' of the new device
156  *  subclass: PCI 'subclass' of the new device
157  *  subsys_vid: subsystem VID of the new device
158  *  subsys_id: subsystem ID of the new device
159  *  irq_needed: 1 if an IRQ should be assigned to this PCI device, 0 otherwise
160  *  csfunc: PCI config space callback function when the guest VM accesses
161  *      CS of this PCI device
162  *
163  * Return values:
164  *  0: the PCI device was added successfully. The PCI device ID is in 'id'.
165  *  1: the PCI device addition failed.
166  */
167 int
pci_add_device(uint8_t * id,uint16_t vid,uint16_t pid,uint8_t class,uint8_t subclass,uint16_t subsys_vid,uint16_t subsys_id,uint8_t irq_needed,pci_cs_fn_t csfunc)168 pci_add_device(uint8_t *id, uint16_t vid, uint16_t pid, uint8_t class,
169     uint8_t subclass, uint16_t subsys_vid, uint16_t subsys_id,
170     uint8_t irq_needed, pci_cs_fn_t csfunc)
171 {
172 	/* Exceeded max devices? */
173 	if (pci.pci_dev_ct >= PCI_CONFIG_MAX_DEV)
174 		return (1);
175 
176 	/* Exceeded max IRQs? */
177 	/* XXX we could share IRQs ... */
178 	if (pci.pci_next_pic_irq >= PCI_MAX_PIC_IRQS && irq_needed)
179 		return (1);
180 
181 	*id = pci.pci_dev_ct;
182 
183 	pci.pci_devices[*id].pd_vid = vid;
184 	pci.pci_devices[*id].pd_did = pid;
185 	pci.pci_devices[*id].pd_class = class;
186 	pci.pci_devices[*id].pd_subclass = subclass;
187 	pci.pci_devices[*id].pd_subsys_vid = subsys_vid;
188 	pci.pci_devices[*id].pd_subsys_id = subsys_id;
189 
190 	pci.pci_devices[*id].pd_csfunc = csfunc;
191 
192 	if (irq_needed) {
193 		pci.pci_devices[*id].pd_irq =
194 		    pci_pic_irqs[pci.pci_next_pic_irq];
195 		pci.pci_devices[*id].pd_int = 1;
196 		pci.pci_next_pic_irq++;
197 		DPRINTF("assigned irq %d to pci dev %d",
198 		    pci.pci_devices[*id].pd_irq, *id);
199 		intr_toggle_el(&current_vm, pci.pci_devices[*id].pd_irq, 1);
200 	}
201 
202 	pci.pci_dev_ct ++;
203 
204 	return (0);
205 }
206 
207 /*
208  * pci_init
209  *
210  * Initializes the PCI subsystem for the VM by adding a PCI host bridge
211  * as the first PCI device.
212  */
213 void
pci_init(void)214 pci_init(void)
215 {
216 	uint8_t id;
217 
218 	memset(&pci, 0, sizeof(pci));
219 	pci.pci_next_mmio_bar = PCI_MMIO_BAR_BASE;
220 
221 #ifdef __amd64__
222 	pci.pci_next_io_bar = VM_PCI_IO_BAR_BASE;
223 #endif /* __amd64__ */
224 
225 	if (pci_add_device(&id, PCI_VENDOR_OPENBSD, PCI_PRODUCT_OPENBSD_PCHB,
226 	    PCI_CLASS_BRIDGE, PCI_SUBCLASS_BRIDGE_HOST,
227 	    PCI_VENDOR_OPENBSD, 0, 0, NULL)) {
228 		log_warnx("%s: can't add PCI host bridge", __progname);
229 		return;
230 	}
231 }
232 
233 #ifdef __amd64__
234 void
pci_handle_address_reg(struct vm_run_params * vrp)235 pci_handle_address_reg(struct vm_run_params *vrp)
236 {
237 	struct vm_exit *vei = vrp->vrp_exit;
238 
239 	/*
240 	 * vei_dir == VEI_DIR_OUT : out instruction
241 	 *
242 	 * The guest wrote to the address register.
243 	 */
244 	if (vei->vei.vei_dir == VEI_DIR_OUT) {
245 		get_input_data(vei, &pci.pci_addr_reg);
246 	} else {
247 		/*
248 		 * vei_dir == VEI_DIR_IN : in instruction
249 		 *
250 		 * The guest read the address register
251 		 */
252 		set_return_data(vei, pci.pci_addr_reg);
253 	}
254 }
255 
256 uint8_t
pci_handle_io(struct vm_run_params * vrp)257 pci_handle_io(struct vm_run_params *vrp)
258 {
259 	int i, j, k, l;
260 	uint16_t reg, b_hi, b_lo;
261 	pci_iobar_fn_t fn;
262 	struct vm_exit *vei = vrp->vrp_exit;
263 	uint8_t intr, dir;
264 
265 	k = -1;
266 	l = -1;
267 	reg = vei->vei.vei_port;
268 	dir = vei->vei.vei_dir;
269 	intr = 0xFF;
270 
271 	for (i = 0 ; i < pci.pci_dev_ct ; i++) {
272 		for (j = 0 ; j < pci.pci_devices[i].pd_bar_ct; j++) {
273 			b_lo = PCI_MAPREG_IO_ADDR(pci.pci_devices[i].pd_bar[j]);
274 			b_hi = b_lo + VM_PCI_IO_BAR_SIZE;
275 			if (reg >= b_lo && reg < b_hi) {
276 				if (pci.pci_devices[i].pd_barfunc[j]) {
277 					k = j;
278 					l = i;
279 				}
280 			}
281 		}
282 	}
283 
284 	if (k >= 0 && l >= 0) {
285 		fn = (pci_iobar_fn_t)pci.pci_devices[l].pd_barfunc[k];
286 		if (fn(vei->vei.vei_dir, reg -
287 		    PCI_MAPREG_IO_ADDR(pci.pci_devices[l].pd_bar[k]),
288 		    &vei->vei.vei_data, &intr,
289 		    pci.pci_devices[l].pd_bar_cookie[k],
290 		    vei->vei.vei_size)) {
291 			log_warnx("%s: pci i/o access function failed",
292 			    __progname);
293 		}
294 	} else {
295 		DPRINTF("%s: no pci i/o function for reg 0x%llx (dir=%d "
296 		    "guest %%rip=0x%llx", __progname, (uint64_t)reg, dir,
297 		    vei->vrs.vrs_gprs[VCPU_REGS_RIP]);
298 		/* Reads from undefined ports return 0xFF */
299 		if (dir == VEI_DIR_IN)
300 			set_return_data(vei, 0xFFFFFFFF);
301 	}
302 
303 	if (intr != 0xFF) {
304 		intr = pci.pci_devices[l].pd_irq;
305 	}
306 
307 	return (intr);
308 }
309 
310 void
pci_handle_data_reg(struct vm_run_params * vrp)311 pci_handle_data_reg(struct vm_run_params *vrp)
312 {
313 	struct vm_exit *vei = vrp->vrp_exit;
314 	uint8_t b, d, f, o, baridx, ofs, sz;
315 	int ret;
316 	pci_cs_fn_t csfunc;
317 
318 	/* abort if the address register is wack */
319 	if (!(pci.pci_addr_reg & PCI_MODE1_ENABLE)) {
320 		/* if read, return FFs */
321 		if (vei->vei.vei_dir == VEI_DIR_IN)
322 			set_return_data(vei, 0xFFFFFFFF);
323 		log_warnx("invalid address register during pci read: "
324 		    "0x%llx", (uint64_t)pci.pci_addr_reg);
325 		return;
326 	}
327 
328 	/* I/Os to 0xCFC..0xCFF are permitted */
329 	ofs = vei->vei.vei_port - 0xCFC;
330 	sz = vei->vei.vei_size;
331 
332 	b = (pci.pci_addr_reg >> 16) & 0xff;
333 	d = (pci.pci_addr_reg >> 11) & 0x1f;
334 	f = (pci.pci_addr_reg >> 8) & 0x7;
335 	o = (pci.pci_addr_reg & 0xfc);
336 
337 	csfunc = pci.pci_devices[d].pd_csfunc;
338 	if (csfunc != NULL) {
339 		ret = csfunc(vei->vei.vei_dir, (o / 4), &vei->vei.vei_data);
340 		if (ret)
341 			log_warnx("cfg space access function failed for "
342 			    "pci device %d", d);
343 		return;
344 	}
345 
346 	/* No config space function, fallback to default simple r/w impl. */
347 
348 	o += ofs;
349 
350 	/*
351 	 * vei_dir == VEI_DIR_OUT : out instruction
352 	 *
353 	 * The guest wrote to the config space location denoted by the current
354 	 * value in the address register.
355 	 */
356 	if (vei->vei.vei_dir == VEI_DIR_OUT) {
357 		if ((o >= 0x10 && o <= 0x24) &&
358 		    vei->vei.vei_data == 0xffffffff) {
359 			/*
360 			 * Compute BAR index:
361 			 * o = 0x10 -> baridx = 0
362 			 * o = 0x14 -> baridx = 1
363 			 * o = 0x18 -> baridx = 2
364 			 * o = 0x1c -> baridx = 3
365 			 * o = 0x20 -> baridx = 4
366 			 * o = 0x24 -> baridx = 5
367 			 */
368 			baridx = (o / 4) - 4;
369 			if (baridx < pci.pci_devices[d].pd_bar_ct)
370 				vei->vei.vei_data = 0xfffff000;
371 			else
372 				vei->vei.vei_data = 0;
373 		}
374 
375 		/* IOBAR registers must have bit 0 set */
376 		if (o >= 0x10 && o <= 0x24) {
377 			baridx = (o / 4) - 4;
378 			if (baridx < pci.pci_devices[d].pd_bar_ct &&
379 			    pci.pci_devices[d].pd_bartype[baridx] ==
380 			    PCI_BAR_TYPE_IO)
381 				vei->vei.vei_data |= 1;
382 		}
383 
384 		/*
385 		 * Discard writes to "option rom base address" as none of our
386 		 * emulated devices have PCI option roms. Accept any other
387 		 * writes and copy data to config space registers.
388 		 */
389 		if (o != PCI_EXROMADDR_0)
390 			get_input_data(vei,
391 			    &pci.pci_devices[d].pd_cfg_space[o / 4]);
392 	} else {
393 		/*
394 		 * vei_dir == VEI_DIR_IN : in instruction
395 		 *
396 		 * The guest read from the config space location determined by
397 		 * the current value in the address register.
398 		 */
399 		if (d > pci.pci_dev_ct || b > 0 || f > 0)
400 			set_return_data(vei, 0xFFFFFFFF);
401 		else {
402 			switch (sz) {
403 			case 4:
404 				set_return_data(vei,
405 				    pci.pci_devices[d].pd_cfg_space[o / 4]);
406 				break;
407 			case 2:
408 				if (ofs == 0)
409 					set_return_data(vei, pci.pci_devices[d].
410 					    pd_cfg_space[o / 4]);
411 				else
412 					set_return_data(vei, pci.pci_devices[d].
413 					    pd_cfg_space[o / 4] >> 16);
414 				break;
415 			case 1:
416 				set_return_data(vei, pci.pci_devices[d].
417 				    pd_cfg_space[o / 4] >> (ofs * 8));
418 				break;
419 			}
420 		}
421 	}
422 }
423 #endif /* __amd64__ */
424 
425 int
pci_dump(int fd)426 pci_dump(int fd)
427 {
428 	log_debug("%s: sending pci", __func__);
429 	if (atomicio(vwrite, fd, &pci, sizeof(pci)) != sizeof(pci)) {
430 		log_warnx("%s: error writing pci to fd", __func__);
431 		return (-1);
432 	}
433 	return (0);
434 }
435 
436 int
pci_restore(int fd)437 pci_restore(int fd)
438 {
439 	log_debug("%s: receiving pci", __func__);
440 	if (atomicio(read, fd, &pci, sizeof(pci)) != sizeof(pci)) {
441 		log_warnx("%s: error reading pci from fd", __func__);
442 		return (-1);
443 	}
444 	return (0);
445 }
446 
447 /*
448  * Find the first PCI device based on PCI Subsystem ID
449  * (e.g. PCI_PRODUCT_VIRTIO_BLOCK).
450  *
451  * Returns the PCI device id of the first matching device, if found.
452  * Otherwise, returns -1.
453  */
454 int
pci_find_first_device(uint16_t subsys_id)455 pci_find_first_device(uint16_t subsys_id)
456 {
457 	int i;
458 
459 	for (i = 0; i < pci.pci_dev_ct; i++)
460 		if (pci.pci_devices[i].pd_subsys_id == subsys_id)
461 			return (i);
462 	return (-1);
463 }
464