xref: /freebsd/sys/dev/hyperv/pcib/vmbus_pcib.c (revision 9dbf5b0e)
1 /*-
2  * Copyright (c) 2016-2017 Microsoft Corp.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 #ifdef NEW_PCIB
29 #include "opt_acpi.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/types.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/kernel.h>
37 #include <sys/queue.h>
38 #include <sys/lock.h>
39 #include <sys/sx.h>
40 #include <sys/smp.h>
41 #include <sys/sysctl.h>
42 #include <sys/bus.h>
43 #include <sys/rman.h>
44 #include <sys/mutex.h>
45 #include <sys/errno.h>
46 
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_kern.h>
50 #include <vm/pmap.h>
51 
52 #if defined(__aarch64__)
53 #include <arm64/include/intr.h>
54 #endif
55 #include <machine/atomic.h>
56 #include <machine/bus.h>
57 #include <machine/frame.h>
58 #include <machine/pci_cfgreg.h>
59 #include <machine/resource.h>
60 
61 #include <sys/pciio.h>
62 #include <dev/pci/pcireg.h>
63 #include <dev/pci/pcivar.h>
64 #include <dev/pci/pci_private.h>
65 #include <dev/pci/pcib_private.h>
66 #include "pcib_if.h"
67 #if defined(__i386__) || defined(__amd64__)
68 #include <machine/intr_machdep.h>
69 #include <x86/apicreg.h>
70 #include <x86/apicvar.h>
71 #endif
72 #if defined(__aarch64__)
73 #include <contrib/dev/acpica/include/acpi.h>
74 #include <contrib/dev/acpica/include/accommon.h>
75 #include <dev/acpica/acpivar.h>
76 #include <dev/acpica/acpi_pcibvar.h>
77 #endif
78 #include <dev/hyperv/include/hyperv.h>
79 #include <dev/hyperv/include/vmbus_xact.h>
80 #include <dev/hyperv/vmbus/vmbus_reg.h>
81 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
82 
83 #include "vmbus_if.h"
84 
/*
 * Minimal Linux-style "completion" primitive built from a FreeBSD
 * mutex plus sleep/wakeup.  'done' counts complete() calls that have
 * not yet been consumed by a waiter.
 */
struct completion {
	unsigned int done;
	struct mtx lock;
};
89 
/*
 * Initialize a completion: zero the state and create the mutex that
 * protects 'done'.  Must be paired with free_completion().
 */
static void
init_completion(struct completion *c)
{
	memset(c, 0, sizeof(*c));
	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
	c->done = 0;
}
/*
 * Re-arm an already-initialized completion for reuse.
 * NOTE(review): 'done' is reset without holding c->lock; callers
 * only do this when no completer/waiter can race (e.g. between
 * protocol-negotiation attempts) — confirm before adding new callers.
 */
static void
reinit_completion(struct completion *c)
{
	c->done = 0;
}
/* Destroy the mutex backing a completion created by init_completion(). */
static void
free_completion(struct completion *c)
{
	mtx_destroy(&c->lock);
}
107 
/*
 * Signal a completion: bump 'done' under the lock and wake any thread
 * sleeping on the completion's address in wait_for_completion*().
 */
static void
complete(struct completion *c)
{
	mtx_lock(&c->lock);
	c->done++;
	mtx_unlock(&c->lock);
	wakeup(c);
}
116 
/*
 * Block (uninterruptibly, no timeout) until complete() has been called
 * at least once, then consume one 'done' token.  mtx_sleep() drops the
 * lock while sleeping and reacquires it before returning, so the
 * while-loop re-check of 'done' is race-free.
 */
static void
wait_for_completion(struct completion *c)
{
	mtx_lock(&c->lock);
	while (c->done == 0)
		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
	c->done--;
	mtx_unlock(&c->lock);
}
126 
/*
 * Wait for a completion with a bounded sleep.
 *
 * 'timeout' is in ticks (as passed to mtx_sleep()).  Only a single
 * sleep is performed; if we are woken spuriously before 'timeout'
 * expires and 'done' is still zero, we report a timeout rather than
 * re-sleeping for the remainder.
 *
 * Return: 0 if completed (one 'done' token consumed), a non-zero value
 * if timed out.
 */
static int
wait_for_completion_timeout(struct completion *c, int timeout)
{
	int ret;

	mtx_lock(&c->lock);

	if (c->done == 0)
		mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);

	if (c->done > 0) {
		c->done--;
		ret = 0;
	} else {
		ret = 1;
	}

	mtx_unlock(&c->lock);

	return (ret);
}
151 
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Pack a Windows-style major.minor protocol version into 32 bits. */
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))

enum pci_protocol_version_t {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),
};

/* Versions offered to the host during negotiation, newest first. */
static enum pci_protocol_version_t pci_protocol_versions[] = {
	PCI_PROTOCOL_VERSION_1_4,
	PCI_PROTOCOL_VERSION_1_1,
};

/* Per-function MMIO window: first page is the index/config pages. */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)

/*
 * Message Types
 */

enum pci_message_type {
	/*
	 * Version 1.1
	 */
	PCI_MESSAGE_BASE                = 0x42490000,
	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0,
	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 1,
	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 4,
	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 5,
	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 6,
	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 7,
	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 8,
	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 9,
	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0xA,
	PCI_EJECT                       = PCI_MESSAGE_BASE + 0xB,
	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0xC,
	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0xD,
	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0xE,
	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0xF,
	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
	PCI_RESOURCES_ASSIGNED2         = PCI_MESSAGE_BASE + 0x16,
	PCI_CREATE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x17,
	PCI_DELETE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x18, /* unused */
	PCI_BUS_RELATIONS2              = PCI_MESSAGE_BASE + 0x19,
	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
	PCI_MESSAGE_MAXIMUM
};

/* NTSTATUS the host returns when a protocol version is not supported. */
#define STATUS_REVISION_MISMATCH 0xC0000059
208 
/*
 * Structures defining the virtual PCI Express protocol.
 * These are wire formats shared with the Hyper-V parent partition,
 * hence __packed everywhere; do not change field sizes or order.
 */

/* Protocol version split into Windows-style major/minor halves. */
union pci_version {
	struct {
		uint16_t minor_version;
		uint16_t major_version;
	} parts;
	uint32_t version;
} __packed;

/*
 * This representation is the one used in Windows, which is
 * what is expected when sending this back and forth with
 * the Hyper-V parent partition.
 */
union win_slot_encoding {
	struct {
		uint32_t	slot:5;
		uint32_t	func:3;
		uint32_t	reserved:24;
	} bits;
	uint32_t val;
} __packed;

/* One function description as sent in v1.1 PCI_BUS_RELATIONS. */
struct pci_func_desc {
	uint16_t	v_id;	/* vendor ID */
	uint16_t	d_id;	/* device ID */
	uint8_t		rev;
	uint8_t		prog_intf;
	uint8_t		subclass;
	uint8_t		base_class;
	uint32_t	subsystem_id;
	union win_slot_encoding wslot;
	uint32_t	ser;	/* serial number */
} __packed;

/* v1.4 variant (PCI_BUS_RELATIONS2): adds flags and vNUMA node. */
struct pci_func_desc2 {
	uint16_t	v_id;	/* vendor ID */
	uint16_t	d_id;	/* device ID */
	uint8_t		rev;
	uint8_t		prog_intf;
	uint8_t		subclass;
	uint8_t		base_class;
	uint32_t	subsystem_id;
	union		win_slot_encoding wslot;
	uint32_t	ser;	/* serial number */
	uint32_t	flags;
	uint16_t	virtual_numa_node;
	uint16_t	reserved;
} __packed;


/* MSI descriptor for PCI_CREATE_INTERRUPT_MESSAGE (v1.1). */
struct hv_msi_desc {
	uint8_t		vector;
	uint8_t		delivery_mode;
	uint16_t	vector_count;
	uint32_t	reserved;
	uint64_t	cpu_mask;
} __packed;

/* MSI descriptor for PCI_CREATE_INTERRUPT_MESSAGE3 (explicit CPU list). */
struct hv_msi_desc3 {
	uint32_t	vector;
	uint8_t		delivery_mode;
	uint8_t		reserved;
	uint16_t	vector_count;
	uint16_t	processor_count;
	uint16_t	processor_array[32];
} __packed;

/* Host-translated interrupt descriptor returned by the create message. */
struct tran_int_desc {
	uint16_t	reserved;
	uint16_t	vector_count;
	uint32_t	data;
	uint64_t	address;
} __packed;

struct pci_message {
	uint32_t type;
} __packed;

struct pci_child_message {
	struct pci_message message_type;
	union win_slot_encoding wslot;
} __packed;

struct pci_incoming_message {
	struct vmbus_chanpkt_hdr hdr;
	struct pci_message message_type;
} __packed;

struct pci_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status;	/* negative values are failures */
} __packed;

/*
 * Guest-side bookkeeping wrapped around an outgoing message; the
 * pointer to this struct travels in the VMBus transaction ID so the
 * channel callback can dispatch the host's completion.
 */
struct pci_packet {
	void (*completion_func)(void *context, struct pci_response *resp,
	    int resp_packet_size);
	void *compl_ctxt;

	struct pci_message message[0];
};

/*
 * Specific message types supporting the PCI protocol.
 */

struct pci_version_request {
	struct pci_message message_type;
	uint32_t protocol_version;
	uint32_t reservedz:31;
} __packed;

struct pci_bus_d0_entry {
	struct pci_message message_type;
	uint32_t reserved;
	uint64_t mmio_base;
} __packed;

struct pci_bus_relations {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc func[0];
} __packed;

struct pci_bus_relations2 {
	struct pci_incoming_message incoming;
	uint32_t device_count;
	struct pci_func_desc2 func[0];
} __packed;

#define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
struct pci_q_res_req_response {
	struct vmbus_chanpkt_hdr hdr;
	int32_t status; /* negative values are failures */
	uint32_t probed_bar[MAX_NUM_BARS];
} __packed;

struct pci_resources_assigned {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
	uint32_t msi_descriptors;
	uint32_t reserved[4];
} __packed;

struct pci_resources_assigned2 {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint8_t memory_range[0x14][6];   /* not used here */
	uint32_t msi_descriptor_count;
	uint8_t reserved[70];
} __packed;

struct pci_create_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc int_desc;
} __packed;

struct pci_create_interrupt3 {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct hv_msi_desc3 int_desc;
} __packed;

struct pci_create_int_response {
	struct pci_response response;
	uint32_t reserved;
	struct tran_int_desc int_desc;
} __packed;

struct pci_delete_interrupt {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	struct tran_int_desc int_desc;
} __packed;

struct pci_dev_incoming {
	struct pci_incoming_message incoming;
	union win_slot_encoding wslot;
} __packed;

struct pci_eject_response {
	struct pci_message message_type;
	union win_slot_encoding wslot;
	uint32_t status;
} __packed;
399 
/*
 * Driver specific state.
 */

enum hv_pcibus_state {
	hv_pcibus_init = 0,
	hv_pcibus_installed,
};

/* Per-instance state of the virtual PCI bus exposed by Hyper-V. */
struct hv_pcibus {
	device_t pcib;		/* this bridge device */
	device_t pci_bus;	/* child "pci" bus device */
	struct vmbus_pcib_softc *sc;

	uint16_t pci_domain;	/* derived from first child's serial no. */

	enum hv_pcibus_state state;

	struct resource *cfg_res;	/* MMIO config-space window */

	/* Signalled by pci_devices_present_work(); see query_comp. */
	struct completion query_completion, *query_comp;

	struct mtx config_lock; /* Avoid two threads writing index page */
	struct mtx device_list_lock;    /* Protect lists below */
	uint32_t protocol_version;	/* negotiated with the host */
	TAILQ_HEAD(, hv_pci_dev) children;
	TAILQ_HEAD(, hv_dr_state) dr_list;

	volatile int detaching;	/* set once detach begins */
};

/* Internal (protocol-version-neutral) function description. */
struct hv_pcidev_desc {
	uint16_t v_id;	/* vendor ID */
	uint16_t d_id;	/* device ID */
	uint8_t rev;
	uint8_t prog_intf;
	uint8_t subclass;
	uint8_t base_class;
	uint32_t subsystem_id;
	union win_slot_encoding wslot;
	uint32_t ser;	/* serial number */
	uint32_t flags;
	uint16_t virtual_numa_node;
} __packed;

/* One pass-through PCI function tracked on hbus->children. */
struct hv_pci_dev {
	TAILQ_ENTRY(hv_pci_dev) link;

	struct hv_pcidev_desc desc;

	/* Marked true at the start of a relations pass; cleared if seen. */
	bool reported_missing;

	struct hv_pcibus *hbus;
	struct task eject_task;

	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;

	/*
	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
	 * read it back, for each of the BAR offsets within config space.
	 */
	uint32_t probed_bar[MAX_NUM_BARS];
};

/*
 * Tracks "Device Relations" messages from the host, which must be
 * processed in order.
 */
struct hv_dr_work {
	struct task task;
	struct hv_pcibus *bus;
};

/* One queued "device relations" snapshot from the host. */
struct hv_dr_state {
	TAILQ_ENTRY(hv_dr_state) link;
	uint32_t device_count;
	struct hv_pcidev_desc func[0];
};

/* One host-allocated interrupt mapping owned by an hv_pci_dev. */
struct hv_irq_desc {
	TAILQ_ENTRY(hv_irq_desc) link;
	struct tran_int_desc desc;
	int irq;
};

/* Linux-style devfn packing: slot in bits 7..3, function in bits 2..0. */
#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)         ((devfn) & 0x07)
488 
489 static uint32_t
devfn_to_wslot(unsigned int devfn)490 devfn_to_wslot(unsigned int devfn)
491 {
492 	union win_slot_encoding wslot;
493 
494 	wslot.val = 0;
495 	wslot.bits.slot = PCI_SLOT(devfn);
496 	wslot.bits.func = PCI_FUNC(devfn);
497 
498 	return (wslot.val);
499 }
500 
501 static unsigned int
wslot_to_devfn(uint32_t wslot)502 wslot_to_devfn(uint32_t wslot)
503 {
504 	union win_slot_encoding encoding;
505 	unsigned int slot;
506 	unsigned int func;
507 
508 	encoding.val = wslot;
509 
510 	slot = encoding.bits.slot;
511 	func = encoding.bits.func;
512 
513 	return (PCI_DEVFN(slot, func));
514 }
515 
/* Softc for the vmbus_pcib bridge device. */
struct vmbus_pcib_softc {
	struct vmbus_channel	*chan;	/* VMBus channel to the host VSP */
	void *rx_buf;			/* default receive buffer */

	struct taskqueue	*taskq;	/* runs pci_devices_present_work() */

	struct hv_pcibus	*hbus;
};
524 
/*
 * VMBus device type GUID for Hyper-V PCI pass-through.
 * {44C4F61D-4444-4400-9D52-802E27EDE19F}
 */
static const struct hyperv_guid g_pass_through_dev_type = {
	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
};

/* Generic completion context: host status + event to wake the sender. */
struct hv_pci_compl {
	struct completion host_event;
	int32_t completion_status;
};

/* Context for PCI_QUERY_RESOURCE_REQUIREMENTS responses. */
struct q_res_req_compl {
	struct completion host_event;
	struct hv_pci_dev *hpdev;
};

/* Context for PCI_CREATE_INTERRUPT_MESSAGE* responses. */
struct compose_comp_ctxt {
	struct hv_pci_compl comp_pkt;
	struct tran_int_desc int_desc;
};
545 
546 /*
547  * It is possible the device is revoked during initialization.
548  * Check if this happens during wait.
549  * Return: 0 if response arrived, ENODEV if device revoked.
550  */
551 static int
wait_for_response(struct hv_pcibus * hbus,struct completion * c)552 wait_for_response(struct hv_pcibus *hbus, struct completion *c)
553 {
554 	do {
555 		if (vmbus_chan_is_revoked(hbus->sc->chan)) {
556 			device_printf(hbus->pcib,
557 			    "The device is revoked.\n");
558 			return (ENODEV);
559 		}
560 	} while (wait_for_completion_timeout(c, hz /10) != 0);
561 
562 	return 0;
563 }
564 
565 static void
hv_pci_generic_compl(void * context,struct pci_response * resp,int resp_packet_size)566 hv_pci_generic_compl(void *context, struct pci_response *resp,
567     int resp_packet_size)
568 {
569 	struct hv_pci_compl *comp_pkt = context;
570 
571 	if (resp_packet_size >= sizeof(struct pci_response))
572 		comp_pkt->completion_status = resp->status;
573 	else
574 		comp_pkt->completion_status = -1;
575 
576 	complete(&comp_pkt->host_event);
577 }
578 
579 static void
q_resource_requirements(void * context,struct pci_response * resp,int resp_packet_size)580 q_resource_requirements(void *context, struct pci_response *resp,
581     int resp_packet_size)
582 {
583 	struct q_res_req_compl *completion = context;
584 	struct pci_q_res_req_response *q_res_req =
585 	    (struct pci_q_res_req_response *)resp;
586 	int i;
587 
588 	if (resp->status < 0) {
589 		printf("vmbus_pcib: failed to query resource requirements\n");
590 	} else {
591 		for (i = 0; i < MAX_NUM_BARS; i++)
592 			completion->hpdev->probed_bar[i] =
593 			    q_res_req->probed_bar[i];
594 	}
595 
596 	complete(&completion->host_event);
597 }
598 
599 static void
hv_pci_compose_compl(void * context,struct pci_response * resp,int resp_packet_size)600 hv_pci_compose_compl(void *context, struct pci_response *resp,
601     int resp_packet_size)
602 {
603 	struct compose_comp_ctxt *comp_pkt = context;
604 	struct pci_create_int_response *int_resp =
605 	    (struct pci_create_int_response *)resp;
606 
607 	comp_pkt->comp_pkt.completion_status = resp->status;
608 	comp_pkt->int_desc = int_resp->int_desc;
609 	complete(&comp_pkt->comp_pkt.host_event);
610 }
611 
/*
 * Tell the host to tear down one interrupt mapping for 'hpdev' and
 * free the guest-side descriptor.  Fire-and-forget: no completion is
 * requested for the delete message.
 */
static void
hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
{
	struct pci_delete_interrupt *int_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
	} ctxt;

	memset(&ctxt, 0, sizeof(ctxt));
	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
	int_pkt->wslot.val = hpdev->desc.wslot.val;
	int_pkt->int_desc = hid->desc;

	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    int_pkt, sizeof(*int_pkt), 0);

	free(hid, M_DEVBUF);
}
632 
/*
 * Remove one pass-through function: delete its newbus child (if the
 * PCI bus has enumerated it), unlink it from hbus->children, release
 * all of its host interrupt mappings, and free it.
 *
 * The caller must not hold device_list_lock; it is taken here, and
 * bus_topo_lock is required around device_delete_child().
 */
static void
hv_pci_delete_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct hv_irq_desc *hid, *tmp_hid;
	device_t pci_dev;
	int devfn;

	devfn = wslot_to_devfn(hpdev->desc.wslot.val);

	bus_topo_lock();

	/* Bus number is always 0 on this synthetic bus. */
	pci_dev = pci_find_dbsf(hbus->pci_domain,
	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
	if (pci_dev)
		device_delete_child(hbus->pci_bus, pci_dev);

	bus_topo_unlock();

	mtx_lock(&hbus->device_list_lock);
	TAILQ_REMOVE(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);

	/* Safe traversal: hv_int_desc_free() frees each element. */
	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
		hv_int_desc_free(hpdev, hid);

	free(hpdev, M_DEVBUF);
}
661 
/*
 * Create the guest-side tracking structure for a newly reported
 * function, query its BAR requirements from the host, and link it on
 * hbus->children.
 *
 * Returns the new child, or NULL if the send failed or the channel
 * was revoked while waiting for the host's response.
 */
static struct hv_pci_dev *
new_pcichild_device(struct hv_pcibus *hbus, struct hv_pcidev_desc *desc)
{
	struct hv_pci_dev *hpdev;
	struct pci_child_message *res_req;
	struct q_res_req_compl comp_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_child_message)];
	} ctxt;
	int ret;

	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
	hpdev->hbus = hbus;

	TAILQ_INIT(&hpdev->irq_desc_list);

	init_completion(&comp_pkt.host_event);
	comp_pkt.hpdev = hpdev;

	ctxt.pkt.compl_ctxt = &comp_pkt;
	ctxt.pkt.completion_func = q_resource_requirements;

	res_req = (struct pci_child_message *)&ctxt.pkt.message;
	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
	res_req->wslot.val = desc->wslot.val;

	/* The &ctxt.pkt pointer rides in the xact ID for the completion. */
	ret = vmbus_chan_send(hbus->sc->chan,
	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
	    res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret)
		goto err;

	if (wait_for_response(hbus, &comp_pkt.host_event))
		goto err;

	free_completion(&comp_pkt.host_event);

	hpdev->desc = *desc;

	mtx_lock(&hbus->device_list_lock);
	/* The first child's serial number seeds the PCI domain number. */
	if (TAILQ_EMPTY(&hbus->children))
		hbus->pci_domain = desc->ser & 0xFFFF;
	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
	mtx_unlock(&hbus->device_list_lock);
	return (hpdev);
err:
	free_completion(&comp_pkt.host_event);
	free(hpdev, M_DEVBUF);
	return (NULL);
}
713 
/* Rescan the child PCI bus so newly added functions are enumerated. */
static int
pci_rescan(device_t dev)
{
	return (BUS_RESCAN(dev));
}
719 
/*
 * Taskqueue handler that reconciles the child list against the most
 * recent "device relations" snapshot from the host:
 *
 *   1. Drain hbus->dr_list, keeping only the newest snapshot (older
 *      ones are obsolete and discarded).
 *   2. Mark all existing children missing, then clear the mark for
 *      every child the snapshot still reports; create children for
 *      newly reported functions.
 *   3. Delete children the host no longer reports.
 *   4. Rescan the PCI bus if anything was added, and wake a pending
 *      hv_pci_query_relations() waiter.
 */
static void
pci_devices_present_work(void *arg, int pending __unused)
{
	struct hv_dr_work *dr_wrk = arg;
	struct hv_dr_state *dr = NULL;
	struct hv_pcibus *hbus;
	uint32_t child_no;
	bool found;
	struct hv_pcidev_desc *new_desc;
	struct hv_pci_dev *hpdev, *tmp_hpdev;
	struct completion *query_comp;
	bool need_rescan = false;

	hbus = dr_wrk->bus;
	free(dr_wrk, M_DEVBUF);

	/* Pull this off the queue and process it if it was the last one. */
	mtx_lock(&hbus->device_list_lock);
	while (!TAILQ_EMPTY(&hbus->dr_list)) {
		dr = TAILQ_FIRST(&hbus->dr_list);
		TAILQ_REMOVE(&hbus->dr_list, dr, link);

		/* Throw this away if the list still has stuff in it. */
		if (!TAILQ_EMPTY(&hbus->dr_list)) {
			free(dr, M_DEVBUF);
			continue;
		}
	}
	mtx_unlock(&hbus->device_list_lock);

	/* Another queued instance of this task already did the work. */
	if (!dr)
		return;

	/* First, mark all existing children as reported missing. */
	mtx_lock(&hbus->device_list_lock);
	TAILQ_FOREACH(hpdev, &hbus->children, link)
		hpdev->reported_missing = true;
	mtx_unlock(&hbus->device_list_lock);

	/* Next, add back any reported devices. */
	for (child_no = 0; child_no < dr->device_count; child_no++) {
		found = false;
		new_desc = &dr->func[child_no];

		mtx_lock(&hbus->device_list_lock);
		TAILQ_FOREACH(hpdev, &hbus->children, link) {
			/* Identity: same slot/function, IDs, and serial. */
			if ((hpdev->desc.wslot.val ==
			    new_desc->wslot.val) &&
			    (hpdev->desc.v_id == new_desc->v_id) &&
			    (hpdev->desc.d_id == new_desc->d_id) &&
			    (hpdev->desc.ser == new_desc->ser)) {
				hpdev->reported_missing = false;
				found = true;
				break;
			}
		}
		mtx_unlock(&hbus->device_list_lock);

		if (!found) {
			if (!need_rescan)
				need_rescan = true;

			/* Lock dropped: new_pcichild_device() sleeps. */
			hpdev = new_pcichild_device(hbus, new_desc);
			if (!hpdev)
				printf("vmbus_pcib: failed to add a child\n");
		}
	}

	/* Remove missing device(s), if any */
	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
		if (hpdev->reported_missing)
			hv_pci_delete_device(hpdev);
	}

	/* Rescan the bus to find any new device, if necessary. */
	if (hbus->state == hv_pcibus_installed && need_rescan)
		pci_rescan(hbus->pci_bus);

	/* Wake up hv_pci_query_relations(), if it's waiting. */
	query_comp = hbus->query_comp;
	if (query_comp) {
		hbus->query_comp = NULL;
		complete(query_comp);
	}

	free(dr, M_DEVBUF);
}
807 
808 static struct hv_pci_dev *
get_pcichild_wslot(struct hv_pcibus * hbus,uint32_t wslot)809 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
810 {
811 	struct hv_pci_dev *hpdev, *ret = NULL;
812 
813 	mtx_lock(&hbus->device_list_lock);
814 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
815 		if (hpdev->desc.wslot.val == wslot) {
816 			ret = hpdev;
817 			break;
818 		}
819 	}
820 	mtx_unlock(&hbus->device_list_lock);
821 
822 	return (ret);
823 }
824 
825 static void
hv_pci_devices_present(struct hv_pcibus * hbus,struct pci_bus_relations * relations)826 hv_pci_devices_present(struct hv_pcibus *hbus,
827     struct pci_bus_relations *relations)
828 {
829 	struct hv_dr_state *dr;
830 	struct hv_dr_work *dr_wrk;
831 	unsigned long dr_size;
832 
833 	if (hbus->detaching && relations->device_count > 0)
834 		return;
835 
836 	dr_size = offsetof(struct hv_dr_state, func) +
837 	    (sizeof(struct pci_func_desc) * relations->device_count);
838 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
839 
840 	dr->device_count = relations->device_count;
841 	if (dr->device_count != 0)
842 		memcpy(dr->func, relations->func,
843 		    sizeof(struct hv_pcidev_desc) * dr->device_count);
844 
845 	mtx_lock(&hbus->device_list_lock);
846 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
847 	mtx_unlock(&hbus->device_list_lock);
848 
849 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
850 	dr_wrk->bus = hbus;
851 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
852 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
853 }
854 
855 static void
hv_pci_devices_present2(struct hv_pcibus * hbus,struct pci_bus_relations2 * relations)856 hv_pci_devices_present2(struct hv_pcibus *hbus,
857     struct pci_bus_relations2 *relations)
858 {
859 	struct hv_dr_state *dr;
860 	struct hv_dr_work *dr_wrk;
861 	unsigned long dr_size;
862 
863 	if (hbus->detaching && relations->device_count > 0)
864 		return;
865 
866 	dr_size = offsetof(struct hv_dr_state, func) +
867 	    (sizeof(struct pci_func_desc2) * relations->device_count);
868 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
869 
870 	dr->device_count = relations->device_count;
871 	if (dr->device_count != 0)
872 		memcpy(dr->func, relations->func,
873 		    sizeof(struct pci_func_desc2) * dr->device_count);
874 
875 	mtx_lock(&hbus->device_list_lock);
876 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
877 	mtx_unlock(&hbus->device_list_lock);
878 
879 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
880 	dr_wrk->bus = hbus;
881 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
882 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
883 }
884 
/*
 * Taskqueue handler for a host-initiated ejection: tear down the
 * child, then acknowledge the ejection to the host.  The wslot is
 * copied out first because hv_pci_delete_device() frees hpdev.
 */
static void
hv_eject_device_work(void *arg, int pending __unused)
{
	struct hv_pci_dev *hpdev = arg;
	union win_slot_encoding wslot = hpdev->desc.wslot;
	struct hv_pcibus *hbus = hpdev->hbus;
	struct pci_eject_response *eject_pkt;
	struct {
		struct pci_packet pkt;
		uint8_t buffer[sizeof(struct pci_eject_response)];
	} ctxt;

	hv_pci_delete_device(hpdev);

	memset(&ctxt, 0, sizeof(ctxt));
	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
	eject_pkt->wslot.val = wslot.val;
	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
	    eject_pkt, sizeof(*eject_pkt), 0);
}
906 
/*
 * Schedule the ejection of a child requested by the host via a
 * PCI_EJECT message.  No-op once detach has started.
 */
static void
hv_pci_eject_device(struct hv_pci_dev *hpdev)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	struct taskqueue *taskq;

	if (hbus->detaching)
		return;

	/*
	 * Push this task into the same taskqueue on which
	 * vmbus_pcib_attach() runs, so we're sure this task can't run
	 * concurrently with vmbus_pcib_attach().
	 */
	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
	taskqueue_enqueue(taskq, &hpdev->eject_task);
}
925 
926 #define PCIB_PACKET_SIZE	0x100
927 
928 static void
vmbus_pcib_on_channel_callback(struct vmbus_channel * chan,void * arg)929 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
930 {
931 	struct vmbus_pcib_softc *sc = arg;
932 	struct hv_pcibus *hbus = sc->hbus;
933 
934 	void *buffer;
935 	int bufferlen = PCIB_PACKET_SIZE;
936 
937 	struct pci_packet *comp_packet;
938 	struct pci_response *response;
939 	struct pci_incoming_message *new_msg;
940 	struct pci_bus_relations *bus_rel;
941 	struct pci_bus_relations2 *bus_rel2;
942 	struct pci_dev_incoming *dev_msg;
943 	struct hv_pci_dev *hpdev;
944 
945 	buffer = sc->rx_buf;
946 	do {
947 		struct vmbus_chanpkt_hdr *pkt = buffer;
948 		uint32_t bytes_rxed;
949 		int ret;
950 
951 		bytes_rxed = bufferlen;
952 		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
953 
954 		if (ret == ENOBUFS) {
955 			/* Handle large packet */
956 			if (bufferlen > PCIB_PACKET_SIZE) {
957 				free(buffer, M_DEVBUF);
958 				buffer = NULL;
959 			}
960 
961 			/* alloc new buffer */
962 			buffer =
963 			    malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
964 			bufferlen = bytes_rxed;
965 
966 			continue;
967 		}
968 
969 		if (ret != 0) {
970 			/* ignore EIO or EAGAIN */
971 			break;
972 		}
973 
974 		if (bytes_rxed <= sizeof(struct pci_response))
975 			continue;
976 
977 		switch (pkt->cph_type) {
978 		case VMBUS_CHANPKT_TYPE_COMP:
979 			comp_packet =
980 			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
981 			response = (struct pci_response *)pkt;
982 			comp_packet->completion_func(comp_packet->compl_ctxt,
983 			    response, bytes_rxed);
984 			break;
985 		case VMBUS_CHANPKT_TYPE_INBAND:
986 			new_msg = (struct pci_incoming_message *)buffer;
987 
988 			switch (new_msg->message_type.type) {
989 			case PCI_BUS_RELATIONS:
990 				bus_rel = (struct pci_bus_relations *)buffer;
991 
992 				if (bus_rel->device_count == 0)
993 					break;
994 
995 				if (bytes_rxed <
996 				    offsetof(struct pci_bus_relations, func) +
997 				        (sizeof(struct pci_func_desc) *
998 				            (bus_rel->device_count)))
999 					break;
1000 
1001 				hv_pci_devices_present(hbus, bus_rel);
1002 				break;
1003 
1004 			case PCI_BUS_RELATIONS2:
1005 				bus_rel2 = (struct pci_bus_relations2 *)buffer;
1006 
1007 				if (bus_rel2->device_count == 0)
1008 					break;
1009 
1010 				if (bytes_rxed <
1011 				    offsetof(struct pci_bus_relations2, func) +
1012 				    (sizeof(struct pci_func_desc2) *
1013 				    (bus_rel2->device_count)))
1014 					break;
1015 
1016 				hv_pci_devices_present2(hbus, bus_rel2);
1017 
1018 			case PCI_EJECT:
1019 				dev_msg = (struct pci_dev_incoming *)buffer;
1020 				hpdev = get_pcichild_wslot(hbus,
1021 				    dev_msg->wslot.val);
1022 
1023 				if (hpdev)
1024 					hv_pci_eject_device(hpdev);
1025 
1026 				break;
1027 			default:
1028 				printf("vmbus_pcib: Unknown msg type 0x%x\n",
1029 				    new_msg->message_type.type);
1030 				break;
1031 			}
1032 			break;
1033 		default:
1034 			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
1035 			    pkt->cph_type);
1036 			break;
1037 		}
1038 	} while (1);
1039 
1040 	if (bufferlen > PCIB_PACKET_SIZE)
1041 		free(buffer, M_DEVBUF);
1042 }
1043 
1044 static int
hv_pci_protocol_negotiation(struct hv_pcibus * hbus,enum pci_protocol_version_t version[],int num_version)1045 hv_pci_protocol_negotiation(struct hv_pcibus *hbus,
1046     enum pci_protocol_version_t version[],
1047     int num_version)
1048 {
1049 	struct pci_version_request *version_req;
1050 	struct hv_pci_compl comp_pkt;
1051 	struct {
1052 		struct pci_packet pkt;
1053 		uint8_t buffer[sizeof(struct pci_version_request)];
1054 	} ctxt;
1055 	int ret;
1056 	int i;
1057 
1058 	init_completion(&comp_pkt.host_event);
1059 
1060 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1061 	ctxt.pkt.compl_ctxt = &comp_pkt;
1062 	version_req = (struct pci_version_request *)&ctxt.pkt.message;
1063 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1064 
1065 	for(i=0; i< num_version; i++) {
1066 		version_req->protocol_version = version[i];
1067 		ret = vmbus_chan_send(hbus->sc->chan,
1068 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1069 		    version_req, sizeof(*version_req),
1070 		    (uint64_t)(uintptr_t)&ctxt.pkt);
1071 		if (!ret)
1072 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1073 
1074 		if (ret) {
1075 			device_printf(hbus->pcib,
1076 				"vmbus_pcib failed to request version: %d\n",
1077 				ret);
1078 			goto out;
1079 		}
1080 
1081 		if (comp_pkt.completion_status >= 0) {
1082 			hbus->protocol_version = version[i];
1083 			device_printf(hbus->pcib,
1084 				"PCI VMBus using version 0x%x\n",
1085 				hbus->protocol_version);
1086 			ret = 0;
1087 			goto out;
1088 		}
1089 
1090 		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
1091 			device_printf(hbus->pcib,
1092 				"vmbus_pcib version negotiation failed: %x\n",
1093 				comp_pkt.completion_status);
1094 			ret = EPROTO;
1095 			goto out;
1096 		}
1097 		reinit_completion(&comp_pkt.host_event);
1098 	}
1099 
1100 	device_printf(hbus->pcib,
1101 		"PCI pass-trhpugh VSP failed to find supported version\n");
1102 out:
1103 	free_completion(&comp_pkt.host_event);
1104 	return (ret);
1105 }
1106 
1107 /* Ask the host to send along the list of child devices */
1108 static int
hv_pci_query_relations(struct hv_pcibus * hbus)1109 hv_pci_query_relations(struct hv_pcibus *hbus)
1110 {
1111 	struct pci_message message;
1112 	int ret;
1113 
1114 	message.type = PCI_QUERY_BUS_RELATIONS;
1115 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1116 	    &message, sizeof(message), 0);
1117 	return (ret);
1118 }
1119 
1120 static int
hv_pci_enter_d0(struct hv_pcibus * hbus)1121 hv_pci_enter_d0(struct hv_pcibus *hbus)
1122 {
1123 	struct pci_bus_d0_entry *d0_entry;
1124 	struct hv_pci_compl comp_pkt;
1125 	struct {
1126 		struct pci_packet pkt;
1127 		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1128 	} ctxt;
1129 	int ret;
1130 
1131 	/*
1132 	 * Tell the host that the bus is ready to use, and moved into the
1133 	 * powered-on state.  This includes telling the host which region
1134 	 * of memory-mapped I/O space has been chosen for configuration space
1135 	 * access.
1136 	 */
1137 	init_completion(&comp_pkt.host_event);
1138 
1139 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1140 	ctxt.pkt.compl_ctxt = &comp_pkt;
1141 
1142 	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1143 	memset(d0_entry, 0, sizeof(*d0_entry));
1144 	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1145 	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1146 
1147 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1148 	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1149 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1150 	if (!ret)
1151 		ret = wait_for_response(hbus, &comp_pkt.host_event);
1152 
1153 	if (ret)
1154 		goto out;
1155 
1156 	if (comp_pkt.completion_status < 0) {
1157 		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1158 		ret = EPROTO;
1159 	} else {
1160 		ret = 0;
1161 	}
1162 
1163 out:
1164 	free_completion(&comp_pkt.host_event);
1165 	return (ret);
1166 }
1167 
1168 /*
1169  * It looks this is only needed by Windows VM, but let's send the message too
1170  * just to make the host happy.
1171  */
1172 static int
hv_send_resources_allocated(struct hv_pcibus * hbus)1173 hv_send_resources_allocated(struct hv_pcibus *hbus)
1174 {
1175 	struct pci_resources_assigned *res_assigned;
1176 	struct pci_resources_assigned2 *res_assigned2;
1177 	struct hv_pci_compl comp_pkt;
1178 	struct hv_pci_dev *hpdev;
1179 	struct pci_packet *pkt;
1180 	uint32_t wslot;
1181 	int ret = 0;
1182 	size_t size_res;
1183 
1184 	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4)
1185 			? sizeof(*res_assigned) : sizeof(*res_assigned2);
1186 	pkt = malloc(sizeof(*pkt) + size_res,
1187 	    M_DEVBUF, M_WAITOK | M_ZERO);
1188 
1189 	for (wslot = 0; wslot < 256; wslot++) {
1190 		hpdev = get_pcichild_wslot(hbus, wslot);
1191 		if (!hpdev)
1192 			continue;
1193 
1194 		init_completion(&comp_pkt.host_event);
1195 
1196 		memset(pkt, 0, sizeof(*pkt) + size_res);
1197 		pkt->completion_func = hv_pci_generic_compl;
1198 		pkt->compl_ctxt = &comp_pkt;
1199 
1200 		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) {
1201 			res_assigned =
1202 			    (struct pci_resources_assigned *)&pkt->message;
1203 			res_assigned->message_type.type =
1204 			    PCI_RESOURCES_ASSIGNED;
1205 			res_assigned->wslot.val = hpdev->desc.wslot.val;
1206 		} else {
1207 			res_assigned2 =
1208 			    (struct pci_resources_assigned2 *)&pkt->message;
1209 			res_assigned2->message_type.type =
1210 			    PCI_RESOURCES_ASSIGNED2;
1211 			res_assigned2->wslot.val = hpdev->desc.wslot.val;
1212 		}
1213 
1214 		ret = vmbus_chan_send(hbus->sc->chan,
1215 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1216 		    &pkt->message, size_res,
1217 		    (uint64_t)(uintptr_t)pkt);
1218 		if (!ret)
1219 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1220 
1221 		free_completion(&comp_pkt.host_event);
1222 
1223 		if (ret)
1224 			break;
1225 
1226 		if (comp_pkt.completion_status < 0) {
1227 			ret = EPROTO;
1228 			device_printf(hbus->pcib,
1229 			    "failed to send PCI_RESOURCES_ASSIGNED\n");
1230 			break;
1231 		}
1232 	}
1233 
1234 	free(pkt, M_DEVBUF);
1235 	return (ret);
1236 }
1237 
1238 static int
hv_send_resources_released(struct hv_pcibus * hbus)1239 hv_send_resources_released(struct hv_pcibus *hbus)
1240 {
1241 	struct pci_child_message pkt;
1242 	struct hv_pci_dev *hpdev;
1243 	uint32_t wslot;
1244 	int ret;
1245 
1246 	for (wslot = 0; wslot < 256; wslot++) {
1247 		hpdev = get_pcichild_wslot(hbus, wslot);
1248 		if (!hpdev)
1249 			continue;
1250 
1251 		pkt.message_type.type = PCI_RESOURCES_RELEASED;
1252 		pkt.wslot.val = hpdev->desc.wslot.val;
1253 
1254 		ret = vmbus_chan_send(hbus->sc->chan,
1255 		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1256 		if (ret)
1257 			return (ret);
1258 	}
1259 
1260 	return (0);
1261 }
1262 
/*
 * Generate tiny typed accessors for the config-space MMIO window
 * (bus->cfg_res): hv_cfg_read_{1,2,4}() and hv_cfg_write_{1,2,4}().
 * 'x' is the value width in bits; 's' is the access width in bytes and
 * selects the underlying bus_read_##s()/bus_write_##s() routine.
 */
#define hv_cfg_read(x, s)						\
static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
    bus_size_t offset)							\
{									\
	return (bus_read_##s(bus->cfg_res, offset));			\
}

#define hv_cfg_write(x, s)						\
static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
    bus_size_t offset, uint##x##_t val)					\
{									\
	return (bus_write_##s(bus->cfg_res, offset, val));		\
}

hv_cfg_read(8, 1)
hv_cfg_read(16, 2)
hv_cfg_read(32, 4)

hv_cfg_write(8, 1)
hv_cfg_write(16, 2)
hv_cfg_write(32, 4)
1284 
/*
 * Config-space read for a pass-through device.  Reads of the device IDs
 * and subsystem IDs are served from the cached device descriptor, ROM BAR
 * and INTLINE/INTPIN reads return 0, and everything else within the
 * config page is read through the shared MMIO window, serialized by
 * hbus->config_lock.
 */
static void
_hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
    uint32_t *val)
{
	struct hv_pcibus *hbus = hpdev->hbus;
	bus_size_t addr = CFG_PAGE_OFFSET + where;

	/*
	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
	 */
	if (where + size <= PCIR_COMMAND) {
		/* Vendor/device ID bytes come from the cached descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
	} else if (where >= PCIR_REVID && where + size <=
		   PCIR_CACHELNSZ) {
		/* Revision/class bytes come from the cached descriptor. */
		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
		       PCIR_REVID, size);
	} else if (where >= PCIR_SUBVEND_0 && where + size <=
		   PCIR_BIOS) {
		/* Subsystem IDs come from the cached descriptor. */
		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
		       PCIR_SUBVEND_0, size);
	} else if (where >= PCIR_BIOS && where + size <=
		   PCIR_CAP_PTR) {
		/* ROM BARs are unimplemented */
		*val = 0;
	} else if ((where >= PCIR_INTLINE && where + size <=
		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
		/*
		 * Interrupt Line and Interrupt PIN are hard-wired to zero
		 * because this front-end only supports message-signaled
		 * interrupts.
		 */
		*val = 0;
	} else if (where + size <= CFG_PAGE_SIZE) {
		mtx_lock(&hbus->config_lock);

		/* Choose the function to be read. */
		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);

		/* Make sure the function was chosen before we start reading.*/
		mb();

		/* Read from that function's config space. */
		switch (size) {
		case 1:
			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
			break;
		case 2:
			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
			break;
		default:
			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
			break;
		}
		/*
		 * Make sure the write was done before we release the lock,
		 * allowing consecutive reads/writes.
		 */
		mb();

		mtx_unlock(&hbus->config_lock);
	} else {
		/* Invalid config read: it's unlikely to reach here. */
		memset(val, 0, size);
	}
}
1350 
1351 static void
_hv_pcifront_write_config(struct hv_pci_dev * hpdev,int where,int size,uint32_t val)1352 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
1353     uint32_t val)
1354 {
1355 	struct hv_pcibus *hbus = hpdev->hbus;
1356 	bus_size_t addr = CFG_PAGE_OFFSET + where;
1357 
1358 	/* SSIDs and ROM BARs are read-only */
1359 	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
1360 		return;
1361 
1362 	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
1363 		mtx_lock(&hbus->config_lock);
1364 
1365 		/* Choose the function to be written. */
1366 		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1367 
1368 		/* Make sure the function was chosen before we start writing.*/
1369 		wmb();
1370 
1371 		/* Write to that function's config space. */
1372 		switch (size) {
1373 		case 1:
1374 			hv_cfg_write_1(hbus, addr, (uint8_t)val);
1375 			break;
1376 		case 2:
1377 			hv_cfg_write_2(hbus, addr, (uint16_t)val);
1378 			break;
1379 		default:
1380 			hv_cfg_write_4(hbus, addr, (uint32_t)val);
1381 			break;
1382 		}
1383 
1384 		/*
1385 		 * Make sure the write was done before we release the lock,
1386 		 * allowing consecutive reads/writes.
1387 		 */
1388 		mb();
1389 
1390 		mtx_unlock(&hbus->config_lock);
1391 	} else {
1392 		/* Invalid config write: it's unlikely to reach here. */
1393 		return;
1394 	}
1395 }
1396 
1397 /*
1398  * The vPCI in some Hyper-V releases do not initialize the last 4
1399  * bit of BAR registers. This could result weird problems causing PCI
1400  * code fail to configure BAR correctly.
1401  *
1402  * Just write all 1's to those BARs whose probed values are not zero.
1403  * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate
1404  * correctly.
1405  */
1406 
1407 static void
vmbus_pcib_prepopulate_bars(struct hv_pcibus * hbus)1408 vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
1409 {
1410 	struct hv_pci_dev *hpdev;
1411 	int i;
1412 
1413 	mtx_lock(&hbus->device_list_lock);
1414 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
1415 		for (i = 0; i < 6; i++) {
1416 			/* Ignore empty bar */
1417 			if (hpdev->probed_bar[i] == 0)
1418 				continue;
1419 
1420 			uint32_t bar_val = 0;
1421 
1422 			_hv_pcifront_read_config(hpdev, PCIR_BAR(i),
1423 			    4, &bar_val);
1424 
1425 			if (hpdev->probed_bar[i] != bar_val) {
1426 				if (bootverbose)
1427 					printf("vmbus_pcib: initialize bar %d "
1428 					    "by writing all 1s\n", i);
1429 
1430 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1431 				    4, 0xffffffff);
1432 
1433 				/* Now write the original value back */
1434 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1435 				    4, bar_val);
1436 			}
1437 		}
1438 	}
1439 	mtx_unlock(&hbus->device_list_lock);
1440 }
1441 
1442 static void
vmbus_pcib_set_detaching(void * arg,int pending __unused)1443 vmbus_pcib_set_detaching(void *arg, int pending __unused)
1444 {
1445 	struct hv_pcibus *hbus = arg;
1446 
1447 	atomic_set_int(&hbus->detaching, 1);
1448 }
1449 
/*
 * Quiesce the bus before detach: set the 'detaching' flag on the channel
 * itself, then wait for already-queued taskqueue work to finish.
 */
static void
vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
{
	struct task task;

	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);

	/*
	 * Make sure the channel callback won't push any possible new
	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
	 */
	vmbus_chan_run_task(hbus->sc->chan, &task);

	/* Wait for tasks queued before the flag was set to complete. */
	taskqueue_drain_all(hbus->sc->taskq);
}
1465 
1466 
1467 /*
1468  * Standard probe entry point.
1469  *
1470  */
1471 static int
vmbus_pcib_probe(device_t dev)1472 vmbus_pcib_probe(device_t dev)
1473 {
1474 	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1475 	    &g_pass_through_dev_type) == 0) {
1476 		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1477 		return (BUS_PROBE_DEFAULT);
1478 	}
1479 	return (ENXIO);
1480 }
1481 
1482 /*
1483  * Standard attach entry point.
1484  *
1485  */
1486 static int
vmbus_pcib_attach(device_t dev)1487 vmbus_pcib_attach(device_t dev)
1488 {
1489 	const int pci_ring_size = (4 * PAGE_SIZE);
1490 	const struct hyperv_guid *inst_guid;
1491 	struct vmbus_channel *channel;
1492 	struct vmbus_pcib_softc *sc;
1493 	struct hv_pcibus *hbus;
1494 	int rid = 0;
1495 	int ret;
1496 
1497 	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
1498 	hbus->pcib = dev;
1499 
1500 	channel = vmbus_get_channel(dev);
1501 	inst_guid = vmbus_chan_guid_inst(channel);
1502 	hbus->pci_domain = inst_guid->hv_guid[9] |
1503 			  (inst_guid->hv_guid[8] << 8);
1504 
1505 	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
1506 	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
1507 	TAILQ_INIT(&hbus->children);
1508 	TAILQ_INIT(&hbus->dr_list);
1509 
1510 	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
1511 	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
1512 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1513 
1514 	if (!hbus->cfg_res) {
1515 		device_printf(dev, "failed to get resource for cfg window\n");
1516 		ret = ENXIO;
1517 		goto free_bus;
1518 	}
1519 
1520 	sc = device_get_softc(dev);
1521 	sc->chan = channel;
1522 	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1523 	sc->hbus = hbus;
1524 
1525 	/*
1526 	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
1527 	 * messages. NB: we can't handle the messages in the channel callback
1528 	 * directly, because the message handlers need to send new messages
1529 	 * to the host and waits for the host's completion messages, which
1530 	 * must also be handled by the channel callback.
1531 	 */
1532 	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
1533 	    taskqueue_thread_enqueue, &sc->taskq);
1534 	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");
1535 
1536 	hbus->sc = sc;
1537 
1538 	init_completion(&hbus->query_completion);
1539 	hbus->query_comp = &hbus->query_completion;
1540 
1541 	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
1542 		NULL, 0, vmbus_pcib_on_channel_callback, sc);
1543 	if (ret)
1544 		goto free_res;
1545 
1546 	ret = hv_pci_protocol_negotiation(hbus, pci_protocol_versions,
1547 	    ARRAY_SIZE(pci_protocol_versions));
1548 	if (ret)
1549 		goto vmbus_close;
1550 
1551 	ret = hv_pci_query_relations(hbus);
1552 	if (!ret)
1553 		ret = wait_for_response(hbus, hbus->query_comp);
1554 
1555 	if (ret)
1556 		goto vmbus_close;
1557 
1558 	ret = hv_pci_enter_d0(hbus);
1559 	if (ret)
1560 		goto vmbus_close;
1561 
1562 	ret = hv_send_resources_allocated(hbus);
1563 	if (ret)
1564 		goto vmbus_close;
1565 
1566 	vmbus_pcib_prepopulate_bars(hbus);
1567 
1568 	hbus->pci_bus = device_add_child(dev, "pci", -1);
1569 	if (!hbus->pci_bus) {
1570 		device_printf(dev, "failed to create pci bus\n");
1571 		ret = ENXIO;
1572 		goto vmbus_close;
1573 	}
1574 
1575 	bus_generic_attach(dev);
1576 
1577 	hbus->state = hv_pcibus_installed;
1578 
1579 	return (0);
1580 
1581 vmbus_close:
1582 	vmbus_pcib_pre_detach(hbus);
1583 	vmbus_chan_close(sc->chan);
1584 free_res:
1585 	taskqueue_free(sc->taskq);
1586 	free_completion(&hbus->query_completion);
1587 	free(sc->rx_buf, M_DEVBUF);
1588 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1589 free_bus:
1590 	mtx_destroy(&hbus->device_list_lock);
1591 	mtx_destroy(&hbus->config_lock);
1592 	free(hbus, M_DEVBUF);
1593 	return (ret);
1594 }
1595 
1596 /*
1597  * Standard detach entry point
1598  */
1599 static int
vmbus_pcib_detach(device_t dev)1600 vmbus_pcib_detach(device_t dev)
1601 {
1602 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1603 	struct hv_pcibus *hbus = sc->hbus;
1604 	struct pci_message teardown_packet;
1605 	struct pci_bus_relations relations;
1606 	int ret;
1607 
1608 	vmbus_pcib_pre_detach(hbus);
1609 
1610 	if (hbus->state == hv_pcibus_installed)
1611 		bus_generic_detach(dev);
1612 
1613 	/* Delete any children which might still exist. */
1614 	memset(&relations, 0, sizeof(relations));
1615 	hv_pci_devices_present(hbus, &relations);
1616 
1617 	ret = hv_send_resources_released(hbus);
1618 	if (ret)
1619 		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
1620 
1621 	teardown_packet.type = PCI_BUS_D0EXIT;
1622 	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1623 	    &teardown_packet, sizeof(struct pci_message), 0);
1624 	if (ret)
1625 		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
1626 
1627 	taskqueue_drain_all(hbus->sc->taskq);
1628 	vmbus_chan_close(sc->chan);
1629 	taskqueue_free(sc->taskq);
1630 
1631 	free_completion(&hbus->query_completion);
1632 	free(sc->rx_buf, M_DEVBUF);
1633 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1634 
1635 	mtx_destroy(&hbus->device_list_lock);
1636 	mtx_destroy(&hbus->config_lock);
1637 	free(hbus, M_DEVBUF);
1638 
1639 	return (0);
1640 }
1641 
1642 static int
vmbus_pcib_read_ivar(device_t dev,device_t child,int which,uintptr_t * val)1643 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1644 {
1645 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1646 
1647 	switch (which) {
1648 	case PCIB_IVAR_DOMAIN:
1649 		*val = sc->hbus->pci_domain;
1650 		return (0);
1651 
1652 	case PCIB_IVAR_BUS:
1653 		/* There is only bus 0. */
1654 		*val = 0;
1655 		return (0);
1656 	}
1657 	return (ENOENT);
1658 }
1659 
static int
vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
{
	/* No writable instance variables are exposed. */
	return (ENOENT);
}
1665 
static struct resource *
vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
	rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
	unsigned int bar_no;
	struct hv_pci_dev *hpdev;
	struct vmbus_pcib_softc *sc = device_get_softc(dev);
	struct resource *res;
	unsigned int devfn;

	/* Bus-number allocations are handled by the PCI domain code. */
	if (type == PCI_RES_BUS)
		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
		    start, end, count, flags));

	/* Devices with port I/O BAR are not supported. */
	if (type == SYS_RES_IOPORT)
		return (NULL);

	if (type == SYS_RES_MEMORY) {
		devfn = PCI_DEVFN(pci_get_slot(child),
		    pci_get_function(child));
		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
		if (!hpdev)
			return (NULL);

		bar_no = PCI_RID2BAR(*rid);
		if (bar_no >= MAX_NUM_BARS)
			return (NULL);

		/* Make sure a 32-bit BAR gets a 32-bit address */
		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
			end = ulmin(end, 0xFFFFFFFF);
	}

	res = bus_generic_alloc_resource(dev, child, type, rid,
		start, end, count, flags);
	/*
	 * If this is a request for a specific range, assume it is
	 * correct and pass it up to the parent.
	 * NOTE(review): this retry repeats the exact same call with the
	 * same arguments as the first attempt; presumably it was meant to
	 * go to the parent directly -- confirm against driver history.
	 */
	if (res == NULL && start + count - 1 == end)
		res = bus_generic_alloc_resource(dev, child, type, rid,
		    start, end, count, flags);
	if (res == NULL)
		device_printf(dev, "vmbus_pcib_alloc_resource failed\n");

	return (res);
}
1714 
1715 static int
vmbus_pcib_adjust_resource(device_t dev,device_t child,struct resource * r,rman_res_t start,rman_res_t end)1716 vmbus_pcib_adjust_resource(device_t dev, device_t child,
1717     struct resource *r, rman_res_t start, rman_res_t end)
1718 {
1719 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1720 
1721 	if (rman_get_type(r) == PCI_RES_BUS)
1722 		return (pci_domain_adjust_bus(sc->hbus->pci_domain, child, r,
1723 		    start, end));
1724 	return (bus_generic_adjust_resource(dev, child, r, start, end));
1725 }
1726 
1727 static int
vmbus_pcib_release_resource(device_t dev,device_t child,struct resource * r)1728 vmbus_pcib_release_resource(device_t dev, device_t child, struct resource *r)
1729 {
1730 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1731 
1732 	switch (rman_get_type(r)) {
1733 	case PCI_RES_BUS:
1734 		return (pci_domain_release_bus(sc->hbus->pci_domain, child, r));
1735 	case SYS_RES_IOPORT:
1736 		return (EINVAL);
1737 	default:
1738 		return (bus_generic_release_resource(dev, child, r));
1739 	}
1740 }
1741 
1742 static int
vmbus_pcib_activate_resource(device_t dev,device_t child,struct resource * r)1743 vmbus_pcib_activate_resource(device_t dev, device_t child, struct resource *r)
1744 {
1745 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1746 
1747 	if (rman_get_type(r) == PCI_RES_BUS)
1748 		return (pci_domain_activate_bus(sc->hbus->pci_domain, child,
1749 		    r));
1750 	return (bus_generic_activate_resource(dev, child, r));
1751 }
1752 
1753 static int
vmbus_pcib_deactivate_resource(device_t dev,device_t child,struct resource * r)1754 vmbus_pcib_deactivate_resource(device_t dev, device_t child, struct resource *r)
1755 {
1756 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1757 
1758 	if (rman_get_type(r) == PCI_RES_BUS)
1759 		return (pci_domain_deactivate_bus(sc->hbus->pci_domain, child,
1760 		    r));
1761 	return (bus_generic_deactivate_resource(dev, child, r));
1762 }
1763 
static int
vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
    size_t setsize, cpuset_t *cpuset)
{
	/* Forward the CPU-set query to our own parent bus. */
	return (bus_get_cpus(pcib, op, setsize, cpuset));
}
1770 
1771 static uint32_t
vmbus_pcib_read_config(device_t dev,u_int bus,u_int slot,u_int func,u_int reg,int bytes)1772 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1773     u_int reg, int bytes)
1774 {
1775 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1776 	struct hv_pci_dev *hpdev;
1777 	unsigned int devfn = PCI_DEVFN(slot, func);
1778 	uint32_t data = 0;
1779 
1780 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1781 
1782 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1783 	if (!hpdev)
1784 		return (~0);
1785 
1786 	_hv_pcifront_read_config(hpdev, reg, bytes, &data);
1787 
1788 	return (data);
1789 }
1790 
1791 static void
vmbus_pcib_write_config(device_t dev,u_int bus,u_int slot,u_int func,u_int reg,uint32_t data,int bytes)1792 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1793     u_int reg, uint32_t data, int bytes)
1794 {
1795 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1796 	struct hv_pci_dev *hpdev;
1797 	unsigned int devfn = PCI_DEVFN(slot, func);
1798 
1799 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1800 
1801 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1802 	if (!hpdev)
1803 		return;
1804 
1805 	_hv_pcifront_write_config(hpdev, reg, bytes, data);
1806 }
1807 
/* Legacy INTx routing request: always refused. */
static int
vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
{
	/* We only support MSI/MSI-X and don't support INTx interrupt. */
	return (PCI_INVALID_IRQ);
}
1814 
static int
vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
    int maxcount, int *irqs)
{
	/* x86: delegate MSI allocation to the parent bus. */
#if defined(__amd64__) || defined(__i386__)
	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
	    irqs));
#endif
	/* arm64: allocate through the INTRNG MSI framework. */
#if defined(__aarch64__)
	return (intr_alloc_msi(pcib, dev, ACPI_MSI_XREF, count, maxcount,
	    irqs));
#endif
	/*
	 * NOTE(review): on any other architecture this function falls off
	 * the end without a return value; presumably the driver is only
	 * built for the architectures above -- confirm in the makefiles.
	 */
}
1828 
static int
vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
{
	/* x86: hand the vectors back to the parent bus. */
#if defined(__amd64__) || defined(__i386__)
	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
#endif
	/* arm64: release through the INTRNG MSI framework. */
#if defined(__aarch64__)
	return(intr_release_msi(pcib, dev, ACPI_MSI_XREF, count, irqs));
#endif
}
1839 
1840 static int
vmbus_pcib_alloc_msix(device_t pcib,device_t dev,int * irq)1841 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1842 {
1843 #if defined(__aarch64__)
1844 	int ret;
1845 #if defined(INTRNG)
1846 	ret = intr_alloc_msix(pcib, dev, ACPI_MSI_XREF, irq);
1847 	return ret;
1848 #else
1849     return (ENXIO);
1850 #endif
1851 #else
1852 	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1853 #endif /* __aarch64__ */
1854 }
1855 
static int
vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
{
	/* Mirror of vmbus_pcib_alloc_msix: INTRNG on arm64, parent on x86. */
#if defined(__aarch64__)
	return (intr_release_msix(pcib, dev, ACPI_MSI_XREF, irq));
#else
	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
#endif /* __aarch64__ */
}
1865 
/* Per-arch MSI address/data field constants used by vmbus_pcib_map_msi(). */
#if defined(__aarch64__)
/* arm64 placeholders: both fields are zero here. */
#define	MSI_INTEL_ADDR_DEST	0x00000000
#define	MSI_INTEL_DATA_DELFIXED 0x0
#endif
#if defined(__amd64__) || defined(__i386__)
/* x86 MSI encodings from the I/O APIC definitions. */
#define MSI_INTEL_ADDR_DEST 0x000ff000
#define MSI_INTEL_DATA_INTVEC   IOART_INTVEC    /* Interrupt vector. */
#define MSI_INTEL_DATA_DELFIXED IOART_DELFIXED
#endif
1875 
/*
 * Compose an MSI/MSI-X message for 'irq' on behalf of 'child': translate
 * the architecture's address/data encoding into a host
 * PCI_CREATE_INTERRUPT_MESSAGE(3) request, and return the host-composed
 * address/data pair to the caller.
 */
static int
vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
    uint64_t *addr, uint32_t *data)
{
	unsigned int devfn;
	struct hv_pci_dev *hpdev;

	uint64_t v_addr;
	uint32_t v_data;
	struct hv_irq_desc *hid, *tmp_hid;
	unsigned int cpu, vcpu_id;
	unsigned int vector;

	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
	struct compose_comp_ctxt comp;
	struct {
		struct pci_packet pkt;
		union {
			struct pci_create_interrupt v1;
			struct pci_create_interrupt3 v3;
		}int_pkts;
	} ctxt;
	int ret;
	uint32_t size;

	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
	if (!hpdev)
		return (ENOENT);

	/* Get the architecture's MSI address/data encoding first. */
#if defined(__aarch64__)
	ret = intr_map_msi(pcib, child, ACPI_MSI_XREF, irq,
	    &v_addr, &v_data);
#else
	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
            &v_addr, &v_data);
#endif
	if (ret)
		return (ret);

	/* Drop any descriptor previously registered for this irq. */
	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
		if (hid->irq == irq) {
			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
			hv_int_desc_free(hpdev, hid);
			break;
		}
	}

	/* Extract the target CPU and vector from the encoding. */
#if defined(__aarch64__)
	cpu = 0;
	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
	vector = v_data;
#else
	cpu = apic_cpuid((v_addr & MSI_INTEL_ADDR_DEST) >> 12);
	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
	vector = v_data & MSI_INTEL_DATA_INTVEC;
#endif

	if (hpdev->hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4 &&
	    vcpu_id > 63) {
		/* We only support vcpu_id < 64 before vPCI version 1.4 */
		device_printf(pcib,
		    "Error: "
		    "vcpu_id %u overflowed on PCI VMBus version 0x%x\n",
		    vcpu_id, hpdev->hbus->protocol_version);
		return (ENODEV);
	}

	init_completion(&comp.comp_pkt.host_event);

	memset(&ctxt, 0, sizeof(ctxt));
	ctxt.pkt.completion_func = hv_pci_compose_compl;
	ctxt.pkt.compl_ctxt = &comp;
	/*
	 * NOTE(review): 'size' stays uninitialized if protocol_version is
	 * neither 1.1 nor 1.4; negotiation presumably only ever yields
	 * these two values -- confirm if more versions are added.
	 */
	switch (hpdev->hbus->protocol_version) {
	case PCI_PROTOCOL_VERSION_1_1:
		ctxt.int_pkts.v1.message_type.type =
		    PCI_CREATE_INTERRUPT_MESSAGE;
		ctxt.int_pkts.v1.wslot.val = hpdev->desc.wslot.val;
		ctxt.int_pkts.v1.int_desc.vector = vector;
		ctxt.int_pkts.v1.int_desc.vector_count = 1;
		ctxt.int_pkts.v1.int_desc.delivery_mode =
		    MSI_INTEL_DATA_DELFIXED;
		ctxt.int_pkts.v1.int_desc.cpu_mask = 1ULL << vcpu_id;
		size = sizeof(ctxt.int_pkts.v1);
		break;

	case PCI_PROTOCOL_VERSION_1_4:
		ctxt.int_pkts.v3.message_type.type =
		    PCI_CREATE_INTERRUPT_MESSAGE3;
		ctxt.int_pkts.v3.wslot.val = hpdev->desc.wslot.val;
		ctxt.int_pkts.v3.int_desc.vector = vector;
		ctxt.int_pkts.v3.int_desc.vector_count = 1;
		ctxt.int_pkts.v3.int_desc.reserved = 0;
		ctxt.int_pkts.v3.int_desc.delivery_mode =
		    MSI_INTEL_DATA_DELFIXED;
		ctxt.int_pkts.v3.int_desc.processor_count = 1;
		ctxt.int_pkts.v3.int_desc.processor_array[0] = vcpu_id;
		size = sizeof(ctxt.int_pkts.v3);
		break;
	}
	ret = vmbus_chan_send(sc->chan,	VMBUS_CHANPKT_TYPE_INBAND,
	    VMBUS_CHANPKT_FLAG_RC, &ctxt.int_pkts, size,
	    (uint64_t)(uintptr_t)&ctxt.pkt);
	if (ret) {
		free_completion(&comp.comp_pkt.host_event);
		return (ret);
	}

	/* Wait for the host to return the composed address/data pair. */
	wait_for_completion(&comp.comp_pkt.host_event);
	free_completion(&comp.comp_pkt.host_event);

	if (comp.comp_pkt.completion_status < 0) {
		device_printf(pcib,
		    "vmbus_pcib_map_msi completion_status %d\n",
		    comp.comp_pkt.completion_status);
		return (EPROTO);
	}

	*addr = comp.int_desc.address;
	*data = comp.int_desc.data;

	/* Remember the descriptor so a later remap/teardown can free it. */
	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
	hid->irq = irq;
	hid->desc = comp.int_desc;
	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);

	return (0);
}
2003 
/*
 * Method table: device lifecycle, generic bus/resource handling, and the
 * pcib interface (config access, interrupt routing, MSI/MSI-X).
 */
static device_method_t vmbus_pcib_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe,         vmbus_pcib_probe),
	DEVMETHOD(device_attach,        vmbus_pcib_attach),
	DEVMETHOD(device_detach,        vmbus_pcib_detach),
	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
	DEVMETHOD(device_suspend,	bus_generic_suspend),
	DEVMETHOD(device_resume,	bus_generic_resume),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
	DEVMETHOD(bus_adjust_resource,		vmbus_pcib_adjust_resource),
	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
	DEVMETHOD(bus_activate_resource,   	vmbus_pcib_activate_resource),
	DEVMETHOD(bus_deactivate_resource, 	vmbus_pcib_deactivate_resource),
	DEVMETHOD(bus_setup_intr,	   bus_generic_setup_intr),
	DEVMETHOD(bus_teardown_intr,	   bus_generic_teardown_intr),
	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),

	/* pcib interface */
	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),
	DEVMETHOD(pcib_request_feature,		pcib_request_feature_allow),

	DEVMETHOD_END
};
2039 
/* Register the bridge driver on the vmbus and declare module dependencies. */
DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
		sizeof(struct vmbus_pcib_softc));
DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
2045 
2046 #endif /* NEW_PCIB */
2047