xref: /freebsd/sys/dev/hyperv/pcib/vmbus_pcib.c (revision 4e8d558c)
1 /*-
2  * Copyright (c) 2016-2017 Microsoft Corp.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #ifdef NEW_PCIB
31 #include "opt_acpi.h"
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/types.h>
36 #include <sys/malloc.h>
37 #include <sys/module.h>
38 #include <sys/kernel.h>
39 #include <sys/queue.h>
40 #include <sys/lock.h>
41 #include <sys/sx.h>
42 #include <sys/smp.h>
43 #include <sys/sysctl.h>
44 #include <sys/bus.h>
45 #include <sys/rman.h>
46 #include <sys/mutex.h>
47 #include <sys/errno.h>
48 
49 #include <vm/vm.h>
50 #include <vm/vm_param.h>
51 #include <vm/vm_kern.h>
52 #include <vm/pmap.h>
53 
54 #if defined(__aarch64__)
55 #include <arm64/include/intr.h>
56 #endif
57 #include <machine/atomic.h>
58 #include <machine/bus.h>
59 #include <machine/frame.h>
60 #include <machine/pci_cfgreg.h>
61 #include <machine/resource.h>
62 
63 #include <sys/pciio.h>
64 #include <dev/pci/pcireg.h>
65 #include <dev/pci/pcivar.h>
66 #include <dev/pci/pci_private.h>
67 #include <dev/pci/pcib_private.h>
68 #include "pcib_if.h"
69 #if defined(__i386__) || defined(__amd64__)
70 #include <machine/intr_machdep.h>
71 #include <x86/apicreg.h>
72 #endif
73 #if defined(__aarch64__)
74 #include <contrib/dev/acpica/include/acpi.h>
75 #include <contrib/dev/acpica/include/accommon.h>
76 #include <dev/acpica/acpivar.h>
77 #include <dev/acpica/acpi_pcibvar.h>
78 #endif
79 #include <dev/hyperv/include/hyperv.h>
80 #include <dev/hyperv/include/vmbus_xact.h>
81 #include <dev/hyperv/vmbus/vmbus_reg.h>
82 #include <dev/hyperv/vmbus/vmbus_chanvar.h>
83 
84 #include "vmbus_if.h"
85 
86 struct completion {
87 	unsigned int done;
88 	struct mtx lock;
89 };
90 
91 static void
92 init_completion(struct completion *c)
93 {
94 	memset(c, 0, sizeof(*c));
95 	mtx_init(&c->lock, "hvcmpl", NULL, MTX_DEF);
96 	c->done = 0;
97 }
98 static void
99 reinit_completion(struct completion *c)
100 {
101 	c->done = 0;
102 }
103 static void
104 free_completion(struct completion *c)
105 {
106 	mtx_destroy(&c->lock);
107 }
108 
109 static void
110 complete(struct completion *c)
111 {
112 	mtx_lock(&c->lock);
113 	c->done++;
114 	mtx_unlock(&c->lock);
115 	wakeup(c);
116 }
117 
118 static void
119 wait_for_completion(struct completion *c)
120 {
121 	mtx_lock(&c->lock);
122 	while (c->done == 0)
123 		mtx_sleep(c, &c->lock, 0, "hvwfc", 0);
124 	c->done--;
125 	mtx_unlock(&c->lock);
126 }
127 
128 /*
129  * Return: 0 if completed, a non-zero value if timed out.
130  */
131 static int
132 wait_for_completion_timeout(struct completion *c, int timeout)
133 {
134 	int ret;
135 
136 	mtx_lock(&c->lock);
137 
138 	if (c->done == 0)
139 		mtx_sleep(c, &c->lock, 0, "hvwfc", timeout);
140 
141 	if (c->done > 0) {
142 		c->done--;
143 		ret = 0;
144 	} else {
145 		ret = 1;
146 	}
147 
148 	mtx_unlock(&c->lock);
149 
150 	return (ret);
151 }
152 
/* Element count of a real array (do not use on a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Encode a protocol version: major in the upper 16 bits, minor below. */
#define PCI_MAKE_VERSION(major, minor) ((uint32_t)(((major) << 16) | (minor)))

/* Protocol versions this driver can offer to the host. */
enum pci_protocol_version_t {
	PCI_PROTOCOL_VERSION_1_1 = PCI_MAKE_VERSION(1, 1),
	PCI_PROTOCOL_VERSION_1_4 = PCI_MAKE_VERSION(1, 4),
};

/*
 * Versions in the order hv_pci_protocol_negotiation() tries them when
 * handed this table: the newest one comes first.
 */
static enum pci_protocol_version_t pci_protocol_versions[] = {
	PCI_PROTOCOL_VERSION_1_4,
	PCI_PROTOCOL_VERSION_1_1,
};
165 
/*
 * Geometry of the config-space MMIO window.
 * NOTE(review): presumably a control/index page followed by a data page
 * that starts at CFG_PAGE_OFFSET -- confirm against the config accessors.
 */
#define PCI_CONFIG_MMIO_LENGTH	0x2000
#define CFG_PAGE_OFFSET 0x1000
/* Bytes remaining in the window after CFG_PAGE_OFFSET. */
#define CFG_PAGE_SIZE (PCI_CONFIG_MMIO_LENGTH - CFG_PAGE_OFFSET)
169 
/*
 * Message types exchanged with the host over the VMBus channel.
 * Every code is an offset from PCI_MESSAGE_BASE; offsets 0x02 and 0x03
 * are not assigned in this table.
 */

enum pci_message_type {
	PCI_MESSAGE_BASE                = 0x42490000,
	PCI_BUS_RELATIONS               = PCI_MESSAGE_BASE + 0x00,
	PCI_QUERY_BUS_RELATIONS         = PCI_MESSAGE_BASE + 0x01,
	PCI_POWER_STATE_CHANGE          = PCI_MESSAGE_BASE + 0x04,
	PCI_QUERY_RESOURCE_REQUIREMENTS = PCI_MESSAGE_BASE + 0x05,
	PCI_QUERY_RESOURCE_RESOURCES    = PCI_MESSAGE_BASE + 0x06,
	PCI_BUS_D0ENTRY                 = PCI_MESSAGE_BASE + 0x07,
	PCI_BUS_D0EXIT                  = PCI_MESSAGE_BASE + 0x08,
	PCI_READ_BLOCK                  = PCI_MESSAGE_BASE + 0x09,
	PCI_WRITE_BLOCK                 = PCI_MESSAGE_BASE + 0x0A,
	PCI_EJECT                       = PCI_MESSAGE_BASE + 0x0B,
	PCI_QUERY_STOP                  = PCI_MESSAGE_BASE + 0x0C,
	PCI_REENABLE                    = PCI_MESSAGE_BASE + 0x0D,
	PCI_QUERY_STOP_FAILED           = PCI_MESSAGE_BASE + 0x0E,
	PCI_EJECTION_COMPLETE           = PCI_MESSAGE_BASE + 0x0F,
	PCI_RESOURCES_ASSIGNED          = PCI_MESSAGE_BASE + 0x10,
	PCI_RESOURCES_RELEASED          = PCI_MESSAGE_BASE + 0x11,
	PCI_INVALIDATE_BLOCK            = PCI_MESSAGE_BASE + 0x12,
	PCI_QUERY_PROTOCOL_VERSION      = PCI_MESSAGE_BASE + 0x13,
	PCI_CREATE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x14,
	PCI_DELETE_INTERRUPT_MESSAGE    = PCI_MESSAGE_BASE + 0x15,
	PCI_RESOURCES_ASSIGNED2         = PCI_MESSAGE_BASE + 0x16,
	PCI_CREATE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x17,
	PCI_DELETE_INTERRUPT_MESSAGE2   = PCI_MESSAGE_BASE + 0x18, /* unused */
	PCI_BUS_RELATIONS2              = PCI_MESSAGE_BASE + 0x19,
	PCI_RESOURCES_ASSIGNED3         = PCI_MESSAGE_BASE + 0x1A,
	PCI_CREATE_INTERRUPT_MESSAGE3   = PCI_MESSAGE_BASE + 0x1B,
	PCI_MESSAGE_MAXIMUM		/* one past the last assigned code */
};

/*
 * Status the host returns for a protocol version it does not accept;
 * version negotiation moves on to the next candidate when it sees this.
 */
#define STATUS_REVISION_MISMATCH 0xC0000059
209 
210 /*
211  * Structures defining the virtual PCI Express protocol.
212  */
213 
214 union pci_version {
215 	struct {
216 		uint16_t minor_version;
217 		uint16_t major_version;
218 	} parts;
219 	uint32_t version;
220 } __packed;
221 
222 /*
223  * This representation is the one used in Windows, which is
224  * what is expected when sending this back and forth with
225  * the Hyper-V parent partition.
226  */
227 union win_slot_encoding {
228 	struct {
229 		uint32_t	slot:5;
230 		uint32_t	func:3;
231 		uint32_t	reserved:24;
232 	} bits;
233 	uint32_t val;
234 } __packed;
235 
236 struct pci_func_desc {
237 	uint16_t	v_id;	/* vendor ID */
238 	uint16_t	d_id;	/* device ID */
239 	uint8_t		rev;
240 	uint8_t		prog_intf;
241 	uint8_t		subclass;
242 	uint8_t		base_class;
243 	uint32_t	subsystem_id;
244 	union win_slot_encoding wslot;
245 	uint32_t	ser;	/* serial number */
246 } __packed;
247 
248 struct pci_func_desc2 {
249 	uint16_t	v_id;	/* vendor ID */
250 	uint16_t	d_id;	/* device ID */
251 	uint8_t		rev;
252 	uint8_t		prog_intf;
253 	uint8_t		subclass;
254 	uint8_t		base_class;
255 	uint32_t	subsystem_id;
256 	union		win_slot_encoding wslot;
257 	uint32_t	ser;	/* serial number */
258 	uint32_t	flags;
259 	uint16_t	virtual_numa_node;
260 	uint16_t	reserved;
261 } __packed;
262 
263 
264 struct hv_msi_desc {
265 	uint8_t		vector;
266 	uint8_t		delivery_mode;
267 	uint16_t	vector_count;
268 	uint32_t	reserved;
269 	uint64_t	cpu_mask;
270 } __packed;
271 
272 struct hv_msi_desc3 {
273 	uint32_t	vector;
274 	uint8_t		delivery_mode;
275 	uint8_t		reserved;
276 	uint16_t	vector_count;
277 	uint16_t	processor_count;
278 	uint16_t	processor_array[32];
279 } __packed;
280 
281 struct tran_int_desc {
282 	uint16_t	reserved;
283 	uint16_t	vector_count;
284 	uint32_t	data;
285 	uint64_t	address;
286 } __packed;
287 
288 struct pci_message {
289 	uint32_t type;
290 } __packed;
291 
292 struct pci_child_message {
293 	struct pci_message message_type;
294 	union win_slot_encoding wslot;
295 } __packed;
296 
297 struct pci_incoming_message {
298 	struct vmbus_chanpkt_hdr hdr;
299 	struct pci_message message_type;
300 } __packed;
301 
302 struct pci_response {
303 	struct vmbus_chanpkt_hdr hdr;
304 	int32_t status;	/* negative values are failures */
305 } __packed;
306 
307 struct pci_packet {
308 	void (*completion_func)(void *context, struct pci_response *resp,
309 	    int resp_packet_size);
310 	void *compl_ctxt;
311 
312 	struct pci_message message[0];
313 };
314 
315 /*
316  * Specific message types supporting the PCI protocol.
317  */
318 
319 struct pci_version_request {
320 	struct pci_message message_type;
321 	uint32_t protocol_version;
322 	uint32_t reservedz:31;
323 } __packed;
324 
325 struct pci_bus_d0_entry {
326 	struct pci_message message_type;
327 	uint32_t reserved;
328 	uint64_t mmio_base;
329 } __packed;
330 
331 struct pci_bus_relations {
332 	struct pci_incoming_message incoming;
333 	uint32_t device_count;
334 	struct pci_func_desc func[0];
335 } __packed;
336 
337 struct pci_bus_relations2 {
338 	struct pci_incoming_message incoming;
339 	uint32_t device_count;
340 	struct pci_func_desc2 func[0];
341 } __packed;
342 
343 #define MAX_NUM_BARS	(PCIR_MAX_BAR_0 + 1)
344 struct pci_q_res_req_response {
345 	struct vmbus_chanpkt_hdr hdr;
346 	int32_t status; /* negative values are failures */
347 	uint32_t probed_bar[MAX_NUM_BARS];
348 } __packed;
349 
350 struct pci_resources_assigned {
351 	struct pci_message message_type;
352 	union win_slot_encoding wslot;
353 	uint8_t memory_range[0x14][MAX_NUM_BARS]; /* unused here */
354 	uint32_t msi_descriptors;
355 	uint32_t reserved[4];
356 } __packed;
357 
358 struct pci_resources_assigned2 {
359 	struct pci_message message_type;
360 	union win_slot_encoding wslot;
361 	uint8_t memory_range[0x14][6];   /* not used here */
362 	uint32_t msi_descriptor_count;
363 	uint8_t reserved[70];
364 } __packed;
365 
366 struct pci_create_interrupt {
367 	struct pci_message message_type;
368 	union win_slot_encoding wslot;
369 	struct hv_msi_desc int_desc;
370 } __packed;
371 
372 struct pci_create_interrupt3 {
373 	struct pci_message message_type;
374 	union win_slot_encoding wslot;
375 	struct hv_msi_desc3 int_desc;
376 } __packed;
377 
378 struct pci_create_int_response {
379 	struct pci_response response;
380 	uint32_t reserved;
381 	struct tran_int_desc int_desc;
382 } __packed;
383 
384 struct pci_delete_interrupt {
385 	struct pci_message message_type;
386 	union win_slot_encoding wslot;
387 	struct tran_int_desc int_desc;
388 } __packed;
389 
390 struct pci_dev_incoming {
391 	struct pci_incoming_message incoming;
392 	union win_slot_encoding wslot;
393 } __packed;
394 
395 struct pci_eject_response {
396 	struct pci_message message_type;
397 	union win_slot_encoding wslot;
398 	uint32_t status;
399 } __packed;
400 
401 /*
402  * Driver specific state.
403  */
404 
405 enum hv_pcibus_state {
406 	hv_pcibus_init = 0,
407 	hv_pcibus_installed,
408 };
409 
410 struct hv_pcibus {
411 	device_t pcib;
412 	device_t pci_bus;
413 	struct vmbus_pcib_softc *sc;
414 
415 	uint16_t pci_domain;
416 
417 	enum hv_pcibus_state state;
418 
419 	struct resource *cfg_res;
420 
421 	struct completion query_completion, *query_comp;
422 
423 	struct mtx config_lock; /* Avoid two threads writing index page */
424 	struct mtx device_list_lock;    /* Protect lists below */
425 	uint32_t protocol_version;
426 	TAILQ_HEAD(, hv_pci_dev) children;
427 	TAILQ_HEAD(, hv_dr_state) dr_list;
428 
429 	volatile int detaching;
430 };
431 
432 struct hv_pcidev_desc {
433 	uint16_t v_id;	/* vendor ID */
434 	uint16_t d_id;	/* device ID */
435 	uint8_t rev;
436 	uint8_t prog_intf;
437 	uint8_t subclass;
438 	uint8_t base_class;
439 	uint32_t subsystem_id;
440 	union win_slot_encoding wslot;
441 	uint32_t ser;	/* serial number */
442 	uint32_t flags;
443 	uint16_t virtual_numa_node;
444 } __packed;
445 
446 struct hv_pci_dev {
447 	TAILQ_ENTRY(hv_pci_dev) link;
448 
449 	struct hv_pcidev_desc desc;
450 
451 	bool reported_missing;
452 
453 	struct hv_pcibus *hbus;
454 	struct task eject_task;
455 
456 	TAILQ_HEAD(, hv_irq_desc) irq_desc_list;
457 
458 	/*
459 	 * What would be observed if one wrote 0xFFFFFFFF to a BAR and then
460 	 * read it back, for each of the BAR offsets within config space.
461 	 */
462 	uint32_t probed_bar[MAX_NUM_BARS];
463 };
464 
465 /*
466  * Tracks "Device Relations" messages from the host, which must be both
467  * processed in order.
468  */
469 struct hv_dr_work {
470 	struct task task;
471 	struct hv_pcibus *bus;
472 };
473 
474 struct hv_dr_state {
475 	TAILQ_ENTRY(hv_dr_state) link;
476 	uint32_t device_count;
477 	struct hv_pcidev_desc func[0];
478 };
479 
480 struct hv_irq_desc {
481 	TAILQ_ENTRY(hv_irq_desc) link;
482 	struct tran_int_desc desc;
483 	int irq;
484 };
485 
/*
 * devfn packing: slot number in bits 7..3, function number in bits 2..0.
 * Out-of-range inputs are masked to 5 and 3 bits respectively.
 */
#define PCI_DEVFN(slot, func)   ((((slot) & 0x1f) << 3) | ((func) & 0x07))
#define PCI_SLOT(devfn)         (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn)         ((devfn) & 0x07)
489 
490 static uint32_t
491 devfn_to_wslot(unsigned int devfn)
492 {
493 	union win_slot_encoding wslot;
494 
495 	wslot.val = 0;
496 	wslot.bits.slot = PCI_SLOT(devfn);
497 	wslot.bits.func = PCI_FUNC(devfn);
498 
499 	return (wslot.val);
500 }
501 
502 static unsigned int
503 wslot_to_devfn(uint32_t wslot)
504 {
505 	union win_slot_encoding encoding;
506 	unsigned int slot;
507 	unsigned int func;
508 
509 	encoding.val = wslot;
510 
511 	slot = encoding.bits.slot;
512 	func = encoding.bits.func;
513 
514 	return (PCI_DEVFN(slot, func));
515 }
516 
/* Per-device softc of the VMBus PCI bridge. */
struct vmbus_pcib_softc {
	struct vmbus_channel	*chan;		/* channel to the host */
	void			*rx_buf;	/* default receive buffer */

	struct taskqueue	*taskq;		/* runs device-relations work */

	struct hv_pcibus	*hbus;		/* bus state for this bridge */
};
525 
526 /* {44C4F61D-4444-4400-9D52-802E27EDE19F} */
527 static const struct hyperv_guid g_pass_through_dev_type = {
528 	.hv_guid = {0x1D, 0xF6, 0xC4, 0x44, 0x44, 0x44, 0x00, 0x44,
529 	    0x9D, 0x52, 0x80, 0x2E, 0x27, 0xED, 0xE1, 0x9F}
530 };
531 
532 struct hv_pci_compl {
533 	struct completion host_event;
534 	int32_t completion_status;
535 };
536 
537 struct q_res_req_compl {
538 	struct completion host_event;
539 	struct hv_pci_dev *hpdev;
540 };
541 
542 struct compose_comp_ctxt {
543 	struct hv_pci_compl comp_pkt;
544 	struct tran_int_desc int_desc;
545 };
546 
547 /*
548  * It is possible the device is revoked during initialization.
549  * Check if this happens during wait.
550  * Return: 0 if response arrived, ENODEV if device revoked.
551  */
552 static int
553 wait_for_response(struct hv_pcibus *hbus, struct completion *c)
554 {
555 	do {
556 		if (vmbus_chan_is_revoked(hbus->sc->chan)) {
557 			device_printf(hbus->pcib,
558 			    "The device is revoked.\n");
559 			return (ENODEV);
560 		}
561 	} while (wait_for_completion_timeout(c, hz /10) != 0);
562 
563 	return 0;
564 }
565 
566 static void
567 hv_pci_generic_compl(void *context, struct pci_response *resp,
568     int resp_packet_size)
569 {
570 	struct hv_pci_compl *comp_pkt = context;
571 
572 	if (resp_packet_size >= sizeof(struct pci_response))
573 		comp_pkt->completion_status = resp->status;
574 	else
575 		comp_pkt->completion_status = -1;
576 
577 	complete(&comp_pkt->host_event);
578 }
579 
580 static void
581 q_resource_requirements(void *context, struct pci_response *resp,
582     int resp_packet_size)
583 {
584 	struct q_res_req_compl *completion = context;
585 	struct pci_q_res_req_response *q_res_req =
586 	    (struct pci_q_res_req_response *)resp;
587 	int i;
588 
589 	if (resp->status < 0) {
590 		printf("vmbus_pcib: failed to query resource requirements\n");
591 	} else {
592 		for (i = 0; i < MAX_NUM_BARS; i++)
593 			completion->hpdev->probed_bar[i] =
594 			    q_res_req->probed_bar[i];
595 	}
596 
597 	complete(&completion->host_event);
598 }
599 
600 static void
601 hv_pci_compose_compl(void *context, struct pci_response *resp,
602     int resp_packet_size)
603 {
604 	struct compose_comp_ctxt *comp_pkt = context;
605 	struct pci_create_int_response *int_resp =
606 	    (struct pci_create_int_response *)resp;
607 
608 	comp_pkt->comp_pkt.completion_status = resp->status;
609 	comp_pkt->int_desc = int_resp->int_desc;
610 	complete(&comp_pkt->comp_pkt.host_event);
611 }
612 
613 static void
614 hv_int_desc_free(struct hv_pci_dev *hpdev, struct hv_irq_desc *hid)
615 {
616 	struct pci_delete_interrupt *int_pkt;
617 	struct {
618 		struct pci_packet pkt;
619 		uint8_t buffer[sizeof(struct pci_delete_interrupt)];
620 	} ctxt;
621 
622 	memset(&ctxt, 0, sizeof(ctxt));
623 	int_pkt = (struct pci_delete_interrupt *)&ctxt.pkt.message;
624 	int_pkt->message_type.type = PCI_DELETE_INTERRUPT_MESSAGE;
625 	int_pkt->wslot.val = hpdev->desc.wslot.val;
626 	int_pkt->int_desc = hid->desc;
627 
628 	vmbus_chan_send(hpdev->hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
629 	    int_pkt, sizeof(*int_pkt), 0);
630 
631 	free(hid, M_DEVBUF);
632 }
633 
634 static void
635 hv_pci_delete_device(struct hv_pci_dev *hpdev)
636 {
637 	struct hv_pcibus *hbus = hpdev->hbus;
638 	struct hv_irq_desc *hid, *tmp_hid;
639 	device_t pci_dev;
640 	int devfn;
641 
642 	devfn = wslot_to_devfn(hpdev->desc.wslot.val);
643 
644 	bus_topo_lock();
645 
646 	pci_dev = pci_find_dbsf(hbus->pci_domain,
647 	    0, PCI_SLOT(devfn), PCI_FUNC(devfn));
648 	if (pci_dev)
649 		device_delete_child(hbus->pci_bus, pci_dev);
650 
651 	bus_topo_unlock();
652 
653 	mtx_lock(&hbus->device_list_lock);
654 	TAILQ_REMOVE(&hbus->children, hpdev, link);
655 	mtx_unlock(&hbus->device_list_lock);
656 
657 	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid)
658 		hv_int_desc_free(hpdev, hid);
659 
660 	free(hpdev, M_DEVBUF);
661 }
662 
663 static struct hv_pci_dev *
664 new_pcichild_device(struct hv_pcibus *hbus, struct hv_pcidev_desc *desc)
665 {
666 	struct hv_pci_dev *hpdev;
667 	struct pci_child_message *res_req;
668 	struct q_res_req_compl comp_pkt;
669 	struct {
670 		struct pci_packet pkt;
671 		uint8_t buffer[sizeof(struct pci_child_message)];
672 	} ctxt;
673 	int ret;
674 
675 	hpdev = malloc(sizeof(*hpdev), M_DEVBUF, M_WAITOK | M_ZERO);
676 	hpdev->hbus = hbus;
677 
678 	TAILQ_INIT(&hpdev->irq_desc_list);
679 
680 	init_completion(&comp_pkt.host_event);
681 	comp_pkt.hpdev = hpdev;
682 
683 	ctxt.pkt.compl_ctxt = &comp_pkt;
684 	ctxt.pkt.completion_func = q_resource_requirements;
685 
686 	res_req = (struct pci_child_message *)&ctxt.pkt.message;
687 	res_req->message_type.type = PCI_QUERY_RESOURCE_REQUIREMENTS;
688 	res_req->wslot.val = desc->wslot.val;
689 
690 	ret = vmbus_chan_send(hbus->sc->chan,
691 	    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
692 	    res_req, sizeof(*res_req), (uint64_t)(uintptr_t)&ctxt.pkt);
693 	if (ret)
694 		goto err;
695 
696 	if (wait_for_response(hbus, &comp_pkt.host_event))
697 		goto err;
698 
699 	free_completion(&comp_pkt.host_event);
700 
701 	hpdev->desc = *desc;
702 
703 	mtx_lock(&hbus->device_list_lock);
704 	if (TAILQ_EMPTY(&hbus->children))
705 		hbus->pci_domain = desc->ser & 0xFFFF;
706 	TAILQ_INSERT_TAIL(&hbus->children, hpdev, link);
707 	mtx_unlock(&hbus->device_list_lock);
708 	return (hpdev);
709 err:
710 	free_completion(&comp_pkt.host_event);
711 	free(hpdev, M_DEVBUF);
712 	return (NULL);
713 }
714 
715 static int
716 pci_rescan(device_t dev)
717 {
718 	return (BUS_RESCAN(dev));
719 }
720 
721 static void
722 pci_devices_present_work(void *arg, int pending __unused)
723 {
724 	struct hv_dr_work *dr_wrk = arg;
725 	struct hv_dr_state *dr = NULL;
726 	struct hv_pcibus *hbus;
727 	uint32_t child_no;
728 	bool found;
729 	struct hv_pcidev_desc *new_desc;
730 	struct hv_pci_dev *hpdev, *tmp_hpdev;
731 	struct completion *query_comp;
732 	bool need_rescan = false;
733 
734 	hbus = dr_wrk->bus;
735 	free(dr_wrk, M_DEVBUF);
736 
737 	/* Pull this off the queue and process it if it was the last one. */
738 	mtx_lock(&hbus->device_list_lock);
739 	while (!TAILQ_EMPTY(&hbus->dr_list)) {
740 		dr = TAILQ_FIRST(&hbus->dr_list);
741 		TAILQ_REMOVE(&hbus->dr_list, dr, link);
742 
743 		/* Throw this away if the list still has stuff in it. */
744 		if (!TAILQ_EMPTY(&hbus->dr_list)) {
745 			free(dr, M_DEVBUF);
746 			continue;
747 		}
748 	}
749 	mtx_unlock(&hbus->device_list_lock);
750 
751 	if (!dr)
752 		return;
753 
754 	/* First, mark all existing children as reported missing. */
755 	mtx_lock(&hbus->device_list_lock);
756 	TAILQ_FOREACH(hpdev, &hbus->children, link)
757 		hpdev->reported_missing = true;
758 	mtx_unlock(&hbus->device_list_lock);
759 
760 	/* Next, add back any reported devices. */
761 	for (child_no = 0; child_no < dr->device_count; child_no++) {
762 		found = false;
763 		new_desc = &dr->func[child_no];
764 
765 		mtx_lock(&hbus->device_list_lock);
766 		TAILQ_FOREACH(hpdev, &hbus->children, link) {
767 			if ((hpdev->desc.wslot.val ==
768 			    new_desc->wslot.val) &&
769 			    (hpdev->desc.v_id == new_desc->v_id) &&
770 			    (hpdev->desc.d_id == new_desc->d_id) &&
771 			    (hpdev->desc.ser == new_desc->ser)) {
772 				hpdev->reported_missing = false;
773 				found = true;
774 				break;
775 			}
776 		}
777 		mtx_unlock(&hbus->device_list_lock);
778 
779 		if (!found) {
780 			if (!need_rescan)
781 				need_rescan = true;
782 
783 			hpdev = new_pcichild_device(hbus, new_desc);
784 			if (!hpdev)
785 				printf("vmbus_pcib: failed to add a child\n");
786 		}
787 	}
788 
789 	/* Remove missing device(s), if any */
790 	TAILQ_FOREACH_SAFE(hpdev, &hbus->children, link, tmp_hpdev) {
791 		if (hpdev->reported_missing)
792 			hv_pci_delete_device(hpdev);
793 	}
794 
795 	/* Rescan the bus to find any new device, if necessary. */
796 	if (hbus->state == hv_pcibus_installed && need_rescan)
797 		pci_rescan(hbus->pci_bus);
798 
799 	/* Wake up hv_pci_query_relations(), if it's waiting. */
800 	query_comp = hbus->query_comp;
801 	if (query_comp) {
802 		hbus->query_comp = NULL;
803 		complete(query_comp);
804 	}
805 
806 	free(dr, M_DEVBUF);
807 }
808 
809 static struct hv_pci_dev *
810 get_pcichild_wslot(struct hv_pcibus *hbus, uint32_t wslot)
811 {
812 	struct hv_pci_dev *hpdev, *ret = NULL;
813 
814 	mtx_lock(&hbus->device_list_lock);
815 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
816 		if (hpdev->desc.wslot.val == wslot) {
817 			ret = hpdev;
818 			break;
819 		}
820 	}
821 	mtx_unlock(&hbus->device_list_lock);
822 
823 	return (ret);
824 }
825 
826 static void
827 hv_pci_devices_present(struct hv_pcibus *hbus,
828     struct pci_bus_relations *relations)
829 {
830 	struct hv_dr_state *dr;
831 	struct hv_dr_work *dr_wrk;
832 	unsigned long dr_size;
833 
834 	if (hbus->detaching && relations->device_count > 0)
835 		return;
836 
837 	dr_size = offsetof(struct hv_dr_state, func) +
838 	    (sizeof(struct pci_func_desc) * relations->device_count);
839 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
840 
841 	dr->device_count = relations->device_count;
842 	if (dr->device_count != 0)
843 		memcpy(dr->func, relations->func,
844 		    sizeof(struct hv_pcidev_desc) * dr->device_count);
845 
846 	mtx_lock(&hbus->device_list_lock);
847 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
848 	mtx_unlock(&hbus->device_list_lock);
849 
850 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
851 	dr_wrk->bus = hbus;
852 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
853 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
854 }
855 
856 static void
857 hv_pci_devices_present2(struct hv_pcibus *hbus,
858     struct pci_bus_relations2 *relations)
859 {
860 	struct hv_dr_state *dr;
861 	struct hv_dr_work *dr_wrk;
862 	unsigned long dr_size;
863 
864 	if (hbus->detaching && relations->device_count > 0)
865 		return;
866 
867 	dr_size = offsetof(struct hv_dr_state, func) +
868 	    (sizeof(struct pci_func_desc2) * relations->device_count);
869 	dr = malloc(dr_size, M_DEVBUF, M_WAITOK | M_ZERO);
870 
871 	dr->device_count = relations->device_count;
872 	if (dr->device_count != 0)
873 		memcpy(dr->func, relations->func,
874 		    sizeof(struct pci_func_desc2) * dr->device_count);
875 
876 	mtx_lock(&hbus->device_list_lock);
877 	TAILQ_INSERT_TAIL(&hbus->dr_list, dr, link);
878 	mtx_unlock(&hbus->device_list_lock);
879 
880 	dr_wrk = malloc(sizeof(*dr_wrk), M_DEVBUF, M_WAITOK | M_ZERO);
881 	dr_wrk->bus = hbus;
882 	TASK_INIT(&dr_wrk->task, 0, pci_devices_present_work, dr_wrk);
883 	taskqueue_enqueue(hbus->sc->taskq, &dr_wrk->task);
884 }
885 
886 static void
887 hv_eject_device_work(void *arg, int pending __unused)
888 {
889 	struct hv_pci_dev *hpdev = arg;
890 	union win_slot_encoding wslot = hpdev->desc.wslot;
891 	struct hv_pcibus *hbus = hpdev->hbus;
892 	struct pci_eject_response *eject_pkt;
893 	struct {
894 		struct pci_packet pkt;
895 		uint8_t buffer[sizeof(struct pci_eject_response)];
896 	} ctxt;
897 
898 	hv_pci_delete_device(hpdev);
899 
900 	memset(&ctxt, 0, sizeof(ctxt));
901 	eject_pkt = (struct pci_eject_response *)&ctxt.pkt.message;
902 	eject_pkt->message_type.type = PCI_EJECTION_COMPLETE;
903 	eject_pkt->wslot.val = wslot.val;
904 	vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
905 	    eject_pkt, sizeof(*eject_pkt), 0);
906 }
907 
908 static void
909 hv_pci_eject_device(struct hv_pci_dev *hpdev)
910 {
911 	struct hv_pcibus *hbus = hpdev->hbus;
912 	struct taskqueue *taskq;
913 
914 	if (hbus->detaching)
915 		return;
916 
917 	/*
918 	 * Push this task into the same taskqueue on which
919 	 * vmbus_pcib_attach() runs, so we're sure this task can't run
920 	 * concurrently with vmbus_pcib_attach().
921 	 */
922 	TASK_INIT(&hpdev->eject_task, 0, hv_eject_device_work, hpdev);
923 	taskq = vmbus_chan_mgmt_tq(hbus->sc->chan);
924 	taskqueue_enqueue(taskq, &hpdev->eject_task);
925 }
926 
927 #define PCIB_PACKET_SIZE	0x100
928 
929 static void
930 vmbus_pcib_on_channel_callback(struct vmbus_channel *chan, void *arg)
931 {
932 	struct vmbus_pcib_softc *sc = arg;
933 	struct hv_pcibus *hbus = sc->hbus;
934 
935 	void *buffer;
936 	int bufferlen = PCIB_PACKET_SIZE;
937 
938 	struct pci_packet *comp_packet;
939 	struct pci_response *response;
940 	struct pci_incoming_message *new_msg;
941 	struct pci_bus_relations *bus_rel;
942 	struct pci_bus_relations2 *bus_rel2;
943 	struct pci_dev_incoming *dev_msg;
944 	struct hv_pci_dev *hpdev;
945 
946 	buffer = sc->rx_buf;
947 	do {
948 		struct vmbus_chanpkt_hdr *pkt = buffer;
949 		uint32_t bytes_rxed;
950 		int ret;
951 
952 		bytes_rxed = bufferlen;
953 		ret = vmbus_chan_recv_pkt(chan, pkt, &bytes_rxed);
954 
955 		if (ret == ENOBUFS) {
956 			/* Handle large packet */
957 			if (bufferlen > PCIB_PACKET_SIZE) {
958 				free(buffer, M_DEVBUF);
959 				buffer = NULL;
960 			}
961 
962 			/* alloc new buffer */
963 			buffer =
964 			    malloc(bytes_rxed, M_DEVBUF, M_WAITOK | M_ZERO);
965 			bufferlen = bytes_rxed;
966 
967 			continue;
968 		}
969 
970 		if (ret != 0) {
971 			/* ignore EIO or EAGAIN */
972 			break;
973 		}
974 
975 		if (bytes_rxed <= sizeof(struct pci_response))
976 			continue;
977 
978 		switch (pkt->cph_type) {
979 		case VMBUS_CHANPKT_TYPE_COMP:
980 			comp_packet =
981 			    (struct pci_packet *)(uintptr_t)pkt->cph_xactid;
982 			response = (struct pci_response *)pkt;
983 			comp_packet->completion_func(comp_packet->compl_ctxt,
984 			    response, bytes_rxed);
985 			break;
986 		case VMBUS_CHANPKT_TYPE_INBAND:
987 			new_msg = (struct pci_incoming_message *)buffer;
988 
989 			switch (new_msg->message_type.type) {
990 			case PCI_BUS_RELATIONS:
991 				bus_rel = (struct pci_bus_relations *)buffer;
992 
993 				if (bus_rel->device_count == 0)
994 					break;
995 
996 				if (bytes_rxed <
997 				    offsetof(struct pci_bus_relations, func) +
998 				        (sizeof(struct pci_func_desc) *
999 				            (bus_rel->device_count)))
1000 					break;
1001 
1002 				hv_pci_devices_present(hbus, bus_rel);
1003 				break;
1004 
1005 			case PCI_BUS_RELATIONS2:
1006 				bus_rel2 = (struct pci_bus_relations2 *)buffer;
1007 
1008 				if (bus_rel2->device_count == 0)
1009 					break;
1010 
1011 				if (bytes_rxed <
1012 				    offsetof(struct pci_bus_relations2, func) +
1013 				    (sizeof(struct pci_func_desc2) *
1014 				    (bus_rel2->device_count)))
1015 					break;
1016 
1017 				hv_pci_devices_present2(hbus, bus_rel2);
1018 
1019 			case PCI_EJECT:
1020 				dev_msg = (struct pci_dev_incoming *)buffer;
1021 				hpdev = get_pcichild_wslot(hbus,
1022 				    dev_msg->wslot.val);
1023 
1024 				if (hpdev)
1025 					hv_pci_eject_device(hpdev);
1026 
1027 				break;
1028 			default:
1029 				printf("vmbus_pcib: Unknown msg type 0x%x\n",
1030 				    new_msg->message_type.type);
1031 				break;
1032 			}
1033 			break;
1034 		default:
1035 			printf("vmbus_pcib: Unknown VMBus msg type %hd\n",
1036 			    pkt->cph_type);
1037 			break;
1038 		}
1039 	} while (1);
1040 
1041 	if (bufferlen > PCIB_PACKET_SIZE)
1042 		free(buffer, M_DEVBUF);
1043 }
1044 
1045 static int
1046 hv_pci_protocol_negotiation(struct hv_pcibus *hbus,
1047     enum pci_protocol_version_t version[],
1048     int num_version)
1049 {
1050 	struct pci_version_request *version_req;
1051 	struct hv_pci_compl comp_pkt;
1052 	struct {
1053 		struct pci_packet pkt;
1054 		uint8_t buffer[sizeof(struct pci_version_request)];
1055 	} ctxt;
1056 	int ret;
1057 	int i;
1058 
1059 	init_completion(&comp_pkt.host_event);
1060 
1061 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1062 	ctxt.pkt.compl_ctxt = &comp_pkt;
1063 	version_req = (struct pci_version_request *)&ctxt.pkt.message;
1064 	version_req->message_type.type = PCI_QUERY_PROTOCOL_VERSION;
1065 
1066 	for(i=0; i< num_version; i++) {
1067 		version_req->protocol_version = version[i];
1068 		ret = vmbus_chan_send(hbus->sc->chan,
1069 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1070 		    version_req, sizeof(*version_req),
1071 		    (uint64_t)(uintptr_t)&ctxt.pkt);
1072 		if (!ret)
1073 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1074 
1075 		if (ret) {
1076 			device_printf(hbus->pcib,
1077 				"vmbus_pcib failed to request version: %d\n",
1078 				ret);
1079 			goto out;
1080 		}
1081 
1082 		if (comp_pkt.completion_status >= 0) {
1083 			hbus->protocol_version = version[i];
1084 			device_printf(hbus->pcib,
1085 				"PCI VMBus using version 0x%x\n",
1086 				hbus->protocol_version);
1087 			ret = 0;
1088 			goto out;
1089 		}
1090 
1091 		if (comp_pkt.completion_status != STATUS_REVISION_MISMATCH) {
1092 			device_printf(hbus->pcib,
1093 				"vmbus_pcib version negotiation failed: %x\n",
1094 				comp_pkt.completion_status);
1095 			ret = EPROTO;
1096 			goto out;
1097 		}
1098 		reinit_completion(&comp_pkt.host_event);
1099 	}
1100 
1101 	device_printf(hbus->pcib,
1102 		"PCI pass-trhpugh VSP failed to find supported version\n");
1103 out:
1104 	free_completion(&comp_pkt.host_event);
1105 	return (ret);
1106 }
1107 
1108 /* Ask the host to send along the list of child devices */
1109 static int
1110 hv_pci_query_relations(struct hv_pcibus *hbus)
1111 {
1112 	struct pci_message message;
1113 	int ret;
1114 
1115 	message.type = PCI_QUERY_BUS_RELATIONS;
1116 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1117 	    &message, sizeof(message), 0);
1118 	return (ret);
1119 }
1120 
1121 static int
1122 hv_pci_enter_d0(struct hv_pcibus *hbus)
1123 {
1124 	struct pci_bus_d0_entry *d0_entry;
1125 	struct hv_pci_compl comp_pkt;
1126 	struct {
1127 		struct pci_packet pkt;
1128 		uint8_t buffer[sizeof(struct pci_bus_d0_entry)];
1129 	} ctxt;
1130 	int ret;
1131 
1132 	/*
1133 	 * Tell the host that the bus is ready to use, and moved into the
1134 	 * powered-on state.  This includes telling the host which region
1135 	 * of memory-mapped I/O space has been chosen for configuration space
1136 	 * access.
1137 	 */
1138 	init_completion(&comp_pkt.host_event);
1139 
1140 	ctxt.pkt.completion_func = hv_pci_generic_compl;
1141 	ctxt.pkt.compl_ctxt = &comp_pkt;
1142 
1143 	d0_entry = (struct pci_bus_d0_entry *)&ctxt.pkt.message;
1144 	memset(d0_entry, 0, sizeof(*d0_entry));
1145 	d0_entry->message_type.type = PCI_BUS_D0ENTRY;
1146 	d0_entry->mmio_base = rman_get_start(hbus->cfg_res);
1147 
1148 	ret = vmbus_chan_send(hbus->sc->chan, VMBUS_CHANPKT_TYPE_INBAND,
1149 	    VMBUS_CHANPKT_FLAG_RC, d0_entry, sizeof(*d0_entry),
1150 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1151 	if (!ret)
1152 		ret = wait_for_response(hbus, &comp_pkt.host_event);
1153 
1154 	if (ret)
1155 		goto out;
1156 
1157 	if (comp_pkt.completion_status < 0) {
1158 		device_printf(hbus->pcib, "vmbus_pcib failed to enable D0\n");
1159 		ret = EPROTO;
1160 	} else {
1161 		ret = 0;
1162 	}
1163 
1164 out:
1165 	free_completion(&comp_pkt.host_event);
1166 	return (ret);
1167 }
1168 
1169 /*
1170  * It looks this is only needed by Windows VM, but let's send the message too
1171  * just to make the host happy.
1172  */
1173 static int
1174 hv_send_resources_allocated(struct hv_pcibus *hbus)
1175 {
1176 	struct pci_resources_assigned *res_assigned;
1177 	struct pci_resources_assigned2 *res_assigned2;
1178 	struct hv_pci_compl comp_pkt;
1179 	struct hv_pci_dev *hpdev;
1180 	struct pci_packet *pkt;
1181 	uint32_t wslot;
1182 	int ret = 0;
1183 	size_t size_res;
1184 
1185 	size_res = (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4)
1186 			? sizeof(*res_assigned) : sizeof(*res_assigned2);
1187 	pkt = malloc(sizeof(*pkt) + size_res,
1188 	    M_DEVBUF, M_WAITOK | M_ZERO);
1189 
1190 	for (wslot = 0; wslot < 256; wslot++) {
1191 		hpdev = get_pcichild_wslot(hbus, wslot);
1192 		if (!hpdev)
1193 			continue;
1194 
1195 		init_completion(&comp_pkt.host_event);
1196 
1197 		memset(pkt, 0, sizeof(*pkt) + size_res);
1198 		pkt->completion_func = hv_pci_generic_compl;
1199 		pkt->compl_ctxt = &comp_pkt;
1200 
1201 		if (hbus->protocol_version < PCI_PROTOCOL_VERSION_1_4) {
1202 			res_assigned =
1203 			    (struct pci_resources_assigned *)&pkt->message;
1204 			res_assigned->message_type.type =
1205 			    PCI_RESOURCES_ASSIGNED;
1206 			res_assigned->wslot.val = hpdev->desc.wslot.val;
1207 		} else {
1208 			res_assigned2 =
1209 			    (struct pci_resources_assigned2 *)&pkt->message;
1210 			res_assigned2->message_type.type =
1211 			    PCI_RESOURCES_ASSIGNED2;
1212 			res_assigned2->wslot.val = hpdev->desc.wslot.val;
1213 		}
1214 
1215 		ret = vmbus_chan_send(hbus->sc->chan,
1216 		    VMBUS_CHANPKT_TYPE_INBAND, VMBUS_CHANPKT_FLAG_RC,
1217 		    &pkt->message, size_res,
1218 		    (uint64_t)(uintptr_t)pkt);
1219 		if (!ret)
1220 			ret = wait_for_response(hbus, &comp_pkt.host_event);
1221 
1222 		free_completion(&comp_pkt.host_event);
1223 
1224 		if (ret)
1225 			break;
1226 
1227 		if (comp_pkt.completion_status < 0) {
1228 			ret = EPROTO;
1229 			device_printf(hbus->pcib,
1230 			    "failed to send PCI_RESOURCES_ASSIGNED\n");
1231 			break;
1232 		}
1233 	}
1234 
1235 	free(pkt, M_DEVBUF);
1236 	return (ret);
1237 }
1238 
1239 static int
1240 hv_send_resources_released(struct hv_pcibus *hbus)
1241 {
1242 	struct pci_child_message pkt;
1243 	struct hv_pci_dev *hpdev;
1244 	uint32_t wslot;
1245 	int ret;
1246 
1247 	for (wslot = 0; wslot < 256; wslot++) {
1248 		hpdev = get_pcichild_wslot(hbus, wslot);
1249 		if (!hpdev)
1250 			continue;
1251 
1252 		pkt.message_type.type = PCI_RESOURCES_RELEASED;
1253 		pkt.wslot.val = hpdev->desc.wslot.val;
1254 
1255 		ret = vmbus_chan_send(hbus->sc->chan,
1256 		    VMBUS_CHANPKT_TYPE_INBAND, 0, &pkt, sizeof(pkt), 0);
1257 		if (ret)
1258 			return (ret);
1259 	}
1260 
1261 	return (0);
1262 }
1263 
1264 #define hv_cfg_read(x, s)						\
1265 static inline uint##x##_t hv_cfg_read_##s(struct hv_pcibus *bus,	\
1266     bus_size_t offset)							\
1267 {									\
1268 	return (bus_read_##s(bus->cfg_res, offset));			\
1269 }
1270 
1271 #define hv_cfg_write(x, s)						\
1272 static inline void hv_cfg_write_##s(struct hv_pcibus *bus,		\
1273     bus_size_t offset, uint##x##_t val)					\
1274 {									\
1275 	return (bus_write_##s(bus->cfg_res, offset, val));		\
1276 }
1277 
1278 hv_cfg_read(8, 1)
1279 hv_cfg_read(16, 2)
1280 hv_cfg_read(32, 4)
1281 
1282 hv_cfg_write(8, 1)
1283 hv_cfg_write(16, 2)
1284 hv_cfg_write(32, 4)
1285 
1286 static void
1287 _hv_pcifront_read_config(struct hv_pci_dev *hpdev, int where, int size,
1288     uint32_t *val)
1289 {
1290 	struct hv_pcibus *hbus = hpdev->hbus;
1291 	bus_size_t addr = CFG_PAGE_OFFSET + where;
1292 
1293 	/*
1294 	 * If the attempt is to read the IDs or the ROM BAR, simulate that.
1295 	 */
1296 	if (where + size <= PCIR_COMMAND) {
1297 		memcpy(val, ((uint8_t *)&hpdev->desc.v_id) + where, size);
1298 	} else if (where >= PCIR_REVID && where + size <=
1299 		   PCIR_CACHELNSZ) {
1300 		memcpy(val, ((uint8_t *)&hpdev->desc.rev) + where -
1301 		       PCIR_REVID, size);
1302 	} else if (where >= PCIR_SUBVEND_0 && where + size <=
1303 		   PCIR_BIOS) {
1304 		memcpy(val, (uint8_t *)&hpdev->desc.subsystem_id + where -
1305 		       PCIR_SUBVEND_0, size);
1306 	} else if (where >= PCIR_BIOS && where + size <=
1307 		   PCIR_CAP_PTR) {
1308 		/* ROM BARs are unimplemented */
1309 		*val = 0;
1310 	} else if ((where >= PCIR_INTLINE && where + size <=
1311 		   PCIR_INTPIN) ||(where == PCIR_INTPIN && size == 1)) {
1312 		/*
1313 		 * Interrupt Line and Interrupt PIN are hard-wired to zero
1314 		 * because this front-end only supports message-signaled
1315 		 * interrupts.
1316 		 */
1317 		*val = 0;
1318 	} else if (where + size <= CFG_PAGE_SIZE) {
1319 		mtx_lock(&hbus->config_lock);
1320 
1321 		/* Choose the function to be read. */
1322 		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1323 
1324 		/* Make sure the function was chosen before we start reading.*/
1325 		mb();
1326 
1327 		/* Read from that function's config space. */
1328 		switch (size) {
1329 		case 1:
1330 			*((uint8_t *)val) = hv_cfg_read_1(hbus, addr);
1331 			break;
1332 		case 2:
1333 			*((uint16_t *)val) = hv_cfg_read_2(hbus, addr);
1334 			break;
1335 		default:
1336 			*((uint32_t *)val) = hv_cfg_read_4(hbus, addr);
1337 			break;
1338 		}
1339 		/*
1340 		 * Make sure the write was done before we release the lock,
1341 		 * allowing consecutive reads/writes.
1342 		 */
1343 		mb();
1344 
1345 		mtx_unlock(&hbus->config_lock);
1346 	} else {
1347 		/* Invalid config read: it's unlikely to reach here. */
1348 		memset(val, 0, size);
1349 	}
1350 }
1351 
1352 static void
1353 _hv_pcifront_write_config(struct hv_pci_dev *hpdev, int where, int size,
1354     uint32_t val)
1355 {
1356 	struct hv_pcibus *hbus = hpdev->hbus;
1357 	bus_size_t addr = CFG_PAGE_OFFSET + where;
1358 
1359 	/* SSIDs and ROM BARs are read-only */
1360 	if (where >= PCIR_SUBVEND_0 && where + size <= PCIR_CAP_PTR)
1361 		return;
1362 
1363 	if (where >= PCIR_COMMAND && where + size <= CFG_PAGE_SIZE) {
1364 		mtx_lock(&hbus->config_lock);
1365 
1366 		/* Choose the function to be written. */
1367 		hv_cfg_write_4(hbus, 0, hpdev->desc.wslot.val);
1368 
1369 		/* Make sure the function was chosen before we start writing.*/
1370 		wmb();
1371 
1372 		/* Write to that function's config space. */
1373 		switch (size) {
1374 		case 1:
1375 			hv_cfg_write_1(hbus, addr, (uint8_t)val);
1376 			break;
1377 		case 2:
1378 			hv_cfg_write_2(hbus, addr, (uint16_t)val);
1379 			break;
1380 		default:
1381 			hv_cfg_write_4(hbus, addr, (uint32_t)val);
1382 			break;
1383 		}
1384 
1385 		/*
1386 		 * Make sure the write was done before we release the lock,
1387 		 * allowing consecutive reads/writes.
1388 		 */
1389 		mb();
1390 
1391 		mtx_unlock(&hbus->config_lock);
1392 	} else {
1393 		/* Invalid config write: it's unlikely to reach here. */
1394 		return;
1395 	}
1396 }
1397 
1398 /*
1399  * The vPCI in some Hyper-V releases do not initialize the last 4
1400  * bit of BAR registers. This could result weird problems causing PCI
1401  * code fail to configure BAR correctly.
1402  *
1403  * Just write all 1's to those BARs whose probed values are not zero.
1404  * This seems to make the Hyper-V vPCI and pci_write_bar() to cooperate
1405  * correctly.
1406  */
1407 
1408 static void
1409 vmbus_pcib_prepopulate_bars(struct hv_pcibus *hbus)
1410 {
1411 	struct hv_pci_dev *hpdev;
1412 	int i;
1413 
1414 	mtx_lock(&hbus->device_list_lock);
1415 	TAILQ_FOREACH(hpdev, &hbus->children, link) {
1416 		for (i = 0; i < 6; i++) {
1417 			/* Ignore empty bar */
1418 			if (hpdev->probed_bar[i] == 0)
1419 				continue;
1420 
1421 			uint32_t bar_val = 0;
1422 
1423 			_hv_pcifront_read_config(hpdev, PCIR_BAR(i),
1424 			    4, &bar_val);
1425 
1426 			if (hpdev->probed_bar[i] != bar_val) {
1427 				if (bootverbose)
1428 					printf("vmbus_pcib: initialize bar %d "
1429 					    "by writing all 1s\n", i);
1430 
1431 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1432 				    4, 0xffffffff);
1433 
1434 				/* Now write the original value back */
1435 				_hv_pcifront_write_config(hpdev, PCIR_BAR(i),
1436 				    4, bar_val);
1437 			}
1438 		}
1439 	}
1440 	mtx_unlock(&hbus->device_list_lock);
1441 }
1442 
1443 static void
1444 vmbus_pcib_set_detaching(void *arg, int pending __unused)
1445 {
1446 	struct hv_pcibus *hbus = arg;
1447 
1448 	atomic_set_int(&hbus->detaching, 1);
1449 }
1450 
1451 static void
1452 vmbus_pcib_pre_detach(struct hv_pcibus *hbus)
1453 {
1454 	struct task task;
1455 
1456 	TASK_INIT(&task, 0, vmbus_pcib_set_detaching, hbus);
1457 
1458 	/*
1459 	 * Make sure the channel callback won't push any possible new
1460 	 * PCI_BUS_RELATIONS and PCI_EJECT tasks to sc->taskq.
1461 	 */
1462 	vmbus_chan_run_task(hbus->sc->chan, &task);
1463 
1464 	taskqueue_drain_all(hbus->sc->taskq);
1465 }
1466 
1467 
1468 /*
1469  * Standard probe entry point.
1470  *
1471  */
1472 static int
1473 vmbus_pcib_probe(device_t dev)
1474 {
1475 	if (VMBUS_PROBE_GUID(device_get_parent(dev), dev,
1476 	    &g_pass_through_dev_type) == 0) {
1477 		device_set_desc(dev, "Hyper-V PCI Express Pass Through");
1478 		return (BUS_PROBE_DEFAULT);
1479 	}
1480 	return (ENXIO);
1481 }
1482 
1483 /*
1484  * Standard attach entry point.
1485  *
1486  */
1487 static int
1488 vmbus_pcib_attach(device_t dev)
1489 {
1490 	const int pci_ring_size = (4 * PAGE_SIZE);
1491 	const struct hyperv_guid *inst_guid;
1492 	struct vmbus_channel *channel;
1493 	struct vmbus_pcib_softc *sc;
1494 	struct hv_pcibus *hbus;
1495 	int rid = 0;
1496 	int ret;
1497 
1498 	hbus = malloc(sizeof(*hbus), M_DEVBUF, M_WAITOK | M_ZERO);
1499 	hbus->pcib = dev;
1500 
1501 	channel = vmbus_get_channel(dev);
1502 	inst_guid = vmbus_chan_guid_inst(channel);
1503 	hbus->pci_domain = inst_guid->hv_guid[9] |
1504 			  (inst_guid->hv_guid[8] << 8);
1505 
1506 	mtx_init(&hbus->config_lock, "hbcfg", NULL, MTX_DEF);
1507 	mtx_init(&hbus->device_list_lock, "hbdl", NULL, MTX_DEF);
1508 	TAILQ_INIT(&hbus->children);
1509 	TAILQ_INIT(&hbus->dr_list);
1510 
1511 	hbus->cfg_res = bus_alloc_resource(dev, SYS_RES_MEMORY, &rid,
1512 	    0, RM_MAX_END, PCI_CONFIG_MMIO_LENGTH,
1513 	    RF_ACTIVE | rman_make_alignment_flags(PAGE_SIZE));
1514 
1515 	if (!hbus->cfg_res) {
1516 		device_printf(dev, "failed to get resource for cfg window\n");
1517 		ret = ENXIO;
1518 		goto free_bus;
1519 	}
1520 
1521 	sc = device_get_softc(dev);
1522 	sc->chan = channel;
1523 	sc->rx_buf = malloc(PCIB_PACKET_SIZE, M_DEVBUF, M_WAITOK | M_ZERO);
1524 	sc->hbus = hbus;
1525 
1526 	/*
1527 	 * The taskq is used to handle PCI_BUS_RELATIONS and PCI_EJECT
1528 	 * messages. NB: we can't handle the messages in the channel callback
1529 	 * directly, because the message handlers need to send new messages
1530 	 * to the host and waits for the host's completion messages, which
1531 	 * must also be handled by the channel callback.
1532 	 */
1533 	sc->taskq = taskqueue_create("vmbus_pcib_tq", M_WAITOK,
1534 	    taskqueue_thread_enqueue, &sc->taskq);
1535 	taskqueue_start_threads(&sc->taskq, 1, PI_NET, "vmbus_pcib_tq");
1536 
1537 	hbus->sc = sc;
1538 
1539 	init_completion(&hbus->query_completion);
1540 	hbus->query_comp = &hbus->query_completion;
1541 
1542 	ret = vmbus_chan_open(sc->chan, pci_ring_size, pci_ring_size,
1543 		NULL, 0, vmbus_pcib_on_channel_callback, sc);
1544 	if (ret)
1545 		goto free_res;
1546 
1547 	ret = hv_pci_protocol_negotiation(hbus, pci_protocol_versions,
1548 	    ARRAY_SIZE(pci_protocol_versions));
1549 	if (ret)
1550 		goto vmbus_close;
1551 
1552 	ret = hv_pci_query_relations(hbus);
1553 	if (!ret)
1554 		ret = wait_for_response(hbus, hbus->query_comp);
1555 
1556 	if (ret)
1557 		goto vmbus_close;
1558 
1559 	ret = hv_pci_enter_d0(hbus);
1560 	if (ret)
1561 		goto vmbus_close;
1562 
1563 	ret = hv_send_resources_allocated(hbus);
1564 	if (ret)
1565 		goto vmbus_close;
1566 
1567 	vmbus_pcib_prepopulate_bars(hbus);
1568 
1569 	hbus->pci_bus = device_add_child(dev, "pci", -1);
1570 	if (!hbus->pci_bus) {
1571 		device_printf(dev, "failed to create pci bus\n");
1572 		ret = ENXIO;
1573 		goto vmbus_close;
1574 	}
1575 
1576 	bus_generic_attach(dev);
1577 
1578 	hbus->state = hv_pcibus_installed;
1579 
1580 	return (0);
1581 
1582 vmbus_close:
1583 	vmbus_pcib_pre_detach(hbus);
1584 	vmbus_chan_close(sc->chan);
1585 free_res:
1586 	taskqueue_free(sc->taskq);
1587 	free_completion(&hbus->query_completion);
1588 	free(sc->rx_buf, M_DEVBUF);
1589 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1590 free_bus:
1591 	mtx_destroy(&hbus->device_list_lock);
1592 	mtx_destroy(&hbus->config_lock);
1593 	free(hbus, M_DEVBUF);
1594 	return (ret);
1595 }
1596 
1597 /*
1598  * Standard detach entry point
1599  */
1600 static int
1601 vmbus_pcib_detach(device_t dev)
1602 {
1603 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1604 	struct hv_pcibus *hbus = sc->hbus;
1605 	struct pci_message teardown_packet;
1606 	struct pci_bus_relations relations;
1607 	int ret;
1608 
1609 	vmbus_pcib_pre_detach(hbus);
1610 
1611 	if (hbus->state == hv_pcibus_installed)
1612 		bus_generic_detach(dev);
1613 
1614 	/* Delete any children which might still exist. */
1615 	memset(&relations, 0, sizeof(relations));
1616 	hv_pci_devices_present(hbus, &relations);
1617 
1618 	ret = hv_send_resources_released(hbus);
1619 	if (ret)
1620 		device_printf(dev, "failed to send PCI_RESOURCES_RELEASED\n");
1621 
1622 	teardown_packet.type = PCI_BUS_D0EXIT;
1623 	ret = vmbus_chan_send(sc->chan, VMBUS_CHANPKT_TYPE_INBAND, 0,
1624 	    &teardown_packet, sizeof(struct pci_message), 0);
1625 	if (ret)
1626 		device_printf(dev, "failed to send PCI_BUS_D0EXIT\n");
1627 
1628 	taskqueue_drain_all(hbus->sc->taskq);
1629 	vmbus_chan_close(sc->chan);
1630 	taskqueue_free(sc->taskq);
1631 
1632 	free_completion(&hbus->query_completion);
1633 	free(sc->rx_buf, M_DEVBUF);
1634 	bus_release_resource(dev, SYS_RES_MEMORY, 0, hbus->cfg_res);
1635 
1636 	mtx_destroy(&hbus->device_list_lock);
1637 	mtx_destroy(&hbus->config_lock);
1638 	free(hbus, M_DEVBUF);
1639 
1640 	return (0);
1641 }
1642 
1643 static int
1644 vmbus_pcib_read_ivar(device_t dev, device_t child, int which, uintptr_t *val)
1645 {
1646 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1647 
1648 	switch (which) {
1649 	case PCIB_IVAR_DOMAIN:
1650 		*val = sc->hbus->pci_domain;
1651 		return (0);
1652 
1653 	case PCIB_IVAR_BUS:
1654 		/* There is only bus 0. */
1655 		*val = 0;
1656 		return (0);
1657 	}
1658 	return (ENOENT);
1659 }
1660 
1661 static int
1662 vmbus_pcib_write_ivar(device_t dev, device_t child, int which, uintptr_t val)
1663 {
1664 	return (ENOENT);
1665 }
1666 
1667 static struct resource *
1668 vmbus_pcib_alloc_resource(device_t dev, device_t child, int type, int *rid,
1669 	rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
1670 {
1671 	unsigned int bar_no;
1672 	struct hv_pci_dev *hpdev;
1673 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1674 	struct resource *res;
1675 	unsigned int devfn;
1676 
1677 	if (type == PCI_RES_BUS)
1678 		return (pci_domain_alloc_bus(sc->hbus->pci_domain, child, rid,
1679 		    start, end, count, flags));
1680 
1681 	/* Devices with port I/O BAR are not supported. */
1682 	if (type == SYS_RES_IOPORT)
1683 		return (NULL);
1684 
1685 	if (type == SYS_RES_MEMORY) {
1686 		devfn = PCI_DEVFN(pci_get_slot(child),
1687 		    pci_get_function(child));
1688 		hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1689 		if (!hpdev)
1690 			return (NULL);
1691 
1692 		bar_no = PCI_RID2BAR(*rid);
1693 		if (bar_no >= MAX_NUM_BARS)
1694 			return (NULL);
1695 
1696 		/* Make sure a 32-bit BAR gets a 32-bit address */
1697 		if (!(hpdev->probed_bar[bar_no] & PCIM_BAR_MEM_64))
1698 			end = ulmin(end, 0xFFFFFFFF);
1699 	}
1700 
1701 	res = bus_generic_alloc_resource(dev, child, type, rid,
1702 		start, end, count, flags);
1703 	/*
1704 	 * If this is a request for a specific range, assume it is
1705 	 * correct and pass it up to the parent.
1706 	 */
1707 	if (res == NULL && start + count - 1 == end)
1708 		res = bus_generic_alloc_resource(dev, child, type, rid,
1709 		    start, end, count, flags);
1710 	if (res) {
1711 		device_printf(dev,"vmbus_pcib_alloc_resource is successful\n");
1712 	}
1713 	return (res);
1714 }
1715 
1716 static int
1717 vmbus_pcib_release_resource(device_t dev, device_t child, int type, int rid,
1718     struct resource *r)
1719 {
1720 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1721 
1722 	if (type == PCI_RES_BUS)
1723 		return (pci_domain_release_bus(sc->hbus->pci_domain, child,
1724 		    rid, r));
1725 
1726 	if (type == SYS_RES_IOPORT)
1727 		return (EINVAL);
1728 
1729 	return (bus_generic_release_resource(dev, child, type, rid, r));
1730 }
1731 
1732 static int
1733 vmbus_pcib_get_cpus(device_t pcib, device_t dev, enum cpu_sets op,
1734     size_t setsize, cpuset_t *cpuset)
1735 {
1736 	return (bus_get_cpus(pcib, op, setsize, cpuset));
1737 }
1738 
1739 static uint32_t
1740 vmbus_pcib_read_config(device_t dev, u_int bus, u_int slot, u_int func,
1741     u_int reg, int bytes)
1742 {
1743 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1744 	struct hv_pci_dev *hpdev;
1745 	unsigned int devfn = PCI_DEVFN(slot, func);
1746 	uint32_t data = 0;
1747 
1748 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1749 
1750 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1751 	if (!hpdev)
1752 		return (~0);
1753 
1754 	_hv_pcifront_read_config(hpdev, reg, bytes, &data);
1755 
1756 	return (data);
1757 }
1758 
1759 static void
1760 vmbus_pcib_write_config(device_t dev, u_int bus, u_int slot, u_int func,
1761     u_int reg, uint32_t data, int bytes)
1762 {
1763 	struct vmbus_pcib_softc *sc = device_get_softc(dev);
1764 	struct hv_pci_dev *hpdev;
1765 	unsigned int devfn = PCI_DEVFN(slot, func);
1766 
1767 	KASSERT(bus == 0, ("bus should be 0, but is %u", bus));
1768 
1769 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1770 	if (!hpdev)
1771 		return;
1772 
1773 	_hv_pcifront_write_config(hpdev, reg, bytes, data);
1774 }
1775 
1776 static int
1777 vmbus_pcib_route_intr(device_t pcib, device_t dev, int pin)
1778 {
1779 	/* We only support MSI/MSI-X and don't support INTx interrupt. */
1780 	return (PCI_INVALID_IRQ);
1781 }
1782 
1783 static int
1784 vmbus_pcib_alloc_msi(device_t pcib, device_t dev, int count,
1785     int maxcount, int *irqs)
1786 {
1787 #if defined(__amd64__) || defined(__i386__)
1788 	return (PCIB_ALLOC_MSI(device_get_parent(pcib), dev, count, maxcount,
1789 	    irqs));
1790 #endif
1791 #if defined(__aarch64__)
1792 	return (intr_alloc_msi(pcib, dev, ACPI_MSI_XREF, count, maxcount,
1793 	    irqs));
1794 #endif
1795 }
1796 
1797 static int
1798 vmbus_pcib_release_msi(device_t pcib, device_t dev, int count, int *irqs)
1799 {
1800 #if defined(__amd64__) || defined(__i386__)
1801 	return (PCIB_RELEASE_MSI(device_get_parent(pcib), dev, count, irqs));
1802 #endif
1803 #if defined(__aarch64__)
1804 	return(intr_release_msi(pcib, dev, ACPI_MSI_XREF, count, irqs));
1805 #endif
1806 }
1807 
1808 static int
1809 vmbus_pcib_alloc_msix(device_t pcib, device_t dev, int *irq)
1810 {
1811 #if defined(__aarch64__)
1812 	int ret;
1813 #if defined(INTRNG)
1814 	ret = intr_alloc_msix(pcib, dev, ACPI_MSI_XREF, irq);
1815 	return ret;
1816 #else
1817     return (ENXIO);
1818 #endif
1819 #else
1820 	return (PCIB_ALLOC_MSIX(device_get_parent(pcib), dev, irq));
1821 #endif /* __aarch64__ */
1822 }
1823 
1824 static int
1825 vmbus_pcib_release_msix(device_t pcib, device_t dev, int irq)
1826 {
1827 #if defined(__aarch64__)
1828 	return (intr_release_msix(pcib, dev, ACPI_MSI_XREF, irq));
1829 #else
1830 	return (PCIB_RELEASE_MSIX(device_get_parent(pcib), dev, irq));
1831 #endif /* __aarch64__ */
1832 }
1833 
/*
 * Per-architecture MSI constants used by vmbus_pcib_map_msi():
 * MSI_INTEL_ADDR_DEST masks the destination field of the MSI address,
 * MSI_INTEL_DATA_INTVEC masks the vector in the MSI data (x86 only), and
 * MSI_INTEL_DATA_DELFIXED is the delivery mode passed to the host.
 */
#if defined(__amd64__) || defined(__i386__)
#define MSI_INTEL_ADDR_DEST	0x000ff000
#define MSI_INTEL_DATA_INTVEC	IOART_INTVEC	/* Interrupt vector. */
#define MSI_INTEL_DATA_DELFIXED	IOART_DELFIXED
#elif defined(__aarch64__)
#define MSI_INTEL_ADDR_DEST	0x00000000
#define MSI_INTEL_DATA_DELFIXED	0x0
#endif
1843 
1844 static int
1845 vmbus_pcib_map_msi(device_t pcib, device_t child, int irq,
1846     uint64_t *addr, uint32_t *data)
1847 {
1848 	unsigned int devfn;
1849 	struct hv_pci_dev *hpdev;
1850 
1851 	uint64_t v_addr;
1852 	uint32_t v_data;
1853 	struct hv_irq_desc *hid, *tmp_hid;
1854 	unsigned int cpu, vcpu_id;
1855 	unsigned int vector;
1856 
1857 	struct vmbus_pcib_softc *sc = device_get_softc(pcib);
1858 	struct compose_comp_ctxt comp;
1859 	struct {
1860 		struct pci_packet pkt;
1861 		union {
1862 			struct pci_create_interrupt v1;
1863 			struct pci_create_interrupt3 v3;
1864 		}int_pkts;
1865 	} ctxt;
1866 	int ret;
1867 	uint32_t size;
1868 
1869 	devfn = PCI_DEVFN(pci_get_slot(child), pci_get_function(child));
1870 	hpdev = get_pcichild_wslot(sc->hbus, devfn_to_wslot(devfn));
1871 	if (!hpdev)
1872 		return (ENOENT);
1873 #if defined(__aarch64__)
1874 	ret = intr_map_msi(pcib, child, ACPI_MSI_XREF, irq,
1875 	    &v_addr, &v_data);
1876 #else
1877 	ret = PCIB_MAP_MSI(device_get_parent(pcib), child, irq,
1878             &v_addr, &v_data);
1879 #endif
1880 	if (ret)
1881 		return (ret);
1882 
1883 	TAILQ_FOREACH_SAFE(hid, &hpdev->irq_desc_list, link, tmp_hid) {
1884 		if (hid->irq == irq) {
1885 			TAILQ_REMOVE(&hpdev->irq_desc_list, hid, link);
1886 			hv_int_desc_free(hpdev, hid);
1887 			break;
1888 		}
1889 	}
1890 
1891 #if defined(__aarch64__)
1892 	cpu = 0;
1893 	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1894 	vector = v_data;
1895 #else
1896 	cpu = (v_addr & MSI_INTEL_ADDR_DEST) >> 12;
1897 	vcpu_id = VMBUS_GET_VCPU_ID(device_get_parent(pcib), pcib, cpu);
1898 	vector = v_data & MSI_INTEL_DATA_INTVEC;
1899 #endif
1900 
1901 	init_completion(&comp.comp_pkt.host_event);
1902 
1903 	memset(&ctxt, 0, sizeof(ctxt));
1904 	ctxt.pkt.completion_func = hv_pci_compose_compl;
1905 	ctxt.pkt.compl_ctxt = &comp;
1906 	switch (hpdev->hbus->protocol_version) {
1907 	case PCI_PROTOCOL_VERSION_1_1:
1908 		ctxt.int_pkts.v1.message_type.type =
1909 		    PCI_CREATE_INTERRUPT_MESSAGE;
1910 		ctxt.int_pkts.v1.wslot.val = hpdev->desc.wslot.val;
1911 		ctxt.int_pkts.v1.int_desc.vector = vector;
1912 		ctxt.int_pkts.v1.int_desc.vector_count = 1;
1913 		ctxt.int_pkts.v1.int_desc.delivery_mode =
1914 		    MSI_INTEL_DATA_DELFIXED;
1915 		ctxt.int_pkts.v1.int_desc.cpu_mask = 1ULL << vcpu_id;
1916 		size = sizeof(ctxt.int_pkts.v1);
1917 		break;
1918 
1919 	case PCI_PROTOCOL_VERSION_1_4:
1920 		ctxt.int_pkts.v3.message_type.type =
1921 		    PCI_CREATE_INTERRUPT_MESSAGE3;
1922 		ctxt.int_pkts.v3.wslot.val = hpdev->desc.wslot.val;
1923 		ctxt.int_pkts.v3.int_desc.vector = vector;
1924 		ctxt.int_pkts.v3.int_desc.vector_count = 1;
1925 		ctxt.int_pkts.v3.int_desc.reserved = 0;
1926 		ctxt.int_pkts.v3.int_desc.delivery_mode =
1927 		    MSI_INTEL_DATA_DELFIXED;
1928 		ctxt.int_pkts.v3.int_desc.processor_count = 1;
1929 		ctxt.int_pkts.v3.int_desc.processor_array[0] = vcpu_id;
1930 		size = sizeof(ctxt.int_pkts.v3);
1931 		break;
1932 	}
1933 	ret = vmbus_chan_send(sc->chan,	VMBUS_CHANPKT_TYPE_INBAND,
1934 	    VMBUS_CHANPKT_FLAG_RC, &ctxt.int_pkts, size,
1935 	    (uint64_t)(uintptr_t)&ctxt.pkt);
1936 	if (ret) {
1937 		free_completion(&comp.comp_pkt.host_event);
1938 		return (ret);
1939 	}
1940 
1941 	wait_for_completion(&comp.comp_pkt.host_event);
1942 	free_completion(&comp.comp_pkt.host_event);
1943 
1944 	if (comp.comp_pkt.completion_status < 0) {
1945 		device_printf(pcib,
1946 		    "vmbus_pcib_map_msi completion_status %d\n",
1947 		    comp.comp_pkt.completion_status);
1948 		return (EPROTO);
1949 	}
1950 
1951 	*addr = comp.int_desc.address;
1952 	*data = comp.int_desc.data;
1953 
1954 	hid = malloc(sizeof(struct hv_irq_desc), M_DEVBUF, M_WAITOK | M_ZERO);
1955 	hid->irq = irq;
1956 	hid->desc = comp.int_desc;
1957 	TAILQ_INSERT_TAIL(&hpdev->irq_desc_list, hid, link);
1958 
1959 	return (0);
1960 }
1961 
1962 static device_method_t vmbus_pcib_methods[] = {
1963 	/* Device interface */
1964 	DEVMETHOD(device_probe,         vmbus_pcib_probe),
1965 	DEVMETHOD(device_attach,        vmbus_pcib_attach),
1966 	DEVMETHOD(device_detach,        vmbus_pcib_detach),
1967 	DEVMETHOD(device_shutdown,	bus_generic_shutdown),
1968 	DEVMETHOD(device_suspend,	bus_generic_suspend),
1969 	DEVMETHOD(device_resume,	bus_generic_resume),
1970 
1971 	/* Bus interface */
1972 	DEVMETHOD(bus_read_ivar,		vmbus_pcib_read_ivar),
1973 	DEVMETHOD(bus_write_ivar,		vmbus_pcib_write_ivar),
1974 	DEVMETHOD(bus_alloc_resource,		vmbus_pcib_alloc_resource),
1975 	DEVMETHOD(bus_release_resource,		vmbus_pcib_release_resource),
1976 	DEVMETHOD(bus_activate_resource,   bus_generic_activate_resource),
1977 	DEVMETHOD(bus_deactivate_resource, bus_generic_deactivate_resource),
1978 	DEVMETHOD(bus_setup_intr,	   bus_generic_setup_intr),
1979 	DEVMETHOD(bus_teardown_intr,	   bus_generic_teardown_intr),
1980 	DEVMETHOD(bus_get_cpus,			vmbus_pcib_get_cpus),
1981 
1982 	/* pcib interface */
1983 	DEVMETHOD(pcib_maxslots,		pcib_maxslots),
1984 	DEVMETHOD(pcib_read_config,		vmbus_pcib_read_config),
1985 	DEVMETHOD(pcib_write_config,		vmbus_pcib_write_config),
1986 	DEVMETHOD(pcib_route_interrupt,		vmbus_pcib_route_intr),
1987 	DEVMETHOD(pcib_alloc_msi,		vmbus_pcib_alloc_msi),
1988 	DEVMETHOD(pcib_release_msi,		vmbus_pcib_release_msi),
1989 	DEVMETHOD(pcib_alloc_msix,		vmbus_pcib_alloc_msix),
1990 	DEVMETHOD(pcib_release_msix,		vmbus_pcib_release_msix),
1991 	DEVMETHOD(pcib_map_msi,			vmbus_pcib_map_msi),
1992 	DEVMETHOD(pcib_request_feature,		pcib_request_feature_allow),
1993 
1994 	DEVMETHOD_END
1995 };
1996 
1997 DEFINE_CLASS_0(pcib, vmbus_pcib_driver, vmbus_pcib_methods,
1998 		sizeof(struct vmbus_pcib_softc));
1999 DRIVER_MODULE(vmbus_pcib, vmbus, vmbus_pcib_driver, 0, 0);
2000 MODULE_DEPEND(vmbus_pcib, vmbus, 1, 1, 1);
2001 MODULE_DEPEND(vmbus_pcib, pci, 1, 1, 1);
2002 
2003 #endif /* NEW_PCIB */
2004