1 /*
2  * Copyright (C) 2014 Michael Brown <mbrown@fensystems.co.uk>.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public License as
6  * published by the Free Software Foundation; either version 2 of the
7  * License, or (at your option) any later version.
8  *
9  * This program is distributed in the hope that it will be useful, but
10  * WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * General Public License for more details.
13  *
14  * You should have received a copy of the GNU General Public License
15  * along with this program; if not, write to the Free Software
16  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
17  * 02110-1301, USA.
18  *
19  * You can also choose to distribute this program under the terms of
20  * the Unmodified Binary Distribution Licence (as given in the file
21  * COPYING.UBDL), provided that you have satisfied its requirements.
22  */
23 
24 FILE_LICENCE ( GPL2_OR_LATER_OR_UBDL );
25 
26 /** @file
27  *
28  * Hyper-V driver
29  *
30  */
31 
32 #include <stdlib.h>
33 #include <stdarg.h>
34 #include <string.h>
35 #include <unistd.h>
36 #include <assert.h>
37 #include <errno.h>
38 #include <byteswap.h>
39 #include <pic8259.h>
40 #include <ipxe/malloc.h>
41 #include <ipxe/device.h>
42 #include <ipxe/timer.h>
43 #include <ipxe/quiesce.h>
44 #include <ipxe/cpuid.h>
45 #include <ipxe/msr.h>
46 #include <ipxe/hyperv.h>
47 #include <ipxe/vmbus.h>
48 #include "hyperv.h"
49 
/** Maximum time to wait for a message response, in milliseconds
 *
 * This is a policy decision.  hv_wait_for_message() polls once per
 * millisecond, so this is also its maximum number of polling attempts.
 */
#define HV_MESSAGE_MAX_WAIT_MS 1000

/** Hyper-V timer frequency (fixed 10MHz) */
#define HV_TIMER_HZ 10000000

/** Hyper-V timer scale factor (used to avoid 64-bit division)
 *
 * hv_currticks() shifts both the reference counter and HV_TIMER_HZ
 * right by this many bits before scaling to TICKS_PER_SEC.
 */
#define HV_TIMER_SHIFT 18

/**
 * Convert a Hyper-V status code to an iPXE status code
 *
 * @v status		Hyper-V status code
 * @ret rc		iPXE status code (before negation)
 */
#define EHV( status ) EPLATFORM ( EINFO_EPLATFORM, (status) )
69 
70 /**
71  * Allocate zeroed pages
72  *
73  * @v hv		Hyper-V hypervisor
74  * @v ...		Page addresses to fill in, terminated by NULL
75  * @ret rc		Return status code
76  */
77 __attribute__ (( sentinel )) int
hv_alloc_pages(struct hv_hypervisor * hv,...)78 hv_alloc_pages ( struct hv_hypervisor *hv, ... ) {
79 	va_list args;
80 	void **page;
81 	int i;
82 
83 	/* Allocate and zero pages */
84 	va_start ( args, hv );
85 	for ( i = 0 ; ( ( page = va_arg ( args, void ** ) ) != NULL ); i++ ) {
86 		*page = malloc_dma ( PAGE_SIZE, PAGE_SIZE );
87 		if ( ! *page )
88 			goto err_alloc;
89 		memset ( *page, 0, PAGE_SIZE );
90 	}
91 	va_end ( args );
92 
93 	return 0;
94 
95  err_alloc:
96 	va_end ( args );
97 	va_start ( args, hv );
98 	for ( ; i >= 0 ; i-- ) {
99 		page = va_arg ( args, void ** );
100 		free_dma ( *page, PAGE_SIZE );
101 	}
102 	va_end ( args );
103 	return -ENOMEM;
104 }
105 
106 /**
107  * Free pages
108  *
109  * @v hv		Hyper-V hypervisor
110  * @v ...		Page addresses, terminated by NULL
111  */
112 __attribute__ (( sentinel )) void
hv_free_pages(struct hv_hypervisor * hv,...)113 hv_free_pages ( struct hv_hypervisor *hv, ... ) {
114 	va_list args;
115 	void *page;
116 
117 	va_start ( args, hv );
118 	while ( ( page = va_arg ( args, void * ) ) != NULL )
119 		free_dma ( page, PAGE_SIZE );
120 	va_end ( args );
121 }
122 
123 /**
124  * Allocate message buffer
125  *
126  * @v hv		Hyper-V hypervisor
127  * @ret rc		Return status code
128  */
hv_alloc_message(struct hv_hypervisor * hv)129 static int hv_alloc_message ( struct hv_hypervisor *hv ) {
130 
131 	/* Allocate buffer.  Must be aligned to at least 8 bytes and
132 	 * must not cross a page boundary, so align on its own size.
133 	 */
134 	hv->message = malloc_dma ( sizeof ( *hv->message ),
135 				   sizeof ( *hv->message ) );
136 	if ( ! hv->message )
137 		return -ENOMEM;
138 
139 	return 0;
140 }
141 
142 /**
143  * Free message buffer
144  *
145  * @v hv		Hyper-V hypervisor
146  */
hv_free_message(struct hv_hypervisor * hv)147 static void hv_free_message ( struct hv_hypervisor *hv ) {
148 
149 	/* Free buffer */
150 	free_dma ( hv->message, sizeof ( *hv->message ) );
151 }
152 
153 /**
154  * Check whether or not we are running in Hyper-V
155  *
156  * @ret rc		Return status code
157  */
hv_check_hv(void)158 static int hv_check_hv ( void ) {
159 	struct x86_features features;
160 	uint32_t interface_id;
161 	uint32_t discard_ebx;
162 	uint32_t discard_ecx;
163 	uint32_t discard_edx;
164 
165 	/* Check for presence of a hypervisor (not necessarily Hyper-V) */
166 	x86_features ( &features );
167 	if ( ! ( features.intel.ecx & CPUID_FEATURES_INTEL_ECX_HYPERVISOR ) ) {
168 		DBGC ( HV_INTERFACE_ID, "HV not running in a hypervisor\n" );
169 		return -ENODEV;
170 	}
171 
172 	/* Check that hypervisor is Hyper-V */
173 	cpuid ( HV_CPUID_INTERFACE_ID, 0, &interface_id, &discard_ebx,
174 		&discard_ecx, &discard_edx );
175 	if ( interface_id != HV_INTERFACE_ID ) {
176 		DBGC ( HV_INTERFACE_ID, "HV not running in Hyper-V (interface "
177 		       "ID %#08x)\n", interface_id );
178 		return -ENODEV;
179 	}
180 
181 	return 0;
182 }
183 
184 /**
185  * Check required features
186  *
187  * @v hv		Hyper-V hypervisor
188  * @ret rc		Return status code
189  */
hv_check_features(struct hv_hypervisor * hv)190 static int hv_check_features ( struct hv_hypervisor *hv ) {
191 	uint32_t available;
192 	uint32_t permissions;
193 	uint32_t discard_ecx;
194 	uint32_t discard_edx;
195 
196 	/* Check that required features and privileges are available */
197 	cpuid ( HV_CPUID_FEATURES, 0, &available, &permissions, &discard_ecx,
198 		&discard_edx );
199 	if ( ! ( available & HV_FEATURES_AVAIL_HYPERCALL_MSR ) ) {
200 		DBGC ( hv, "HV %p has no hypercall MSRs (features %08x:%08x)\n",
201 		       hv, available, permissions );
202 		return -ENODEV;
203 	}
204 	if ( ! ( available & HV_FEATURES_AVAIL_SYNIC_MSR ) ) {
205 		DBGC ( hv, "HV %p has no SynIC MSRs (features %08x:%08x)\n",
206 		       hv, available, permissions );
207 		return -ENODEV;
208 	}
209 	if ( ! ( permissions & HV_FEATURES_PERM_POST_MESSAGES ) ) {
210 		DBGC ( hv, "HV %p cannot post messages (features %08x:%08x)\n",
211 		       hv, available, permissions );
212 		return -EACCES;
213 	}
214 	if ( ! ( permissions & HV_FEATURES_PERM_SIGNAL_EVENTS ) ) {
215 		DBGC ( hv, "HV %p cannot signal events (features %08x:%08x)",
216 		       hv, available, permissions );
217 		return -EACCES;
218 	}
219 
220 	return 0;
221 }
222 
223 /**
224  * Check that Gen 2 UEFI firmware is not running
225  *
226  * @v hv		Hyper-V hypervisor
227  * @ret rc		Return status code
228  *
229  * We must not steal ownership from the Gen 2 UEFI firmware, since
230  * doing so will cause an immediate crash.  Avoid this by checking for
231  * the guest OS identity known to be used by the Gen 2 UEFI firmware.
232  */
hv_check_uefi(struct hv_hypervisor * hv)233 static int hv_check_uefi ( struct hv_hypervisor *hv ) {
234 	uint64_t guest_os_id;
235 
236 	/* Check for UEFI firmware's guest OS identity */
237 	guest_os_id = rdmsr ( HV_X64_MSR_GUEST_OS_ID );
238 	if ( guest_os_id == HV_GUEST_OS_ID_UEFI ) {
239 		DBGC ( hv, "HV %p is owned by UEFI firmware\n", hv );
240 		return -ENOTSUP;
241 	}
242 
243 	return 0;
244 }
245 
246 /**
247  * Map hypercall page
248  *
249  * @v hv		Hyper-V hypervisor
250  */
hv_map_hypercall(struct hv_hypervisor * hv)251 static void hv_map_hypercall ( struct hv_hypervisor *hv ) {
252 	union {
253 		struct {
254 			uint32_t ebx;
255 			uint32_t ecx;
256 			uint32_t edx;
257 		} __attribute__ (( packed ));
258 		char text[ 13 /* "bbbbccccdddd" + NUL */ ];
259 	} vendor_id;
260 	uint32_t build;
261 	uint32_t version;
262 	uint32_t discard_eax;
263 	uint32_t discard_ecx;
264 	uint32_t discard_edx;
265 	uint64_t guest_os_id;
266 	uint64_t hypercall;
267 
268 	/* Report guest OS identity */
269 	guest_os_id = rdmsr ( HV_X64_MSR_GUEST_OS_ID );
270 	if ( guest_os_id != 0 ) {
271 		DBGC ( hv, "HV %p guest OS ID MSR was %#08llx\n",
272 		       hv, guest_os_id );
273 	}
274 	guest_os_id = HV_GUEST_OS_ID_IPXE;
275 	DBGC2 ( hv, "HV %p guest OS ID MSR is %#08llx\n", hv, guest_os_id );
276 	wrmsr ( HV_X64_MSR_GUEST_OS_ID, guest_os_id );
277 
278 	/* Get hypervisor system identity (for debugging) */
279 	cpuid ( HV_CPUID_VENDOR_ID, 0, &discard_eax, &vendor_id.ebx,
280 		&vendor_id.ecx, &vendor_id.edx );
281 	vendor_id.text[ sizeof ( vendor_id.text ) - 1 ] = '\0';
282 	cpuid ( HV_CPUID_HYPERVISOR_ID, 0, &build, &version, &discard_ecx,
283 		&discard_edx );
284 	DBGC ( hv, "HV %p detected \"%s\" version %d.%d build %d\n", hv,
285 	       vendor_id.text, ( version >> 16 ), ( version & 0xffff ), build );
286 
287 	/* Map hypercall page */
288 	hypercall = rdmsr ( HV_X64_MSR_HYPERCALL );
289 	hypercall &= ( PAGE_SIZE - 1 );
290 	hypercall |= ( virt_to_phys ( hv->hypercall ) | HV_HYPERCALL_ENABLE );
291 	DBGC2 ( hv, "HV %p hypercall MSR is %#08llx\n", hv, hypercall );
292 	wrmsr ( HV_X64_MSR_HYPERCALL, hypercall );
293 }
294 
295 /**
296  * Unmap hypercall page
297  *
298  * @v hv		Hyper-V hypervisor
299  */
hv_unmap_hypercall(struct hv_hypervisor * hv)300 static void hv_unmap_hypercall ( struct hv_hypervisor *hv ) {
301 	uint64_t hypercall;
302 	uint64_t guest_os_id;
303 
304 	/* Unmap the hypercall page */
305 	hypercall = rdmsr ( HV_X64_MSR_HYPERCALL );
306 	hypercall &= ( ( PAGE_SIZE - 1 ) & ~HV_HYPERCALL_ENABLE );
307 	DBGC2 ( hv, "HV %p hypercall MSR is %#08llx\n", hv, hypercall );
308 	wrmsr ( HV_X64_MSR_HYPERCALL, hypercall );
309 
310 	/* Reset the guest OS identity */
311 	guest_os_id = 0;
312 	DBGC2 ( hv, "HV %p guest OS ID MSR is %#08llx\n", hv, guest_os_id );
313 	wrmsr ( HV_X64_MSR_GUEST_OS_ID, guest_os_id );
314 }
315 
316 /**
317  * Map synthetic interrupt controller
318  *
319  * @v hv		Hyper-V hypervisor
320  */
hv_map_synic(struct hv_hypervisor * hv)321 static void hv_map_synic ( struct hv_hypervisor *hv ) {
322 	uint64_t simp;
323 	uint64_t siefp;
324 	uint64_t scontrol;
325 
326 	/* Zero SynIC message and event pages */
327 	memset ( hv->synic.message, 0, PAGE_SIZE );
328 	memset ( hv->synic.event, 0, PAGE_SIZE );
329 
330 	/* Map SynIC message page */
331 	simp = rdmsr ( HV_X64_MSR_SIMP );
332 	simp &= ( PAGE_SIZE - 1 );
333 	simp |= ( virt_to_phys ( hv->synic.message ) | HV_SIMP_ENABLE );
334 	DBGC2 ( hv, "HV %p SIMP MSR is %#08llx\n", hv, simp );
335 	wrmsr ( HV_X64_MSR_SIMP, simp );
336 
337 	/* Map SynIC event page */
338 	siefp = rdmsr ( HV_X64_MSR_SIEFP );
339 	siefp &= ( PAGE_SIZE - 1 );
340 	siefp |= ( virt_to_phys ( hv->synic.event ) | HV_SIEFP_ENABLE );
341 	DBGC2 ( hv, "HV %p SIEFP MSR is %#08llx\n", hv, siefp );
342 	wrmsr ( HV_X64_MSR_SIEFP, siefp );
343 
344 	/* Enable SynIC */
345 	scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
346 	scontrol |= HV_SCONTROL_ENABLE;
347 	DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
348 	wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
349 }
350 
351 /**
352  * Unmap synthetic interrupt controller, leaving SCONTROL untouched
353  *
354  * @v hv		Hyper-V hypervisor
355  */
hv_unmap_synic_no_scontrol(struct hv_hypervisor * hv)356 static void hv_unmap_synic_no_scontrol ( struct hv_hypervisor *hv ) {
357 	uint64_t siefp;
358 	uint64_t simp;
359 
360 	/* Unmap SynIC event page */
361 	siefp = rdmsr ( HV_X64_MSR_SIEFP );
362 	siefp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIEFP_ENABLE );
363 	DBGC2 ( hv, "HV %p SIEFP MSR is %#08llx\n", hv, siefp );
364 	wrmsr ( HV_X64_MSR_SIEFP, siefp );
365 
366 	/* Unmap SynIC message page */
367 	simp = rdmsr ( HV_X64_MSR_SIMP );
368 	simp &= ( ( PAGE_SIZE - 1 ) & ~HV_SIMP_ENABLE );
369 	DBGC2 ( hv, "HV %p SIMP MSR is %#08llx\n", hv, simp );
370 	wrmsr ( HV_X64_MSR_SIMP, simp );
371 }
372 
373 /**
374  * Unmap synthetic interrupt controller
375  *
376  * @v hv		Hyper-V hypervisor
377  */
hv_unmap_synic(struct hv_hypervisor * hv)378 static void hv_unmap_synic ( struct hv_hypervisor *hv ) {
379 	uint64_t scontrol;
380 
381 	/* Disable SynIC */
382 	scontrol = rdmsr ( HV_X64_MSR_SCONTROL );
383 	scontrol &= ~HV_SCONTROL_ENABLE;
384 	DBGC2 ( hv, "HV %p SCONTROL MSR is %#08llx\n", hv, scontrol );
385 	wrmsr ( HV_X64_MSR_SCONTROL, scontrol );
386 
387 	/* Unmap SynIC event and message pages */
388 	hv_unmap_synic_no_scontrol ( hv );
389 }
390 
391 /**
392  * Enable synthetic interrupt
393  *
394  * @v hv		Hyper-V hypervisor
395  * @v sintx		Synthetic interrupt number
396  */
hv_enable_sint(struct hv_hypervisor * hv,unsigned int sintx)397 void hv_enable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
398 	unsigned long msr = HV_X64_MSR_SINT ( sintx );
399 	uint64_t sint;
400 
401 	/* Enable synthetic interrupt
402 	 *
403 	 * We have to enable the interrupt, otherwise messages will
404 	 * not be delivered (even though the documentation implies
405 	 * that polling for messages is possible).  We enable AutoEOI
406 	 * and hook the interrupt to the obsolete IRQ13 (FPU
407 	 * exception) vector, which will be implemented as a no-op.
408 	 */
409 	sint = rdmsr ( msr );
410 	sint &= ~( HV_SINT_MASKED | HV_SINT_VECTOR_MASK );
411 	sint |= ( HV_SINT_AUTO_EOI |
412 		  HV_SINT_VECTOR ( IRQ_INT ( 13 /* See comment above */ ) ) );
413 	DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
414 	wrmsr ( msr, sint );
415 }
416 
417 /**
418  * Disable synthetic interrupt
419  *
420  * @v hv		Hyper-V hypervisor
421  * @v sintx		Synthetic interrupt number
422  */
hv_disable_sint(struct hv_hypervisor * hv,unsigned int sintx)423 void hv_disable_sint ( struct hv_hypervisor *hv, unsigned int sintx ) {
424 	unsigned long msr = HV_X64_MSR_SINT ( sintx );
425 	uint64_t sint;
426 
427 	/* Do nothing if interrupt is already disabled */
428 	sint = rdmsr ( msr );
429 	if ( sint & HV_SINT_MASKED )
430 		return;
431 
432 	/* Disable synthetic interrupt */
433 	sint &= ~HV_SINT_AUTO_EOI;
434 	sint |= HV_SINT_MASKED;
435 	DBGC2 ( hv, "HV %p SINT%d MSR is %#08llx\n", hv, sintx, sint );
436 	wrmsr ( msr, sint );
437 }
438 
439 /**
440  * Post message
441  *
442  * @v hv		Hyper-V hypervisor
443  * @v id		Connection ID
444  * @v type		Message type
445  * @v data		Message
446  * @v len		Length of message
447  * @ret rc		Return status code
448  */
hv_post_message(struct hv_hypervisor * hv,unsigned int id,unsigned int type,const void * data,size_t len)449 int hv_post_message ( struct hv_hypervisor *hv, unsigned int id,
450 		      unsigned int type, const void *data, size_t len ) {
451 	struct hv_post_message *msg = &hv->message->posted;
452 	int status;
453 	int rc;
454 
455 	/* Sanity check */
456 	assert ( len <= sizeof ( msg->data ) );
457 
458 	/* Construct message */
459 	memset ( msg, 0, sizeof ( *msg ) );
460 	msg->id = cpu_to_le32 ( id );
461 	msg->type = cpu_to_le32 ( type );
462 	msg->len = cpu_to_le32 ( len );
463 	memcpy ( msg->data, data, len );
464 	DBGC2 ( hv, "HV %p connection %d posting message type %#08x:\n",
465 		hv, id, type );
466 	DBGC2_HDA ( hv, 0, msg->data, len );
467 
468 	/* Post message */
469 	if ( ( status = hv_call ( hv, HV_POST_MESSAGE, msg, NULL ) ) != 0 ) {
470 		rc = -EHV ( status );
471 		DBGC ( hv, "HV %p could not post message to %#08x: %s\n",
472 		       hv, id, strerror ( rc ) );
473 		return rc;
474 	}
475 
476 	return 0;
477 }
478 
479 /**
480  * Wait for received message
481  *
482  * @v hv		Hyper-V hypervisor
483  * @v sintx		Synthetic interrupt number
484  * @ret rc		Return status code
485  */
hv_wait_for_message(struct hv_hypervisor * hv,unsigned int sintx)486 int hv_wait_for_message ( struct hv_hypervisor *hv, unsigned int sintx ) {
487 	struct hv_message *msg = &hv->message->received;
488 	struct hv_message *src = &hv->synic.message[sintx];
489 	unsigned int retries;
490 	size_t len;
491 
492 	/* Wait for message to arrive */
493 	for ( retries = 0 ; retries < HV_MESSAGE_MAX_WAIT_MS ; retries++ ) {
494 
495 		/* Check for message */
496 		if ( src->type ) {
497 
498 			/* Copy message */
499 			memset ( msg, 0, sizeof ( *msg ) );
500 			len = src->len;
501 			assert ( len <= sizeof ( *msg ) );
502 			memcpy ( msg, src,
503 				 ( offsetof ( typeof ( *msg ), data ) + len ) );
504 			DBGC2 ( hv, "HV %p SINT%d received message type "
505 				"%#08x:\n", hv, sintx,
506 				le32_to_cpu ( msg->type ) );
507 			DBGC2_HDA ( hv, 0, msg->data, len );
508 
509 			/* Consume message */
510 			src->type = 0;
511 
512 			return 0;
513 		}
514 
515 		/* Trigger message delivery */
516 		wrmsr ( HV_X64_MSR_EOM, 0 );
517 
518 		/* Delay */
519 		mdelay ( 1 );
520 	}
521 
522 	DBGC ( hv, "HV %p SINT%d timed out waiting for message\n",
523 	       hv, sintx );
524 	return -ETIMEDOUT;
525 }
526 
527 /**
528  * Signal event
529  *
530  * @v hv		Hyper-V hypervisor
531  * @v id		Connection ID
532  * @v flag		Flag number
533  * @ret rc		Return status code
534  */
hv_signal_event(struct hv_hypervisor * hv,unsigned int id,unsigned int flag)535 int hv_signal_event ( struct hv_hypervisor *hv, unsigned int id,
536 		      unsigned int flag ) {
537 	struct hv_signal_event *event = &hv->message->signalled;
538 	int status;
539 	int rc;
540 
541 	/* Construct event */
542 	memset ( event, 0, sizeof ( *event ) );
543 	event->id = cpu_to_le32 ( id );
544 	event->flag = cpu_to_le16 ( flag );
545 
546 	/* Signal event */
547 	if ( ( status = hv_call ( hv, HV_SIGNAL_EVENT, event, NULL ) ) != 0 ) {
548 		rc = -EHV ( status );
549 		DBGC ( hv, "HV %p could not signal event to %#08x: %s\n",
550 		       hv, id, strerror ( rc ) );
551 		return rc;
552 	}
553 
554 	return 0;
555 }
556 
/**
 * Probe root device
 *
 * @v rootdev		Root device
 * @ret rc		Return status code
 *
 * Verifies that we are running under Hyper-V with the required
 * features and that the Gen 2 UEFI firmware does not own the
 * connection, then allocates the hypervisor state, maps the
 * hypercall page and SynIC, and probes the VMBus.  On success the
 * hypervisor structure is stored as the root device's driver data.
 * On failure everything set up so far is torn down in reverse order
 * via the error labels at the end of the function.
 */
static int hv_probe ( struct root_device *rootdev ) {
	struct hv_hypervisor *hv;
	int rc;

	/* Check we are running in Hyper-V */
	if ( ( rc = hv_check_hv() ) != 0 )
		goto err_check_hv;

	/* Allocate and initialise structure */
	hv = zalloc ( sizeof ( *hv ) );
	if ( ! hv ) {
		rc = -ENOMEM;
		goto err_alloc;
	}

	/* Check features */
	if ( ( rc = hv_check_features ( hv ) ) != 0 )
		goto err_check_features;

	/* Check that Gen 2 UEFI firmware is not running */
	if ( ( rc = hv_check_uefi ( hv ) ) != 0 )
		goto err_check_uefi;

	/* Allocate pages (hypercall page, SynIC message page and
	 * SynIC event page).
	 */
	if ( ( rc = hv_alloc_pages ( hv, &hv->hypercall, &hv->synic.message,
				     &hv->synic.event, NULL ) ) != 0 )
		goto err_alloc_pages;

	/* Allocate message buffer */
	if ( ( rc = hv_alloc_message ( hv ) ) != 0 )
		goto err_alloc_message;

	/* Map hypercall page */
	hv_map_hypercall ( hv );

	/* Map synthetic interrupt controller */
	hv_map_synic ( hv );

	/* Probe Hyper-V devices */
	if ( ( rc = vmbus_probe ( hv, &rootdev->dev ) ) != 0 )
		goto err_vmbus_probe;

	/* Record hypervisor for use by hv_remove() and the quiescer */
	rootdev_set_drvdata ( rootdev, hv );
	return 0;

	/* Not reached: the unwind sequence below can be entered only
	 * via one of its labels.
	 * NOTE(review): presumably retained to show the complete
	 * teardown order (it mirrors hv_remove()) -- confirm.
	 */
	vmbus_remove ( hv, &rootdev->dev );
 err_vmbus_probe:
	hv_unmap_synic ( hv );
	hv_unmap_hypercall ( hv );
	hv_free_message ( hv );
 err_alloc_message:
	hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
			NULL );
 err_alloc_pages:
	/* No pages to free here: hv_alloc_pages() releases its own
	 * partial allocations before returning an error.
	 */
 err_check_uefi:
 err_check_features:
	free ( hv );
 err_alloc:
 err_check_hv:
	return rc;
}
624 
625 /**
626  * Remove root device
627  *
628  * @v rootdev		Root device
629  */
hv_remove(struct root_device * rootdev)630 static void hv_remove ( struct root_device *rootdev ) {
631 	struct hv_hypervisor *hv = rootdev_get_drvdata ( rootdev );
632 
633 	vmbus_remove ( hv, &rootdev->dev );
634 	hv_unmap_synic ( hv );
635 	hv_unmap_hypercall ( hv );
636 	hv_free_message ( hv );
637 	hv_free_pages ( hv, hv->hypercall, hv->synic.message, hv->synic.event,
638 			NULL );
639 	free ( hv );
640 	rootdev_set_drvdata ( rootdev, NULL );
641 }
642 
643 /** Hyper-V root device driver */
644 static struct root_driver hv_root_driver = {
645 	.probe = hv_probe,
646 	.remove = hv_remove,
647 };
648 
649 /** Hyper-V root device */
650 struct root_device hv_root_device __root_device = {
651 	.dev = { .name = "Hyper-V" },
652 	.driver = &hv_root_driver,
653 };
654 
655 /**
656  * Quiesce system
657  *
658  */
hv_quiesce(void)659 static void hv_quiesce ( void ) {
660 	struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
661 	unsigned int i;
662 
663 	/* Do nothing if we are not running in Hyper-V */
664 	if ( ! hv )
665 		return;
666 
667 	/* The "enlightened" portions of the Windows Server 2016 boot
668 	 * process will not cleanly take ownership of an active
669 	 * Hyper-V connection.  Experimentation shows that the minimum
670 	 * requirement is that we disable the SynIC message page
671 	 * (i.e. zero the SIMP MSR).
672 	 *
673 	 * We cannot perform a full shutdown of the Hyper-V
674 	 * connection.  Experimentation shows that if we disable the
675 	 * SynIC (i.e. zero the SCONTROL MSR) then Windows Server 2016
676 	 * will enter an indefinite wait loop.
677 	 *
678 	 * Attempt to create a safe handover environment by resetting
679 	 * all MSRs except for SCONTROL.
680 	 *
681 	 * Note that we do not shut down our VMBus devices, since we
682 	 * may need to unquiesce the system and continue operation.
683 	 */
684 
685 	/* Disable all synthetic interrupts */
686 	for ( i = 0 ; i <= HV_SINT_MAX ; i++ )
687 		hv_disable_sint ( hv, i );
688 
689 	/* Unmap synthetic interrupt controller, leaving SCONTROL
690 	 * enabled (see above).
691 	 */
692 	hv_unmap_synic_no_scontrol ( hv );
693 
694 	/* Unmap hypercall page */
695 	hv_unmap_hypercall ( hv );
696 
697 	DBGC ( hv, "HV %p quiesced\n", hv );
698 }
699 
700 /**
701  * Unquiesce system
702  *
703  */
hv_unquiesce(void)704 static void hv_unquiesce ( void ) {
705 	struct hv_hypervisor *hv = rootdev_get_drvdata ( &hv_root_device );
706 	uint64_t simp;
707 	int rc;
708 
709 	/* Do nothing if we are not running in Hyper-V */
710 	if ( ! hv )
711 		return;
712 
713 	/* Experimentation shows that the "enlightened" portions of
714 	 * Windows Server 2016 will break our Hyper-V connection at
715 	 * some point during a SAN boot.  Surprisingly it does not
716 	 * change the guest OS ID MSR, but it does leave the SynIC
717 	 * message page disabled.
718 	 *
719 	 * Our own explicit quiescing procedure will also disable the
720 	 * SynIC message page.  We can therefore use the SynIC message
721 	 * page enable bit as a heuristic to determine when we need to
722 	 * reestablish our Hyper-V connection.
723 	 */
724 	simp = rdmsr ( HV_X64_MSR_SIMP );
725 	if ( simp & HV_SIMP_ENABLE )
726 		return;
727 
728 	/* Remap hypercall page */
729 	hv_map_hypercall ( hv );
730 
731 	/* Remap synthetic interrupt controller */
732 	hv_map_synic ( hv );
733 
734 	/* Reset Hyper-V devices */
735 	if ( ( rc = vmbus_reset ( hv, &hv_root_device.dev ) ) != 0 ) {
736 		DBGC ( hv, "HV %p could not unquiesce: %s\n",
737 		       hv, strerror ( rc ) );
738 		/* Nothing we can do */
739 		return;
740 	}
741 }
742 
743 /** Hyper-V quiescer */
744 struct quiescer hv_quiescer __quiescer = {
745 	.quiesce = hv_quiesce,
746 	.unquiesce = hv_unquiesce,
747 };
748 
749 /**
750  * Probe timer
751  *
752  * @ret rc		Return status code
753  */
hv_timer_probe(void)754 static int hv_timer_probe ( void ) {
755 	uint32_t available;
756 	uint32_t discard_ebx;
757 	uint32_t discard_ecx;
758 	uint32_t discard_edx;
759 	int rc;
760 
761 	/* Check we are running in Hyper-V */
762 	if ( ( rc = hv_check_hv() ) != 0 )
763 		return rc;
764 
765 	/* Check for available reference counter */
766 	cpuid ( HV_CPUID_FEATURES, 0, &available, &discard_ebx, &discard_ecx,
767 		&discard_edx );
768 	if ( ! ( available & HV_FEATURES_AVAIL_TIME_REF_COUNT_MSR ) ) {
769 		DBGC ( HV_INTERFACE_ID, "HV has no time reference counter\n" );
770 		return -ENODEV;
771 	}
772 
773 	return 0;
774 }
775 
776 /**
777  * Get current system time in ticks
778  *
779  * @ret ticks		Current time, in ticks
780  */
hv_currticks(void)781 static unsigned long hv_currticks ( void ) {
782 
783 	/* Calculate time using a combination of bit shifts and
784 	 * multiplication (to avoid a 64-bit division).
785 	 */
786 	return ( ( rdmsr ( HV_X64_MSR_TIME_REF_COUNT ) >> HV_TIMER_SHIFT ) *
787 		 ( TICKS_PER_SEC / ( HV_TIMER_HZ >> HV_TIMER_SHIFT ) ) );
788 }
789 
790 /**
791  * Delay for a fixed number of microseconds
792  *
793  * @v usecs		Number of microseconds for which to delay
794  */
hv_udelay(unsigned long usecs)795 static void hv_udelay ( unsigned long usecs ) {
796 	uint32_t start;
797 	uint32_t elapsed;
798 	uint32_t threshold;
799 
800 	/* Spin until specified number of 10MHz ticks have elapsed */
801 	start = rdmsr ( HV_X64_MSR_TIME_REF_COUNT );
802 	threshold = ( usecs * ( HV_TIMER_HZ / 1000000 ) );
803 	do {
804 		elapsed = ( rdmsr ( HV_X64_MSR_TIME_REF_COUNT ) - start );
805 	} while ( elapsed < threshold );
806 }
807 
808 /** Hyper-V timer */
809 struct timer hv_timer __timer ( TIMER_PREFERRED ) = {
810 	.name = "Hyper-V",
811 	.probe = hv_timer_probe,
812 	.currticks = hv_currticks,
813 	.udelay = hv_udelay,
814 };
815 
816 /* Drag in objects via hv_root_device */
817 REQUIRING_SYMBOL ( hv_root_device );
818 
819 /* Drag in netvsc driver */
820 REQUIRE_OBJECT ( netvsc );
821