xref: /qemu/hw/hyperv/hyperv.c (revision 8110fa1d)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
20 #include "qemu/rcu.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23 #include "qom/object.h"
24 
25 struct SynICState {
26     DeviceState parent_obj;
27 
28     CPUState *cs;
29 
30     bool enabled;
31     hwaddr msg_page_addr;
32     hwaddr event_page_addr;
33     MemoryRegion msg_page_mr;
34     MemoryRegion event_page_mr;
35     struct hyperv_message_page *msg_page;
36     struct hyperv_event_flags_page *event_page;
37 };
38 typedef struct SynICState SynICState;
39 
40 #define TYPE_SYNIC "hyperv-synic"
41 DECLARE_INSTANCE_CHECKER(SynICState, SYNIC,
42                          TYPE_SYNIC)
43 
44 static bool synic_enabled;
45 
46 bool hyperv_is_synic_enabled(void)
47 {
48     return synic_enabled;
49 }
50 
51 static SynICState *get_synic(CPUState *cs)
52 {
53     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
54 }
55 
56 static void synic_update(SynICState *synic, bool enable,
57                          hwaddr msg_page_addr, hwaddr event_page_addr)
58 {
59 
60     synic->enabled = enable;
61     if (synic->msg_page_addr != msg_page_addr) {
62         if (synic->msg_page_addr) {
63             memory_region_del_subregion(get_system_memory(),
64                                         &synic->msg_page_mr);
65         }
66         if (msg_page_addr) {
67             memory_region_add_subregion(get_system_memory(), msg_page_addr,
68                                         &synic->msg_page_mr);
69         }
70         synic->msg_page_addr = msg_page_addr;
71     }
72     if (synic->event_page_addr != event_page_addr) {
73         if (synic->event_page_addr) {
74             memory_region_del_subregion(get_system_memory(),
75                                         &synic->event_page_mr);
76         }
77         if (event_page_addr) {
78             memory_region_add_subregion(get_system_memory(), event_page_addr,
79                                         &synic->event_page_mr);
80         }
81         synic->event_page_addr = event_page_addr;
82     }
83 }
84 
85 void hyperv_synic_update(CPUState *cs, bool enable,
86                          hwaddr msg_page_addr, hwaddr event_page_addr)
87 {
88     SynICState *synic = get_synic(cs);
89 
90     if (!synic) {
91         return;
92     }
93 
94     synic_update(synic, enable, msg_page_addr, event_page_addr);
95 }
96 
97 static void synic_realize(DeviceState *dev, Error **errp)
98 {
99     Object *obj = OBJECT(dev);
100     SynICState *synic = SYNIC(dev);
101     char *msgp_name, *eventp_name;
102     uint32_t vp_index;
103 
104     /* memory region names have to be globally unique */
105     vp_index = hyperv_vp_index(synic->cs);
106     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
107     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
108 
109     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
110                            sizeof(*synic->msg_page), &error_abort);
111     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
112                            sizeof(*synic->event_page), &error_abort);
113     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
114     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
115 
116     g_free(msgp_name);
117     g_free(eventp_name);
118 }
119 static void synic_reset(DeviceState *dev)
120 {
121     SynICState *synic = SYNIC(dev);
122     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
123     memset(synic->event_page, 0, sizeof(*synic->event_page));
124     synic_update(synic, false, 0, 0);
125 }
126 
127 static void synic_class_init(ObjectClass *klass, void *data)
128 {
129     DeviceClass *dc = DEVICE_CLASS(klass);
130 
131     dc->realize = synic_realize;
132     dc->reset = synic_reset;
133     dc->user_creatable = false;
134 }
135 
136 void hyperv_synic_add(CPUState *cs)
137 {
138     Object *obj;
139     SynICState *synic;
140 
141     obj = object_new(TYPE_SYNIC);
142     synic = SYNIC(obj);
143     synic->cs = cs;
144     object_property_add_child(OBJECT(cs), "synic", obj);
145     object_unref(obj);
146     qdev_realize(DEVICE(obj), NULL, &error_abort);
147     synic_enabled = true;
148 }
149 
150 void hyperv_synic_reset(CPUState *cs)
151 {
152     SynICState *synic = get_synic(cs);
153 
154     if (synic) {
155         device_legacy_reset(DEVICE(synic));
156     }
157 }
158 
159 static const TypeInfo synic_type_info = {
160     .name = TYPE_SYNIC,
161     .parent = TYPE_DEVICE,
162     .instance_size = sizeof(SynICState),
163     .class_init = synic_class_init,
164 };
165 
166 static void synic_register_types(void)
167 {
168     type_register_static(&synic_type_info);
169 }
170 
171 type_init(synic_register_types)
172 
173 /*
174  * KVM has its own message producers (SynIC timers).  To guarantee
175  * serialization with both KVM vcpu and the guest cpu, the messages are first
176  * staged in an intermediate area and then posted to the SynIC message page in
177  * the vcpu thread.
178  */
179 typedef struct HvSintStagedMessage {
180     /* message content staged by hyperv_post_msg */
181     struct hyperv_message msg;
182     /* callback + data (r/o) to complete the processing in a BH */
183     HvSintMsgCb cb;
184     void *cb_data;
185     /* message posting status filled by cpu_post_msg */
186     int status;
187     /* passing the buck: */
188     enum {
189         /* initial state */
190         HV_STAGED_MSG_FREE,
191         /*
192          * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
193          * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
194          */
195         HV_STAGED_MSG_BUSY,
196         /*
197          * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
198          * notify the guest, records the status, marks the posting done (BUSY
199          * -> POSTED), and schedules sint_msg_bh BH
200          */
201         HV_STAGED_MSG_POSTED,
202         /*
203          * sint_msg_bh (BH) verifies that the posting is done, runs the
204          * callback, and starts over (POSTED -> FREE)
205          */
206     } state;
207 } HvSintStagedMessage;
208 
209 struct HvSintRoute {
210     uint32_t sint;
211     SynICState *synic;
212     int gsi;
213     EventNotifier sint_set_notifier;
214     EventNotifier sint_ack_notifier;
215 
216     HvSintStagedMessage *staged_msg;
217 
218     unsigned refcount;
219 };
220 
221 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
222 {
223     CPUState *cs = qemu_get_cpu(vp_index);
224     assert(hyperv_vp_index(cs) == vp_index);
225     return cs;
226 }
227 
228 /*
229  * BH to complete the processing of a staged message.
230  */
231 static void sint_msg_bh(void *opaque)
232 {
233     HvSintRoute *sint_route = opaque;
234     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
235 
236     if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
237         /* status nor ready yet (spurious ack from guest?), ignore */
238         return;
239     }
240 
241     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
242     staged_msg->status = 0;
243 
244     /* staged message processing finished, ready to start over */
245     atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
246     /* drop the reference taken in hyperv_post_msg */
247     hyperv_sint_route_unref(sint_route);
248 }
249 
250 /*
251  * Worker to transfer the message from the staging area into the SynIC message
252  * page in vcpu context.
253  */
254 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
255 {
256     HvSintRoute *sint_route = data.host_ptr;
257     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
258     SynICState *synic = sint_route->synic;
259     struct hyperv_message *dst_msg;
260     bool wait_for_sint_ack = false;
261 
262     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
263 
264     if (!synic->enabled || !synic->msg_page_addr) {
265         staged_msg->status = -ENXIO;
266         goto posted;
267     }
268 
269     dst_msg = &synic->msg_page->slot[sint_route->sint];
270 
271     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
272         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
273         staged_msg->status = -EAGAIN;
274         wait_for_sint_ack = true;
275     } else {
276         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
277         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
278     }
279 
280     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
281 
282 posted:
283     atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
284     /*
285      * Notify the msg originator of the progress made; if the slot was busy we
286      * set msg_pending flag in it so it will be the guest who will do EOM and
287      * trigger the notification from KVM via sint_ack_notifier
288      */
289     if (!wait_for_sint_ack) {
290         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
291                                 sint_route);
292     }
293 }
294 
295 /*
296  * Post a Hyper-V message to the staging area, for delivery to guest in the
297  * vcpu thread.
298  */
299 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
300 {
301     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
302 
303     assert(staged_msg);
304 
305     /* grab the staging area */
306     if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
307                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
308         return -EAGAIN;
309     }
310 
311     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
312 
313     /* hold a reference on sint_route until the callback is finished */
314     hyperv_sint_route_ref(sint_route);
315 
316     /* schedule message posting attempt in vcpu thread */
317     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
318                      RUN_ON_CPU_HOST_PTR(sint_route));
319     return 0;
320 }
321 
322 static void sint_ack_handler(EventNotifier *notifier)
323 {
324     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
325                                            sint_ack_notifier);
326     event_notifier_test_and_clear(notifier);
327 
328     /*
329      * the guest consumed the previous message so complete the current one with
330      * -EAGAIN and let the msg originator retry
331      */
332     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
333 }
334 
335 /*
336  * Set given event flag for a given sint on a given vcpu, and signal the sint.
337  */
338 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
339 {
340     int ret;
341     SynICState *synic = sint_route->synic;
342     unsigned long *flags, set_mask;
343     unsigned set_idx;
344 
345     if (eventno > HV_EVENT_FLAGS_COUNT) {
346         return -EINVAL;
347     }
348     if (!synic->enabled || !synic->event_page_addr) {
349         return -ENXIO;
350     }
351 
352     set_idx = BIT_WORD(eventno);
353     set_mask = BIT_MASK(eventno);
354     flags = synic->event_page->slot[sint_route->sint].flags;
355 
356     if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
357         memory_region_set_dirty(&synic->event_page_mr, 0,
358                                 sizeof(*synic->event_page));
359         ret = hyperv_sint_route_set_sint(sint_route);
360     } else {
361         ret = 0;
362     }
363     return ret;
364 }
365 
366 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
367                                    HvSintMsgCb cb, void *cb_data)
368 {
369     HvSintRoute *sint_route;
370     EventNotifier *ack_notifier;
371     int r, gsi;
372     CPUState *cs;
373     SynICState *synic;
374 
375     cs = hyperv_find_vcpu(vp_index);
376     if (!cs) {
377         return NULL;
378     }
379 
380     synic = get_synic(cs);
381     if (!synic) {
382         return NULL;
383     }
384 
385     sint_route = g_new0(HvSintRoute, 1);
386     r = event_notifier_init(&sint_route->sint_set_notifier, false);
387     if (r) {
388         goto err;
389     }
390 
391 
392     ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
393     if (ack_notifier) {
394         sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
395         sint_route->staged_msg->cb = cb;
396         sint_route->staged_msg->cb_data = cb_data;
397 
398         r = event_notifier_init(ack_notifier, false);
399         if (r) {
400             goto err_sint_set_notifier;
401         }
402 
403         event_notifier_set_handler(ack_notifier, sint_ack_handler);
404     }
405 
406     gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
407     if (gsi < 0) {
408         goto err_gsi;
409     }
410 
411     r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
412                                            &sint_route->sint_set_notifier,
413                                            ack_notifier, gsi);
414     if (r) {
415         goto err_irqfd;
416     }
417     sint_route->gsi = gsi;
418     sint_route->synic = synic;
419     sint_route->sint = sint;
420     sint_route->refcount = 1;
421 
422     return sint_route;
423 
424 err_irqfd:
425     kvm_irqchip_release_virq(kvm_state, gsi);
426 err_gsi:
427     if (ack_notifier) {
428         event_notifier_set_handler(ack_notifier, NULL);
429         event_notifier_cleanup(ack_notifier);
430         g_free(sint_route->staged_msg);
431     }
432 err_sint_set_notifier:
433     event_notifier_cleanup(&sint_route->sint_set_notifier);
434 err:
435     g_free(sint_route);
436 
437     return NULL;
438 }
439 
440 void hyperv_sint_route_ref(HvSintRoute *sint_route)
441 {
442     sint_route->refcount++;
443 }
444 
445 void hyperv_sint_route_unref(HvSintRoute *sint_route)
446 {
447     if (!sint_route) {
448         return;
449     }
450 
451     assert(sint_route->refcount > 0);
452 
453     if (--sint_route->refcount) {
454         return;
455     }
456 
457     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
458                                           &sint_route->sint_set_notifier,
459                                           sint_route->gsi);
460     kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
461     if (sint_route->staged_msg) {
462         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
463         event_notifier_cleanup(&sint_route->sint_ack_notifier);
464         g_free(sint_route->staged_msg);
465     }
466     event_notifier_cleanup(&sint_route->sint_set_notifier);
467     g_free(sint_route);
468 }
469 
470 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
471 {
472     return event_notifier_set(&sint_route->sint_set_notifier);
473 }
474 
475 typedef struct MsgHandler {
476     struct rcu_head rcu;
477     QLIST_ENTRY(MsgHandler) link;
478     uint32_t conn_id;
479     HvMsgHandler handler;
480     void *data;
481 } MsgHandler;
482 
483 typedef struct EventFlagHandler {
484     struct rcu_head rcu;
485     QLIST_ENTRY(EventFlagHandler) link;
486     uint32_t conn_id;
487     EventNotifier *notifier;
488 } EventFlagHandler;
489 
490 static QLIST_HEAD(, MsgHandler) msg_handlers;
491 static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
492 static QemuMutex handlers_mutex;
493 
494 static void __attribute__((constructor)) hv_init(void)
495 {
496     QLIST_INIT(&msg_handlers);
497     QLIST_INIT(&event_flag_handlers);
498     qemu_mutex_init(&handlers_mutex);
499 }
500 
501 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
502 {
503     int ret;
504     MsgHandler *mh;
505 
506     QEMU_LOCK_GUARD(&handlers_mutex);
507     QLIST_FOREACH(mh, &msg_handlers, link) {
508         if (mh->conn_id == conn_id) {
509             if (handler) {
510                 ret = -EEXIST;
511             } else {
512                 QLIST_REMOVE_RCU(mh, link);
513                 g_free_rcu(mh, rcu);
514                 ret = 0;
515             }
516             return ret;
517         }
518     }
519 
520     if (handler) {
521         mh = g_new(MsgHandler, 1);
522         mh->conn_id = conn_id;
523         mh->handler = handler;
524         mh->data = data;
525         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
526         ret = 0;
527     } else {
528         ret = -ENOENT;
529     }
530 
531     return ret;
532 }
533 
534 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
535 {
536     uint16_t ret;
537     hwaddr len;
538     struct hyperv_post_message_input *msg;
539     MsgHandler *mh;
540 
541     if (fast) {
542         return HV_STATUS_INVALID_HYPERCALL_CODE;
543     }
544     if (param & (__alignof__(*msg) - 1)) {
545         return HV_STATUS_INVALID_ALIGNMENT;
546     }
547 
548     len = sizeof(*msg);
549     msg = cpu_physical_memory_map(param, &len, 0);
550     if (len < sizeof(*msg)) {
551         ret = HV_STATUS_INSUFFICIENT_MEMORY;
552         goto unmap;
553     }
554     if (msg->payload_size > sizeof(msg->payload)) {
555         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
556         goto unmap;
557     }
558 
559     ret = HV_STATUS_INVALID_CONNECTION_ID;
560     WITH_RCU_READ_LOCK_GUARD() {
561         QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
562             if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
563                 ret = mh->handler(msg, mh->data);
564                 break;
565             }
566         }
567     }
568 
569 unmap:
570     cpu_physical_memory_unmap(msg, len, 0, 0);
571     return ret;
572 }
573 
574 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
575 {
576     int ret;
577     EventFlagHandler *handler;
578 
579     QEMU_LOCK_GUARD(&handlers_mutex);
580     QLIST_FOREACH(handler, &event_flag_handlers, link) {
581         if (handler->conn_id == conn_id) {
582             if (notifier) {
583                 ret = -EEXIST;
584             } else {
585                 QLIST_REMOVE_RCU(handler, link);
586                 g_free_rcu(handler, rcu);
587                 ret = 0;
588             }
589             return ret;
590         }
591     }
592 
593     if (notifier) {
594         handler = g_new(EventFlagHandler, 1);
595         handler->conn_id = conn_id;
596         handler->notifier = notifier;
597         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
598         ret = 0;
599     } else {
600         ret = -ENOENT;
601     }
602 
603     return ret;
604 }
605 
606 static bool process_event_flags_userspace;
607 
608 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
609 {
610     if (!process_event_flags_userspace &&
611         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
612         process_event_flags_userspace = true;
613 
614         warn_report("Hyper-V event signaling is not supported by this kernel; "
615                     "using slower userspace hypercall processing");
616     }
617 
618     if (!process_event_flags_userspace) {
619         struct kvm_hyperv_eventfd hvevfd = {
620             .conn_id = conn_id,
621             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
622             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
623         };
624 
625         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
626     }
627     return set_event_flag_handler(conn_id, notifier);
628 }
629 
630 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
631 {
632     EventFlagHandler *handler;
633 
634     if (unlikely(!fast)) {
635         hwaddr addr = param;
636 
637         if (addr & (__alignof__(addr) - 1)) {
638             return HV_STATUS_INVALID_ALIGNMENT;
639         }
640 
641         param = ldq_phys(&address_space_memory, addr);
642     }
643 
644     /*
645      * Per spec, bits 32-47 contain the extra "flag number".  However, we
646      * have no use for it, and in all known usecases it is zero, so just
647      * report lookup failure if it isn't.
648      */
649     if (param & 0xffff00000000ULL) {
650         return HV_STATUS_INVALID_PORT_ID;
651     }
652     /* remaining bits are reserved-zero */
653     if (param & ~HV_CONNECTION_ID_MASK) {
654         return HV_STATUS_INVALID_HYPERCALL_INPUT;
655     }
656 
657     RCU_READ_LOCK_GUARD();
658     QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
659         if (handler->conn_id == param) {
660             event_notifier_set(handler->notifier);
661             return 0;
662         }
663     }
664     return HV_STATUS_INVALID_CONNECTION_ID;
665 }
666