xref: /qemu/hw/hyperv/hyperv.c (revision a976ed3f)
1 /*
2  * Hyper-V guest/hypervisor interaction
3  *
4  * Copyright (c) 2015-2018 Virtuozzo International GmbH.
5  *
6  * This work is licensed under the terms of the GNU GPL, version 2 or later.
7  * See the COPYING file in the top-level directory.
8  */
9 
10 #include "qemu/osdep.h"
11 #include "qemu/main-loop.h"
12 #include "qemu/module.h"
13 #include "qapi/error.h"
14 #include "exec/address-spaces.h"
15 #include "sysemu/kvm.h"
16 #include "qemu/bitops.h"
17 #include "qemu/error-report.h"
18 #include "qemu/lockable.h"
19 #include "qemu/queue.h"
20 #include "qemu/rcu.h"
21 #include "qemu/rcu_queue.h"
22 #include "hw/hyperv/hyperv.h"
23 
/*
 * State of the "hyperv-synic" device that hyperv_synic_add() attaches to a
 * vcpu.  It records whether the SynIC is enabled and where its message and
 * event pages are currently mapped.
 */
typedef struct SynICState {
    DeviceState parent_obj;

    /* vcpu this SynIC belongs to; assigned in hyperv_synic_add() */
    CPUState *cs;

    /* last "enable" value passed to synic_update() */
    bool enabled;
    /* address msg_page_mr is mapped at in system memory; 0 = not mapped */
    hwaddr msg_page_addr;
    /* address event_page_mr is mapped at in system memory; 0 = not mapped */
    hwaddr event_page_addr;
    /* RAM regions backing the two pages, created in synic_realize() */
    MemoryRegion msg_page_mr;
    MemoryRegion event_page_mr;
    /* host pointers into the RAM of the regions above */
    struct hyperv_message_page *msg_page;
    struct hyperv_event_flags_page *event_page;
} SynICState;
37 
/* QOM type name of the SynIC device */
#define TYPE_SYNIC "hyperv-synic"
/* Checked cast of a QOM object to SynICState */
#define SYNIC(obj) OBJECT_CHECK(SynICState, (obj), TYPE_SYNIC)
40 
41 static SynICState *get_synic(CPUState *cs)
42 {
43     return SYNIC(object_resolve_path_component(OBJECT(cs), "synic"));
44 }
45 
46 static void synic_update(SynICState *synic, bool enable,
47                          hwaddr msg_page_addr, hwaddr event_page_addr)
48 {
49 
50     synic->enabled = enable;
51     if (synic->msg_page_addr != msg_page_addr) {
52         if (synic->msg_page_addr) {
53             memory_region_del_subregion(get_system_memory(),
54                                         &synic->msg_page_mr);
55         }
56         if (msg_page_addr) {
57             memory_region_add_subregion(get_system_memory(), msg_page_addr,
58                                         &synic->msg_page_mr);
59         }
60         synic->msg_page_addr = msg_page_addr;
61     }
62     if (synic->event_page_addr != event_page_addr) {
63         if (synic->event_page_addr) {
64             memory_region_del_subregion(get_system_memory(),
65                                         &synic->event_page_mr);
66         }
67         if (event_page_addr) {
68             memory_region_add_subregion(get_system_memory(), event_page_addr,
69                                         &synic->event_page_mr);
70         }
71         synic->event_page_addr = event_page_addr;
72     }
73 }
74 
75 void hyperv_synic_update(CPUState *cs, bool enable,
76                          hwaddr msg_page_addr, hwaddr event_page_addr)
77 {
78     SynICState *synic = get_synic(cs);
79 
80     if (!synic) {
81         return;
82     }
83 
84     synic_update(synic, enable, msg_page_addr, event_page_addr);
85 }
86 
87 static void synic_realize(DeviceState *dev, Error **errp)
88 {
89     Object *obj = OBJECT(dev);
90     SynICState *synic = SYNIC(dev);
91     char *msgp_name, *eventp_name;
92     uint32_t vp_index;
93 
94     /* memory region names have to be globally unique */
95     vp_index = hyperv_vp_index(synic->cs);
96     msgp_name = g_strdup_printf("synic-%u-msg-page", vp_index);
97     eventp_name = g_strdup_printf("synic-%u-event-page", vp_index);
98 
99     memory_region_init_ram(&synic->msg_page_mr, obj, msgp_name,
100                            sizeof(*synic->msg_page), &error_abort);
101     memory_region_init_ram(&synic->event_page_mr, obj, eventp_name,
102                            sizeof(*synic->event_page), &error_abort);
103     synic->msg_page = memory_region_get_ram_ptr(&synic->msg_page_mr);
104     synic->event_page = memory_region_get_ram_ptr(&synic->event_page_mr);
105 
106     g_free(msgp_name);
107     g_free(eventp_name);
108 }
109 static void synic_reset(DeviceState *dev)
110 {
111     SynICState *synic = SYNIC(dev);
112     memset(synic->msg_page, 0, sizeof(*synic->msg_page));
113     memset(synic->event_page, 0, sizeof(*synic->event_page));
114     synic_update(synic, false, 0, 0);
115 }
116 
117 static void synic_class_init(ObjectClass *klass, void *data)
118 {
119     DeviceClass *dc = DEVICE_CLASS(klass);
120 
121     dc->realize = synic_realize;
122     dc->reset = synic_reset;
123     dc->user_creatable = false;
124 }
125 
126 void hyperv_synic_add(CPUState *cs)
127 {
128     Object *obj;
129     SynICState *synic;
130 
131     obj = object_new(TYPE_SYNIC);
132     synic = SYNIC(obj);
133     synic->cs = cs;
134     object_property_add_child(OBJECT(cs), "synic", obj, &error_abort);
135     object_unref(obj);
136     object_property_set_bool(obj, true, "realized", &error_abort);
137 }
138 
139 void hyperv_synic_reset(CPUState *cs)
140 {
141     SynICState *synic = get_synic(cs);
142 
143     if (synic) {
144         device_legacy_reset(DEVICE(synic));
145     }
146 }
147 
/* QOM type description of the SynIC device: a plain TYPE_DEVICE subclass. */
static const TypeInfo synic_type_info = {
    .name = TYPE_SYNIC,
    .parent = TYPE_DEVICE,
    .instance_size = sizeof(SynICState),
    .class_init = synic_class_init,
};
154 
/* Register the SynIC type with QOM; hooked up via type_init() below. */
static void synic_register_types(void)
{
    type_register_static(&synic_type_info);
}

type_init(synic_register_types)
161 
/*
 * KVM has its own message producers (SynIC timers).  To guarantee
 * serialization with both KVM vcpu and the guest cpu, the messages are first
 * staged in an intermediate area and then posted to the SynIC message page in
 * the vcpu thread.
 *
 * One staging area is allocated per HvSintRoute that was created with a
 * completion callback (see hyperv_sint_route_new()).
 */
typedef struct HvSintStagedMessage {
    /* message content staged by hyperv_post_msg */
    struct hyperv_message msg;
    /* callback + data (r/o) to complete the processing in a BH */
    HvSintMsgCb cb;
    void *cb_data;
    /* message posting status filled by cpu_post_msg */
    int status;
    /* ownership of the staging area is handed over via this state: */
    enum {
        /* initial state */
        HV_STAGED_MSG_FREE,
        /*
         * hyperv_post_msg (e.g. in main loop) grabs the staged area (FREE ->
         * BUSY), copies msg, and schedules cpu_post_msg on the assigned cpu
         */
        HV_STAGED_MSG_BUSY,
        /*
         * cpu_post_msg (vcpu thread) tries to copy staged msg to msg slot,
         * notify the guest, records the status, marks the posting done (BUSY
         * -> POSTED), and schedules sint_msg_bh BH
         */
        HV_STAGED_MSG_POSTED,
        /*
         * sint_msg_bh (BH) verifies that the posting is done, runs the
         * callback, and starts over (POSTED -> FREE)
         */
    } state;
} HvSintStagedMessage;
197 
/*
 * A route for signalling one SINT of one vcpu's SynIC.  Created by
 * hyperv_sint_route_new() and reference-counted via hyperv_sint_route_ref()
 * and hyperv_sint_route_unref().
 */
struct HvSintRoute {
    /* SINT number; also the slot index into the message and event pages */
    uint32_t sint;
    /* SynIC of the target vcpu */
    SynICState *synic;
    /* GSI returned by kvm_irqchip_add_hv_sint_route() */
    int gsi;
    /* set by hyperv_sint_route_set_sint() to signal the SINT */
    EventNotifier sint_set_notifier;
    /* handled by sint_ack_handler(); only initialized when staged_msg is set */
    EventNotifier sint_ack_notifier;

    /* message staging area; NULL when the route was created without a cb */
    HvSintStagedMessage *staged_msg;

    /* the route is torn down when this drops to zero */
    unsigned refcount;
};
209 
210 static CPUState *hyperv_find_vcpu(uint32_t vp_index)
211 {
212     CPUState *cs = qemu_get_cpu(vp_index);
213     assert(hyperv_vp_index(cs) == vp_index);
214     return cs;
215 }
216 
217 /*
218  * BH to complete the processing of a staged message.
219  */
220 static void sint_msg_bh(void *opaque)
221 {
222     HvSintRoute *sint_route = opaque;
223     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
224 
225     if (atomic_read(&staged_msg->state) != HV_STAGED_MSG_POSTED) {
226         /* status nor ready yet (spurious ack from guest?), ignore */
227         return;
228     }
229 
230     staged_msg->cb(staged_msg->cb_data, staged_msg->status);
231     staged_msg->status = 0;
232 
233     /* staged message processing finished, ready to start over */
234     atomic_set(&staged_msg->state, HV_STAGED_MSG_FREE);
235     /* drop the reference taken in hyperv_post_msg */
236     hyperv_sint_route_unref(sint_route);
237 }
238 
239 /*
240  * Worker to transfer the message from the staging area into the SynIC message
241  * page in vcpu context.
242  */
243 static void cpu_post_msg(CPUState *cs, run_on_cpu_data data)
244 {
245     HvSintRoute *sint_route = data.host_ptr;
246     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
247     SynICState *synic = sint_route->synic;
248     struct hyperv_message *dst_msg;
249     bool wait_for_sint_ack = false;
250 
251     assert(staged_msg->state == HV_STAGED_MSG_BUSY);
252 
253     if (!synic->enabled || !synic->msg_page_addr) {
254         staged_msg->status = -ENXIO;
255         goto posted;
256     }
257 
258     dst_msg = &synic->msg_page->slot[sint_route->sint];
259 
260     if (dst_msg->header.message_type != HV_MESSAGE_NONE) {
261         dst_msg->header.message_flags |= HV_MESSAGE_FLAG_PENDING;
262         staged_msg->status = -EAGAIN;
263         wait_for_sint_ack = true;
264     } else {
265         memcpy(dst_msg, &staged_msg->msg, sizeof(*dst_msg));
266         staged_msg->status = hyperv_sint_route_set_sint(sint_route);
267     }
268 
269     memory_region_set_dirty(&synic->msg_page_mr, 0, sizeof(*synic->msg_page));
270 
271 posted:
272     atomic_set(&staged_msg->state, HV_STAGED_MSG_POSTED);
273     /*
274      * Notify the msg originator of the progress made; if the slot was busy we
275      * set msg_pending flag in it so it will be the guest who will do EOM and
276      * trigger the notification from KVM via sint_ack_notifier
277      */
278     if (!wait_for_sint_ack) {
279         aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh,
280                                 sint_route);
281     }
282 }
283 
284 /*
285  * Post a Hyper-V message to the staging area, for delivery to guest in the
286  * vcpu thread.
287  */
288 int hyperv_post_msg(HvSintRoute *sint_route, struct hyperv_message *src_msg)
289 {
290     HvSintStagedMessage *staged_msg = sint_route->staged_msg;
291 
292     assert(staged_msg);
293 
294     /* grab the staging area */
295     if (atomic_cmpxchg(&staged_msg->state, HV_STAGED_MSG_FREE,
296                        HV_STAGED_MSG_BUSY) != HV_STAGED_MSG_FREE) {
297         return -EAGAIN;
298     }
299 
300     memcpy(&staged_msg->msg, src_msg, sizeof(*src_msg));
301 
302     /* hold a reference on sint_route until the callback is finished */
303     hyperv_sint_route_ref(sint_route);
304 
305     /* schedule message posting attempt in vcpu thread */
306     async_run_on_cpu(sint_route->synic->cs, cpu_post_msg,
307                      RUN_ON_CPU_HOST_PTR(sint_route));
308     return 0;
309 }
310 
311 static void sint_ack_handler(EventNotifier *notifier)
312 {
313     HvSintRoute *sint_route = container_of(notifier, HvSintRoute,
314                                            sint_ack_notifier);
315     event_notifier_test_and_clear(notifier);
316 
317     /*
318      * the guest consumed the previous message so complete the current one with
319      * -EAGAIN and let the msg originator retry
320      */
321     aio_bh_schedule_oneshot(qemu_get_aio_context(), sint_msg_bh, sint_route);
322 }
323 
324 /*
325  * Set given event flag for a given sint on a given vcpu, and signal the sint.
326  */
327 int hyperv_set_event_flag(HvSintRoute *sint_route, unsigned eventno)
328 {
329     int ret;
330     SynICState *synic = sint_route->synic;
331     unsigned long *flags, set_mask;
332     unsigned set_idx;
333 
334     if (eventno > HV_EVENT_FLAGS_COUNT) {
335         return -EINVAL;
336     }
337     if (!synic->enabled || !synic->event_page_addr) {
338         return -ENXIO;
339     }
340 
341     set_idx = BIT_WORD(eventno);
342     set_mask = BIT_MASK(eventno);
343     flags = synic->event_page->slot[sint_route->sint].flags;
344 
345     if ((atomic_fetch_or(&flags[set_idx], set_mask) & set_mask) != set_mask) {
346         memory_region_set_dirty(&synic->event_page_mr, 0,
347                                 sizeof(*synic->event_page));
348         ret = hyperv_sint_route_set_sint(sint_route);
349     } else {
350         ret = 0;
351     }
352     return ret;
353 }
354 
355 HvSintRoute *hyperv_sint_route_new(uint32_t vp_index, uint32_t sint,
356                                    HvSintMsgCb cb, void *cb_data)
357 {
358     HvSintRoute *sint_route;
359     EventNotifier *ack_notifier;
360     int r, gsi;
361     CPUState *cs;
362     SynICState *synic;
363 
364     cs = hyperv_find_vcpu(vp_index);
365     if (!cs) {
366         return NULL;
367     }
368 
369     synic = get_synic(cs);
370     if (!synic) {
371         return NULL;
372     }
373 
374     sint_route = g_new0(HvSintRoute, 1);
375     r = event_notifier_init(&sint_route->sint_set_notifier, false);
376     if (r) {
377         goto err;
378     }
379 
380 
381     ack_notifier = cb ? &sint_route->sint_ack_notifier : NULL;
382     if (ack_notifier) {
383         sint_route->staged_msg = g_new0(HvSintStagedMessage, 1);
384         sint_route->staged_msg->cb = cb;
385         sint_route->staged_msg->cb_data = cb_data;
386 
387         r = event_notifier_init(ack_notifier, false);
388         if (r) {
389             goto err_sint_set_notifier;
390         }
391 
392         event_notifier_set_handler(ack_notifier, sint_ack_handler);
393     }
394 
395     gsi = kvm_irqchip_add_hv_sint_route(kvm_state, vp_index, sint);
396     if (gsi < 0) {
397         goto err_gsi;
398     }
399 
400     r = kvm_irqchip_add_irqfd_notifier_gsi(kvm_state,
401                                            &sint_route->sint_set_notifier,
402                                            ack_notifier, gsi);
403     if (r) {
404         goto err_irqfd;
405     }
406     sint_route->gsi = gsi;
407     sint_route->synic = synic;
408     sint_route->sint = sint;
409     sint_route->refcount = 1;
410 
411     return sint_route;
412 
413 err_irqfd:
414     kvm_irqchip_release_virq(kvm_state, gsi);
415 err_gsi:
416     if (ack_notifier) {
417         event_notifier_set_handler(ack_notifier, NULL);
418         event_notifier_cleanup(ack_notifier);
419         g_free(sint_route->staged_msg);
420     }
421 err_sint_set_notifier:
422     event_notifier_cleanup(&sint_route->sint_set_notifier);
423 err:
424     g_free(sint_route);
425 
426     return NULL;
427 }
428 
429 void hyperv_sint_route_ref(HvSintRoute *sint_route)
430 {
431     sint_route->refcount++;
432 }
433 
434 void hyperv_sint_route_unref(HvSintRoute *sint_route)
435 {
436     if (!sint_route) {
437         return;
438     }
439 
440     assert(sint_route->refcount > 0);
441 
442     if (--sint_route->refcount) {
443         return;
444     }
445 
446     kvm_irqchip_remove_irqfd_notifier_gsi(kvm_state,
447                                           &sint_route->sint_set_notifier,
448                                           sint_route->gsi);
449     kvm_irqchip_release_virq(kvm_state, sint_route->gsi);
450     if (sint_route->staged_msg) {
451         event_notifier_set_handler(&sint_route->sint_ack_notifier, NULL);
452         event_notifier_cleanup(&sint_route->sint_ack_notifier);
453         g_free(sint_route->staged_msg);
454     }
455     event_notifier_cleanup(&sint_route->sint_set_notifier);
456     g_free(sint_route);
457 }
458 
459 int hyperv_sint_route_set_sint(HvSintRoute *sint_route)
460 {
461     return event_notifier_set(&sint_route->sint_set_notifier);
462 }
463 
/*
 * Entry of the msg_handlers list: the handler that
 * hyperv_hcall_post_message() invokes for a given connection id.
 */
typedef struct MsgHandler {
    /* used by g_free_rcu() when the entry is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(MsgHandler) link;
    /* connection id this handler is registered for */
    uint32_t conn_id;
    /* callback and the opaque data passed to it */
    HvMsgHandler handler;
    void *data;
} MsgHandler;
471 
/*
 * Entry of the event_flag_handlers list: the notifier that
 * hyperv_hcall_signal_event() sets for a given connection id.
 */
typedef struct EventFlagHandler {
    /* used by g_free_rcu() when the entry is removed */
    struct rcu_head rcu;
    QLIST_ENTRY(EventFlagHandler) link;
    /* connection id this notifier is registered for */
    uint32_t conn_id;
    /* notifier to set when the connection is signalled */
    EventNotifier *notifier;
} EventFlagHandler;
478 
/*
 * Registered handlers.  The lists are modified with handlers_mutex held and
 * traversed under the RCU read lock by the hypercall handlers.
 */
static QLIST_HEAD(, MsgHandler) msg_handlers;
static QLIST_HEAD(, EventFlagHandler) event_flag_handlers;
static QemuMutex handlers_mutex;
482 
483 static void __attribute__((constructor)) hv_init(void)
484 {
485     QLIST_INIT(&msg_handlers);
486     QLIST_INIT(&event_flag_handlers);
487     qemu_mutex_init(&handlers_mutex);
488 }
489 
490 int hyperv_set_msg_handler(uint32_t conn_id, HvMsgHandler handler, void *data)
491 {
492     int ret;
493     MsgHandler *mh;
494 
495     QEMU_LOCK_GUARD(&handlers_mutex);
496     QLIST_FOREACH(mh, &msg_handlers, link) {
497         if (mh->conn_id == conn_id) {
498             if (handler) {
499                 ret = -EEXIST;
500             } else {
501                 QLIST_REMOVE_RCU(mh, link);
502                 g_free_rcu(mh, rcu);
503                 ret = 0;
504             }
505             return ret;
506         }
507     }
508 
509     if (handler) {
510         mh = g_new(MsgHandler, 1);
511         mh->conn_id = conn_id;
512         mh->handler = handler;
513         mh->data = data;
514         QLIST_INSERT_HEAD_RCU(&msg_handlers, mh, link);
515         ret = 0;
516     } else {
517         ret = -ENOENT;
518     }
519 
520     return ret;
521 }
522 
523 uint16_t hyperv_hcall_post_message(uint64_t param, bool fast)
524 {
525     uint16_t ret;
526     hwaddr len;
527     struct hyperv_post_message_input *msg;
528     MsgHandler *mh;
529 
530     if (fast) {
531         return HV_STATUS_INVALID_HYPERCALL_CODE;
532     }
533     if (param & (__alignof__(*msg) - 1)) {
534         return HV_STATUS_INVALID_ALIGNMENT;
535     }
536 
537     len = sizeof(*msg);
538     msg = cpu_physical_memory_map(param, &len, 0);
539     if (len < sizeof(*msg)) {
540         ret = HV_STATUS_INSUFFICIENT_MEMORY;
541         goto unmap;
542     }
543     if (msg->payload_size > sizeof(msg->payload)) {
544         ret = HV_STATUS_INVALID_HYPERCALL_INPUT;
545         goto unmap;
546     }
547 
548     ret = HV_STATUS_INVALID_CONNECTION_ID;
549     WITH_RCU_READ_LOCK_GUARD() {
550         QLIST_FOREACH_RCU(mh, &msg_handlers, link) {
551             if (mh->conn_id == (msg->connection_id & HV_CONNECTION_ID_MASK)) {
552                 ret = mh->handler(msg, mh->data);
553                 break;
554             }
555         }
556     }
557 
558 unmap:
559     cpu_physical_memory_unmap(msg, len, 0, 0);
560     return ret;
561 }
562 
563 static int set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
564 {
565     int ret;
566     EventFlagHandler *handler;
567 
568     QEMU_LOCK_GUARD(&handlers_mutex);
569     QLIST_FOREACH(handler, &event_flag_handlers, link) {
570         if (handler->conn_id == conn_id) {
571             if (notifier) {
572                 ret = -EEXIST;
573             } else {
574                 QLIST_REMOVE_RCU(handler, link);
575                 g_free_rcu(handler, rcu);
576                 ret = 0;
577             }
578             return ret;
579         }
580     }
581 
582     if (notifier) {
583         handler = g_new(EventFlagHandler, 1);
584         handler->conn_id = conn_id;
585         handler->notifier = notifier;
586         QLIST_INSERT_HEAD_RCU(&event_flag_handlers, handler, link);
587         ret = 0;
588     } else {
589         ret = -ENOENT;
590     }
591 
592     return ret;
593 }
594 
595 static bool process_event_flags_userspace;
596 
597 int hyperv_set_event_flag_handler(uint32_t conn_id, EventNotifier *notifier)
598 {
599     if (!process_event_flags_userspace &&
600         !kvm_check_extension(kvm_state, KVM_CAP_HYPERV_EVENTFD)) {
601         process_event_flags_userspace = true;
602 
603         warn_report("Hyper-V event signaling is not supported by this kernel; "
604                     "using slower userspace hypercall processing");
605     }
606 
607     if (!process_event_flags_userspace) {
608         struct kvm_hyperv_eventfd hvevfd = {
609             .conn_id = conn_id,
610             .fd = notifier ? event_notifier_get_fd(notifier) : -1,
611             .flags = notifier ? 0 : KVM_HYPERV_EVENTFD_DEASSIGN,
612         };
613 
614         return kvm_vm_ioctl(kvm_state, KVM_HYPERV_EVENTFD, &hvevfd);
615     }
616     return set_event_flag_handler(conn_id, notifier);
617 }
618 
619 uint16_t hyperv_hcall_signal_event(uint64_t param, bool fast)
620 {
621     EventFlagHandler *handler;
622 
623     if (unlikely(!fast)) {
624         hwaddr addr = param;
625 
626         if (addr & (__alignof__(addr) - 1)) {
627             return HV_STATUS_INVALID_ALIGNMENT;
628         }
629 
630         param = ldq_phys(&address_space_memory, addr);
631     }
632 
633     /*
634      * Per spec, bits 32-47 contain the extra "flag number".  However, we
635      * have no use for it, and in all known usecases it is zero, so just
636      * report lookup failure if it isn't.
637      */
638     if (param & 0xffff00000000ULL) {
639         return HV_STATUS_INVALID_PORT_ID;
640     }
641     /* remaining bits are reserved-zero */
642     if (param & ~HV_CONNECTION_ID_MASK) {
643         return HV_STATUS_INVALID_HYPERCALL_INPUT;
644     }
645 
646     RCU_READ_LOCK_GUARD();
647     QLIST_FOREACH_RCU(handler, &event_flag_handlers, link) {
648         if (handler->conn_id == param) {
649             event_notifier_set(handler->notifier);
650             return 0;
651         }
652     }
653     return HV_STATUS_INVALID_CONNECTION_ID;
654 }
655