1be8d8537SStefan Hajnoczi /* 2be8d8537SStefan Hajnoczi * Event loop thread 3be8d8537SStefan Hajnoczi * 4be8d8537SStefan Hajnoczi * Copyright Red Hat Inc., 2013 5be8d8537SStefan Hajnoczi * 6be8d8537SStefan Hajnoczi * Authors: 7be8d8537SStefan Hajnoczi * Stefan Hajnoczi <stefanha@redhat.com> 8be8d8537SStefan Hajnoczi * 9be8d8537SStefan Hajnoczi * This work is licensed under the terms of the GNU GPL, version 2 or later. 10be8d8537SStefan Hajnoczi * See the COPYING file in the top-level directory. 11be8d8537SStefan Hajnoczi * 12be8d8537SStefan Hajnoczi */ 13be8d8537SStefan Hajnoczi 14d38ea87aSPeter Maydell #include "qemu/osdep.h" 15be8d8537SStefan Hajnoczi #include "qom/object.h" 16be8d8537SStefan Hajnoczi #include "qom/object_interfaces.h" 17be8d8537SStefan Hajnoczi #include "qemu/module.h" 18be8d8537SStefan Hajnoczi #include "block/aio.h" 19d16341faSPaolo Bonzini #include "block/block.h" 20be8d8537SStefan Hajnoczi #include "sysemu/iothread.h" 21dc3dd0d2SStefan Hajnoczi #include "qmp-commands.h" 222f78e491SChrysostomos Nanakos #include "qemu/error-report.h" 23ab28bd23SPaolo Bonzini #include "qemu/rcu.h" 24e4370165SPaolo Bonzini #include "qemu/main-loop.h" 25be8d8537SStefan Hajnoczi 26be8d8537SStefan Hajnoczi typedef ObjectClass IOThreadClass; 27be8d8537SStefan Hajnoczi 28be8d8537SStefan Hajnoczi #define IOTHREAD_GET_CLASS(obj) \ 29be8d8537SStefan Hajnoczi OBJECT_GET_CLASS(IOThreadClass, obj, TYPE_IOTHREAD) 30be8d8537SStefan Hajnoczi #define IOTHREAD_CLASS(klass) \ 31be8d8537SStefan Hajnoczi OBJECT_CLASS_CHECK(IOThreadClass, klass, TYPE_IOTHREAD) 32be8d8537SStefan Hajnoczi 33cdd7abfdSStefan Hajnoczi /* Benchmark results from 2016 on NVMe SSD drives show max polling times around 34cdd7abfdSStefan Hajnoczi * 16-32 microseconds yield IOPS improvements for both iodepth=1 and iodepth=32 35cdd7abfdSStefan Hajnoczi * workloads. 36cdd7abfdSStefan Hajnoczi */ 37cdd7abfdSStefan Hajnoczi #define IOTHREAD_POLL_MAX_NS_DEFAULT 32768ULL 38cdd7abfdSStefan Hajnoczi 39e4370165SPaolo Bonzini static __thread IOThread *my_iothread; 40e4370165SPaolo Bonzini 41e4370165SPaolo Bonzini AioContext *qemu_get_current_aio_context(void) 42e4370165SPaolo Bonzini { 43e4370165SPaolo Bonzini return my_iothread ? my_iothread->ctx : qemu_get_aio_context(); 44e4370165SPaolo Bonzini } 45e4370165SPaolo Bonzini 46be8d8537SStefan Hajnoczi static void *iothread_run(void *opaque) 47be8d8537SStefan Hajnoczi { 48be8d8537SStefan Hajnoczi IOThread *iothread = opaque; 49be8d8537SStefan Hajnoczi 50ab28bd23SPaolo Bonzini rcu_register_thread(); 51ab28bd23SPaolo Bonzini 52e4370165SPaolo Bonzini my_iothread = iothread; 5388eb7c29SStefan Hajnoczi qemu_mutex_lock(&iothread->init_done_lock); 5488eb7c29SStefan Hajnoczi iothread->thread_id = qemu_get_thread_id(); 5588eb7c29SStefan Hajnoczi qemu_cond_signal(&iothread->init_done_cond); 5688eb7c29SStefan Hajnoczi qemu_mutex_unlock(&iothread->init_done_lock); 5788eb7c29SStefan Hajnoczi 5865c1b5b6SPaolo Bonzini while (!atomic_read(&iothread->stopping)) { 5965c1b5b6SPaolo Bonzini aio_poll(iothread->ctx, true); 60be8d8537SStefan Hajnoczi } 61ab28bd23SPaolo Bonzini 62ab28bd23SPaolo Bonzini rcu_unregister_thread(); 63be8d8537SStefan Hajnoczi return NULL; 64be8d8537SStefan Hajnoczi } 65be8d8537SStefan Hajnoczi 66dce8921bSFam Zheng static int iothread_stop(Object *object, void *opaque) 67be8d8537SStefan Hajnoczi { 68dce8921bSFam Zheng IOThread *iothread; 69be8d8537SStefan Hajnoczi 70dce8921bSFam Zheng iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); 71dce8921bSFam Zheng if (!iothread || !iothread->ctx) { 72dce8921bSFam Zheng return 0; 732f78e491SChrysostomos Nanakos } 74be8d8537SStefan Hajnoczi iothread->stopping = true; 75be8d8537SStefan Hajnoczi aio_notify(iothread->ctx); 76be8d8537SStefan Hajnoczi qemu_thread_join(&iothread->thread); 77dce8921bSFam Zheng return 0; 78dce8921bSFam Zheng } 79dce8921bSFam Zheng 80cdd7abfdSStefan Hajnoczi static void iothread_instance_init(Object *obj) 81cdd7abfdSStefan Hajnoczi { 82cdd7abfdSStefan Hajnoczi IOThread *iothread = IOTHREAD(obj); 83cdd7abfdSStefan Hajnoczi 84cdd7abfdSStefan Hajnoczi iothread->poll_max_ns = IOTHREAD_POLL_MAX_NS_DEFAULT; 85cdd7abfdSStefan Hajnoczi } 86cdd7abfdSStefan Hajnoczi 87dce8921bSFam Zheng static void iothread_instance_finalize(Object *obj) 88dce8921bSFam Zheng { 89dce8921bSFam Zheng IOThread *iothread = IOTHREAD(obj); 90dce8921bSFam Zheng 91dce8921bSFam Zheng iothread_stop(obj, NULL); 9288eb7c29SStefan Hajnoczi qemu_cond_destroy(&iothread->init_done_cond); 9388eb7c29SStefan Hajnoczi qemu_mutex_destroy(&iothread->init_done_lock); 94eb7b5c35SLin Ma if (!iothread->ctx) { 95eb7b5c35SLin Ma return; 96eb7b5c35SLin Ma } 97be8d8537SStefan Hajnoczi aio_context_unref(iothread->ctx); 98be8d8537SStefan Hajnoczi } 99be8d8537SStefan Hajnoczi 100be8d8537SStefan Hajnoczi static void iothread_complete(UserCreatable *obj, Error **errp) 101be8d8537SStefan Hajnoczi { 1022f78e491SChrysostomos Nanakos Error *local_error = NULL; 103be8d8537SStefan Hajnoczi IOThread *iothread = IOTHREAD(obj); 104d21e8776SPaolo Bonzini char *name, *thread_name; 105be8d8537SStefan Hajnoczi 106be8d8537SStefan Hajnoczi iothread->stopping = false; 10788eb7c29SStefan Hajnoczi iothread->thread_id = -1; 1082f78e491SChrysostomos Nanakos iothread->ctx = aio_context_new(&local_error); 1092f78e491SChrysostomos Nanakos if (!iothread->ctx) { 1102f78e491SChrysostomos Nanakos error_propagate(errp, local_error); 1112f78e491SChrysostomos Nanakos return; 1122f78e491SChrysostomos Nanakos } 11388eb7c29SStefan Hajnoczi 1145e5db499SStefan Hajnoczi aio_context_set_poll_params(iothread->ctx, 1155e5db499SStefan Hajnoczi iothread->poll_max_ns, 1165e5db499SStefan Hajnoczi iothread->poll_grow, 1175e5db499SStefan Hajnoczi iothread->poll_shrink, 1180d9d86fbSStefan Hajnoczi &local_error); 1190d9d86fbSStefan Hajnoczi if (local_error) { 1200d9d86fbSStefan Hajnoczi error_propagate(errp, local_error); 1210d9d86fbSStefan Hajnoczi aio_context_unref(iothread->ctx); 1220d9d86fbSStefan Hajnoczi iothread->ctx = NULL; 1230d9d86fbSStefan Hajnoczi return; 1240d9d86fbSStefan Hajnoczi } 1250d9d86fbSStefan Hajnoczi 12688eb7c29SStefan Hajnoczi qemu_mutex_init(&iothread->init_done_lock); 12788eb7c29SStefan Hajnoczi qemu_cond_init(&iothread->init_done_cond); 128be8d8537SStefan Hajnoczi 129be8d8537SStefan Hajnoczi /* This assumes we are called from a thread with useful CPU affinity for us 130be8d8537SStefan Hajnoczi * to inherit. 131be8d8537SStefan Hajnoczi */ 132d21e8776SPaolo Bonzini name = object_get_canonical_path_component(OBJECT(obj)); 133d21e8776SPaolo Bonzini thread_name = g_strdup_printf("IO %s", name); 134d21e8776SPaolo Bonzini qemu_thread_create(&iothread->thread, thread_name, iothread_run, 135be8d8537SStefan Hajnoczi iothread, QEMU_THREAD_JOINABLE); 136d21e8776SPaolo Bonzini g_free(thread_name); 137d21e8776SPaolo Bonzini g_free(name); 13888eb7c29SStefan Hajnoczi 13988eb7c29SStefan Hajnoczi /* Wait for initialization to complete */ 14088eb7c29SStefan Hajnoczi qemu_mutex_lock(&iothread->init_done_lock); 14188eb7c29SStefan Hajnoczi while (iothread->thread_id == -1) { 14288eb7c29SStefan Hajnoczi qemu_cond_wait(&iothread->init_done_cond, 14388eb7c29SStefan Hajnoczi &iothread->init_done_lock); 14488eb7c29SStefan Hajnoczi } 14588eb7c29SStefan Hajnoczi qemu_mutex_unlock(&iothread->init_done_lock); 146be8d8537SStefan Hajnoczi } 147be8d8537SStefan Hajnoczi 1485e5db499SStefan Hajnoczi typedef struct { 1495e5db499SStefan Hajnoczi const char *name; 1505e5db499SStefan Hajnoczi ptrdiff_t offset; /* field's byte offset in IOThread struct */ 1515e5db499SStefan Hajnoczi } PollParamInfo; 1525e5db499SStefan Hajnoczi 1535e5db499SStefan Hajnoczi static PollParamInfo poll_max_ns_info = { 1545e5db499SStefan Hajnoczi "poll-max-ns", offsetof(IOThread, poll_max_ns), 1555e5db499SStefan Hajnoczi }; 1565e5db499SStefan Hajnoczi static PollParamInfo poll_grow_info = { 1575e5db499SStefan Hajnoczi "poll-grow", offsetof(IOThread, poll_grow), 1585e5db499SStefan Hajnoczi }; 1595e5db499SStefan Hajnoczi static PollParamInfo poll_shrink_info = { 1605e5db499SStefan Hajnoczi "poll-shrink", offsetof(IOThread, poll_shrink), 1615e5db499SStefan Hajnoczi }; 1625e5db499SStefan Hajnoczi 1635e5db499SStefan Hajnoczi static void iothread_get_poll_param(Object *obj, Visitor *v, 1640d9d86fbSStefan Hajnoczi const char *name, void *opaque, Error **errp) 1650d9d86fbSStefan Hajnoczi { 1660d9d86fbSStefan Hajnoczi IOThread *iothread = IOTHREAD(obj); 1675e5db499SStefan Hajnoczi PollParamInfo *info = opaque; 1685e5db499SStefan Hajnoczi int64_t *field = (void *)iothread + info->offset; 1690d9d86fbSStefan Hajnoczi 1705e5db499SStefan Hajnoczi visit_type_int64(v, name, field, errp); 1710d9d86fbSStefan Hajnoczi } 1720d9d86fbSStefan Hajnoczi 1735e5db499SStefan Hajnoczi static void iothread_set_poll_param(Object *obj, Visitor *v, 1740d9d86fbSStefan Hajnoczi const char *name, void *opaque, Error **errp) 1750d9d86fbSStefan Hajnoczi { 1760d9d86fbSStefan Hajnoczi IOThread *iothread = IOTHREAD(obj); 1775e5db499SStefan Hajnoczi PollParamInfo *info = opaque; 1785e5db499SStefan Hajnoczi int64_t *field = (void *)iothread + info->offset; 1790d9d86fbSStefan Hajnoczi Error *local_err = NULL; 1800d9d86fbSStefan Hajnoczi int64_t value; 1810d9d86fbSStefan Hajnoczi 1820d9d86fbSStefan Hajnoczi visit_type_int64(v, name, &value, &local_err); 1830d9d86fbSStefan Hajnoczi if (local_err) { 1840d9d86fbSStefan Hajnoczi goto out; 1850d9d86fbSStefan Hajnoczi } 1860d9d86fbSStefan Hajnoczi 1870d9d86fbSStefan Hajnoczi if (value < 0) { 1885e5db499SStefan Hajnoczi error_setg(&local_err, "%s value must be in range [0, %"PRId64"]", 1895e5db499SStefan Hajnoczi info->name, INT64_MAX); 1900d9d86fbSStefan Hajnoczi goto out; 1910d9d86fbSStefan Hajnoczi } 1920d9d86fbSStefan Hajnoczi 1935e5db499SStefan Hajnoczi *field = value; 1940d9d86fbSStefan Hajnoczi 1950d9d86fbSStefan Hajnoczi if (iothread->ctx) { 1965e5db499SStefan Hajnoczi aio_context_set_poll_params(iothread->ctx, 1975e5db499SStefan Hajnoczi iothread->poll_max_ns, 1985e5db499SStefan Hajnoczi iothread->poll_grow, 1995e5db499SStefan Hajnoczi iothread->poll_shrink, 2005e5db499SStefan Hajnoczi &local_err); 2010d9d86fbSStefan Hajnoczi } 2020d9d86fbSStefan Hajnoczi 2030d9d86fbSStefan Hajnoczi out: 2040d9d86fbSStefan Hajnoczi error_propagate(errp, local_err); 2050d9d86fbSStefan Hajnoczi } 2060d9d86fbSStefan Hajnoczi 207be8d8537SStefan Hajnoczi static void iothread_class_init(ObjectClass *klass, void *class_data) 208be8d8537SStefan Hajnoczi { 209be8d8537SStefan Hajnoczi UserCreatableClass *ucc = USER_CREATABLE_CLASS(klass); 210be8d8537SStefan Hajnoczi ucc->complete = iothread_complete; 2110d9d86fbSStefan Hajnoczi 2120d9d86fbSStefan Hajnoczi object_class_property_add(klass, "poll-max-ns", "int", 2135e5db499SStefan Hajnoczi iothread_get_poll_param, 2145e5db499SStefan Hajnoczi iothread_set_poll_param, 2155e5db499SStefan Hajnoczi NULL, &poll_max_ns_info, &error_abort); 2165e5db499SStefan Hajnoczi object_class_property_add(klass, "poll-grow", "int", 2175e5db499SStefan Hajnoczi iothread_get_poll_param, 2185e5db499SStefan Hajnoczi iothread_set_poll_param, 2195e5db499SStefan Hajnoczi NULL, &poll_grow_info, &error_abort); 2205e5db499SStefan Hajnoczi object_class_property_add(klass, "poll-shrink", "int", 2215e5db499SStefan Hajnoczi iothread_get_poll_param, 2225e5db499SStefan Hajnoczi iothread_set_poll_param, 2235e5db499SStefan Hajnoczi NULL, &poll_shrink_info, &error_abort); 224be8d8537SStefan Hajnoczi } 225be8d8537SStefan Hajnoczi 226be8d8537SStefan Hajnoczi static const TypeInfo iothread_info = { 227be8d8537SStefan Hajnoczi .name = TYPE_IOTHREAD, 228be8d8537SStefan Hajnoczi .parent = TYPE_OBJECT, 229be8d8537SStefan Hajnoczi .class_init = iothread_class_init, 230be8d8537SStefan Hajnoczi .instance_size = sizeof(IOThread), 231cdd7abfdSStefan Hajnoczi .instance_init = iothread_instance_init, 232be8d8537SStefan Hajnoczi .instance_finalize = iothread_instance_finalize, 233be8d8537SStefan Hajnoczi .interfaces = (InterfaceInfo[]) { 234be8d8537SStefan Hajnoczi {TYPE_USER_CREATABLE}, 235be8d8537SStefan Hajnoczi {} 236be8d8537SStefan Hajnoczi }, 237be8d8537SStefan Hajnoczi }; 238be8d8537SStefan Hajnoczi 239be8d8537SStefan Hajnoczi static void iothread_register_types(void) 240be8d8537SStefan Hajnoczi { 241be8d8537SStefan Hajnoczi type_register_static(&iothread_info); 242be8d8537SStefan Hajnoczi } 243be8d8537SStefan Hajnoczi 244be8d8537SStefan Hajnoczi type_init(iothread_register_types) 245be8d8537SStefan Hajnoczi 246be8d8537SStefan Hajnoczi char *iothread_get_id(IOThread *iothread) 247be8d8537SStefan Hajnoczi { 248be8d8537SStefan Hajnoczi return object_get_canonical_path_component(OBJECT(iothread)); 249be8d8537SStefan Hajnoczi } 250be8d8537SStefan Hajnoczi 251be8d8537SStefan Hajnoczi AioContext *iothread_get_aio_context(IOThread *iothread) 252be8d8537SStefan Hajnoczi { 253be8d8537SStefan Hajnoczi return iothread->ctx; 254be8d8537SStefan Hajnoczi } 255dc3dd0d2SStefan Hajnoczi 256dc3dd0d2SStefan Hajnoczi static int query_one_iothread(Object *object, void *opaque) 257dc3dd0d2SStefan Hajnoczi { 258dc3dd0d2SStefan Hajnoczi IOThreadInfoList ***prev = opaque; 259dc3dd0d2SStefan Hajnoczi IOThreadInfoList *elem; 260dc3dd0d2SStefan Hajnoczi IOThreadInfo *info; 261dc3dd0d2SStefan Hajnoczi IOThread *iothread; 262dc3dd0d2SStefan Hajnoczi 263dc3dd0d2SStefan Hajnoczi iothread = (IOThread *)object_dynamic_cast(object, TYPE_IOTHREAD); 264dc3dd0d2SStefan Hajnoczi if (!iothread) { 265dc3dd0d2SStefan Hajnoczi return 0; 266dc3dd0d2SStefan Hajnoczi } 267dc3dd0d2SStefan Hajnoczi 268dc3dd0d2SStefan Hajnoczi info = g_new0(IOThreadInfo, 1); 269dc3dd0d2SStefan Hajnoczi info->id = iothread_get_id(iothread); 270dc3dd0d2SStefan Hajnoczi info->thread_id = iothread->thread_id; 271*5fc00480SPavel Hrdina info->poll_max_ns = iothread->poll_max_ns; 272*5fc00480SPavel Hrdina info->poll_grow = iothread->poll_grow; 273*5fc00480SPavel Hrdina info->poll_shrink = iothread->poll_shrink; 274dc3dd0d2SStefan Hajnoczi 275dc3dd0d2SStefan Hajnoczi elem = g_new0(IOThreadInfoList, 1); 276dc3dd0d2SStefan Hajnoczi elem->value = info; 277dc3dd0d2SStefan Hajnoczi elem->next = NULL; 278dc3dd0d2SStefan Hajnoczi 279dc3dd0d2SStefan Hajnoczi **prev = elem; 280dc3dd0d2SStefan Hajnoczi *prev = &elem->next; 281dc3dd0d2SStefan Hajnoczi return 0; 282dc3dd0d2SStefan Hajnoczi } 283dc3dd0d2SStefan Hajnoczi 284dc3dd0d2SStefan Hajnoczi IOThreadInfoList *qmp_query_iothreads(Error **errp) 285dc3dd0d2SStefan Hajnoczi { 286dc3dd0d2SStefan Hajnoczi IOThreadInfoList *head = NULL; 287dc3dd0d2SStefan Hajnoczi IOThreadInfoList **prev = &head; 288bc2256c4SDaniel P. Berrange Object *container = object_get_objects_root(); 289dc3dd0d2SStefan Hajnoczi 290dc3dd0d2SStefan Hajnoczi object_child_foreach(container, query_one_iothread, &prev); 291dc3dd0d2SStefan Hajnoczi return head; 292dc3dd0d2SStefan Hajnoczi } 293dce8921bSFam Zheng 294dce8921bSFam Zheng void iothread_stop_all(void) 295dce8921bSFam Zheng { 296dce8921bSFam Zheng Object *container = object_get_objects_root(); 297d16341faSPaolo Bonzini BlockDriverState *bs; 298d16341faSPaolo Bonzini BdrvNextIterator it; 299d16341faSPaolo Bonzini 300d16341faSPaolo Bonzini for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { 301d16341faSPaolo Bonzini AioContext *ctx = bdrv_get_aio_context(bs); 302d16341faSPaolo Bonzini if (ctx == qemu_get_aio_context()) { 303d16341faSPaolo Bonzini continue; 304d16341faSPaolo Bonzini } 305d16341faSPaolo Bonzini aio_context_acquire(ctx); 306d16341faSPaolo Bonzini bdrv_set_aio_context(bs, qemu_get_aio_context()); 307d16341faSPaolo Bonzini aio_context_release(ctx); 308d16341faSPaolo Bonzini } 309dce8921bSFam Zheng 310dce8921bSFam Zheng object_child_foreach(container, iothread_stop, NULL); 311dce8921bSFam Zheng } 312