1 /* 2 * QEMU coroutine implementation 3 * 4 * Copyright IBM, Corp. 2011 5 * 6 * Authors: 7 * Stefan Hajnoczi <stefanha@linux.vnet.ibm.com> 8 * Kevin Wolf <kwolf@redhat.com> 9 * 10 * This work is licensed under the terms of the GNU LGPL, version 2 or later. 11 * See the COPYING.LIB file in the top-level directory. 12 * 13 */ 14 15 #ifndef QEMU_COROUTINE_H 16 #define QEMU_COROUTINE_H 17 18 #include "qemu/queue.h" 19 #include "qemu/timer.h" 20 21 /** 22 * Coroutines are a mechanism for stack switching and can be used for 23 * cooperative userspace threading. These functions provide a simple but 24 * useful flavor of coroutines that is suitable for writing sequential code, 25 * rather than callbacks, for operations that need to give up control while 26 * waiting for events to complete. 27 * 28 * These functions are re-entrant and may be used outside the global mutex. 29 */ 30 31 /** 32 * Mark a function that executes in coroutine context 33 * 34 * Functions that execute in coroutine context cannot be called directly from 35 * normal functions. In the future it would be nice to enable compiler or 36 * static checker support for catching such errors. This annotation might make 37 * it possible and in the meantime it serves as documentation. 38 * 39 * For example: 40 * 41 * static void coroutine_fn foo(void) { 42 * .... 43 * } 44 */ 45 #define coroutine_fn 46 47 typedef struct Coroutine Coroutine; 48 49 /** 50 * Coroutine entry point 51 * 52 * When the coroutine is entered for the first time, opaque is passed in as an 53 * argument. 54 * 55 * When this function returns, the coroutine is destroyed automatically and 56 * execution continues in the caller who last entered the coroutine. 57 */ 58 typedef void coroutine_fn CoroutineEntry(void *opaque); 59 60 /** 61 * Create a new coroutine 62 * 63 * Use qemu_coroutine_enter() to actually transfer control to the coroutine. 64 * The opaque argument is passed as the argument to the entry point. 65 */ 66 Coroutine *qemu_coroutine_create(CoroutineEntry *entry, void *opaque); 67 68 /** 69 * Transfer control to a coroutine 70 */ 71 void qemu_coroutine_enter(Coroutine *coroutine); 72 73 /** 74 * Transfer control to a coroutine if it's not active (i.e. part of the call 75 * stack of the running coroutine). Otherwise, do nothing. 76 */ 77 void qemu_coroutine_enter_if_inactive(Coroutine *co); 78 79 /** 80 * Transfer control to a coroutine and associate it with ctx 81 */ 82 void qemu_aio_coroutine_enter(AioContext *ctx, Coroutine *co); 83 84 /** 85 * Transfer control back to a coroutine's caller 86 * 87 * This function does not return until the coroutine is re-entered using 88 * qemu_coroutine_enter(). 89 */ 90 void coroutine_fn qemu_coroutine_yield(void); 91 92 /** 93 * Get the AioContext of the given coroutine 94 */ 95 AioContext *qemu_coroutine_get_aio_context(Coroutine *co); 96 97 /** 98 * Get the currently executing coroutine 99 */ 100 Coroutine *qemu_coroutine_self(void); 101 102 /** 103 * Return whether or not currently inside a coroutine 104 * 105 * This can be used to write functions that work both when in coroutine context 106 * and when not in coroutine context. Note that such functions cannot use the 107 * coroutine_fn annotation since they work outside coroutine context. 108 */ 109 bool qemu_in_coroutine(void); 110 111 /** 112 * Return true if the coroutine is currently entered 113 * 114 * A coroutine is "entered" if it has not yielded from the current 115 * qemu_coroutine_enter() call used to run it. This does not mean that the 116 * coroutine is currently executing code since it may have transferred control 117 * to another coroutine using qemu_coroutine_enter(). 118 * 119 * When several coroutines enter each other there may be no way to know which 120 * ones have already been entered. In such situations this function can be 121 * used to avoid recursively entering coroutines. 122 */ 123 bool qemu_coroutine_entered(Coroutine *co); 124 125 /** 126 * Provides a mutex that can be used to synchronise coroutines 127 */ 128 struct CoWaitRecord; 129 struct CoMutex { 130 /* Count of pending lockers; 0 for a free mutex, 1 for an 131 * uncontended mutex. 132 */ 133 unsigned locked; 134 135 /* Context that is holding the lock. Useful to avoid spinning 136 * when two coroutines on the same AioContext try to get the lock. :) 137 */ 138 AioContext *ctx; 139 140 /* A queue of waiters. Elements are added atomically in front of 141 * from_push. to_pop is only populated, and popped from, by whoever 142 * is in charge of the next wakeup. This can be an unlocker or, 143 * through the handoff protocol, a locker that is about to go to sleep. 144 */ 145 QSLIST_HEAD(, CoWaitRecord) from_push, to_pop; 146 147 unsigned handoff, sequence; 148 149 Coroutine *holder; 150 }; 151 152 /** 153 * Initialises a CoMutex. This must be called before any other operation is used 154 * on the CoMutex. 155 */ 156 void qemu_co_mutex_init(CoMutex *mutex); 157 158 /** 159 * Locks the mutex. If the lock cannot be taken immediately, control is 160 * transferred to the caller of the current coroutine. 161 */ 162 void coroutine_fn qemu_co_mutex_lock(CoMutex *mutex); 163 164 /** 165 * Unlocks the mutex and schedules the next coroutine that was waiting for this 166 * lock to be run. 167 */ 168 void coroutine_fn qemu_co_mutex_unlock(CoMutex *mutex); 169 170 /** 171 * Assert that the current coroutine holds @mutex. 172 */ 173 static inline coroutine_fn void qemu_co_mutex_assert_locked(CoMutex *mutex) 174 { 175 /* 176 * mutex->holder doesn't need any synchronisation if the assertion holds 177 * true because the mutex protects it. If it doesn't hold true, we still 178 * don't mind if another thread takes or releases mutex behind our back, 179 * because the condition will be false no matter whether we read NULL or 180 * the pointer for any other coroutine. 181 */ 182 assert(qatomic_read(&mutex->locked) && 183 mutex->holder == qemu_coroutine_self()); 184 } 185 186 /** 187 * CoQueues are a mechanism to queue coroutines in order to continue executing 188 * them later. They are similar to condition variables, but they need help 189 * from an external mutex in order to maintain thread-safety. 190 */ 191 typedef struct CoQueue { 192 QSIMPLEQ_HEAD(, Coroutine) entries; 193 } CoQueue; 194 195 /** 196 * Initialise a CoQueue. This must be called before any other operation is used 197 * on the CoQueue. 198 */ 199 void qemu_co_queue_init(CoQueue *queue); 200 201 typedef enum { 202 /* 203 * Enqueue at front instead of back. Use this to re-queue a request when 204 * its wait condition is not satisfied after being woken up. 205 */ 206 CO_QUEUE_WAIT_FRONT = 0x1, 207 } CoQueueWaitFlags; 208 209 /** 210 * Adds the current coroutine to the CoQueue and transfers control to the 211 * caller of the coroutine. The mutex is unlocked during the wait and 212 * locked again afterwards. 213 */ 214 #define qemu_co_queue_wait(queue, lock) \ 215 qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), 0) 216 #define qemu_co_queue_wait_flags(queue, lock, flags) \ 217 qemu_co_queue_wait_impl(queue, QEMU_MAKE_LOCKABLE(lock), (flags)) 218 void coroutine_fn qemu_co_queue_wait_impl(CoQueue *queue, QemuLockable *lock, 219 CoQueueWaitFlags flags); 220 221 /** 222 * Removes the next coroutine from the CoQueue, and queue it to run after 223 * the currently-running coroutine yields. 224 * Returns true if a coroutine was removed, false if the queue is empty. 225 * Used from coroutine context, use qemu_co_enter_next outside. 226 */ 227 bool coroutine_fn qemu_co_queue_next(CoQueue *queue); 228 229 /** 230 * Empties the CoQueue and queues the coroutine to run after 231 * the currently-running coroutine yields. 232 * Used from coroutine context, use qemu_co_enter_all outside. 233 */ 234 void coroutine_fn qemu_co_queue_restart_all(CoQueue *queue); 235 236 /** 237 * Removes the next coroutine from the CoQueue, and wake it up. Unlike 238 * qemu_co_queue_next, this function releases the lock during aio_co_wake 239 * because it is meant to be used outside coroutine context; in that case, the 240 * coroutine is entered immediately, before qemu_co_enter_next returns. 241 * 242 * If used in coroutine context, qemu_co_enter_next is equivalent to 243 * qemu_co_queue_next. 244 */ 245 #define qemu_co_enter_next(queue, lock) \ 246 qemu_co_enter_next_impl(queue, QEMU_MAKE_LOCKABLE(lock)) 247 bool qemu_co_enter_next_impl(CoQueue *queue, QemuLockable *lock); 248 249 /** 250 * Empties the CoQueue, waking the waiting coroutine one at a time. Unlike 251 * qemu_co_queue_all, this function releases the lock during aio_co_wake 252 * because it is meant to be used outside coroutine context; in that case, the 253 * coroutine is entered immediately, before qemu_co_enter_all returns. 254 * 255 * If used in coroutine context, qemu_co_enter_all is equivalent to 256 * qemu_co_queue_all. 257 */ 258 #define qemu_co_enter_all(queue, lock) \ 259 qemu_co_enter_all_impl(queue, QEMU_MAKE_LOCKABLE(lock)) 260 void qemu_co_enter_all_impl(CoQueue *queue, QemuLockable *lock); 261 262 /** 263 * Checks if the CoQueue is empty. 264 */ 265 bool qemu_co_queue_empty(CoQueue *queue); 266 267 268 typedef struct CoRwTicket CoRwTicket; 269 typedef struct CoRwlock { 270 CoMutex mutex; 271 272 /* Number of readers, or -1 if owned for writing. */ 273 int owners; 274 275 /* Waiting coroutines. */ 276 QSIMPLEQ_HEAD(, CoRwTicket) tickets; 277 } CoRwlock; 278 279 /** 280 * Initialises a CoRwlock. This must be called before any other operation 281 * is used on the CoRwlock 282 */ 283 void qemu_co_rwlock_init(CoRwlock *lock); 284 285 /** 286 * Read locks the CoRwlock. If the lock cannot be taken immediately because 287 * of a parallel writer, control is transferred to the caller of the current 288 * coroutine. 289 */ 290 void coroutine_fn qemu_co_rwlock_rdlock(CoRwlock *lock); 291 292 /** 293 * Write Locks the CoRwlock from a reader. This is a bit more efficient than 294 * @qemu_co_rwlock_unlock followed by a separate @qemu_co_rwlock_wrlock. 295 * Note that if the lock cannot be upgraded immediately, control is transferred 296 * to the caller of the current coroutine; another writer might run while 297 * @qemu_co_rwlock_upgrade blocks. 298 */ 299 void coroutine_fn qemu_co_rwlock_upgrade(CoRwlock *lock); 300 301 /** 302 * Downgrades a write-side critical section to a reader. Downgrading with 303 * @qemu_co_rwlock_downgrade never blocks, unlike @qemu_co_rwlock_unlock 304 * followed by @qemu_co_rwlock_rdlock. This makes it more efficient, but 305 * may also sometimes be necessary for correctness. 306 */ 307 void coroutine_fn qemu_co_rwlock_downgrade(CoRwlock *lock); 308 309 /** 310 * Write Locks the mutex. If the lock cannot be taken immediately because 311 * of a parallel reader, control is transferred to the caller of the current 312 * coroutine. 313 */ 314 void coroutine_fn qemu_co_rwlock_wrlock(CoRwlock *lock); 315 316 /** 317 * Unlocks the read/write lock and schedules the next coroutine that was 318 * waiting for this lock to be run. 319 */ 320 void coroutine_fn qemu_co_rwlock_unlock(CoRwlock *lock); 321 322 typedef struct QemuCoSleep { 323 Coroutine *to_wake; 324 } QemuCoSleep; 325 326 /** 327 * Yield the coroutine for a given duration. Initializes @w so that, 328 * during this yield, it can be passed to qemu_co_sleep_wake() to 329 * terminate the sleep. 330 */ 331 void coroutine_fn qemu_co_sleep_ns_wakeable(QemuCoSleep *w, 332 QEMUClockType type, int64_t ns); 333 334 /** 335 * Yield the coroutine until the next call to qemu_co_sleep_wake. 336 */ 337 void coroutine_fn qemu_co_sleep(QemuCoSleep *w); 338 339 static inline void coroutine_fn qemu_co_sleep_ns(QEMUClockType type, int64_t ns) 340 { 341 QemuCoSleep w = { 0 }; 342 qemu_co_sleep_ns_wakeable(&w, type, ns); 343 } 344 345 typedef void CleanupFunc(void *opaque); 346 /** 347 * Run entry in a coroutine and start timer. Wait for entry to finish or for 348 * timer to elapse, what happen first. If entry finished, return 0, if timer 349 * elapsed earlier, return -ETIMEDOUT. 350 * 351 * Be careful, entry execution is not canceled, user should handle it somehow. 352 * If @clean is provided, it's called after coroutine finish if timeout 353 * happened. 354 */ 355 int coroutine_fn qemu_co_timeout(CoroutineEntry *entry, void *opaque, 356 uint64_t timeout_ns, CleanupFunc clean); 357 358 /** 359 * Wake a coroutine if it is sleeping in qemu_co_sleep_ns. The timer will be 360 * deleted. @sleep_state must be the variable whose address was given to 361 * qemu_co_sleep_ns() and should be checked to be non-NULL before calling 362 * qemu_co_sleep_wake(). 363 */ 364 void qemu_co_sleep_wake(QemuCoSleep *w); 365 366 /** 367 * Yield until a file descriptor becomes readable 368 * 369 * Note that this function clobbers the handlers for the file descriptor. 370 */ 371 void coroutine_fn yield_until_fd_readable(int fd); 372 373 /** 374 * Increase coroutine pool size 375 */ 376 void qemu_coroutine_inc_pool_size(unsigned int additional_pool_size); 377 378 /** 379 * Decrease coroutine pool size 380 */ 381 void qemu_coroutine_dec_pool_size(unsigned int additional_pool_size); 382 383 #include "qemu/lockable.h" 384 385 /** 386 * Sends a (part of) iovec down a socket, yielding when the socket is full, or 387 * Receives data into a (part of) iovec from a socket, 388 * yielding when there is no data in the socket. 389 * The same interface as qemu_sendv_recvv(), with added yielding. 390 * XXX should mark these as coroutine_fn 391 */ 392 ssize_t coroutine_fn qemu_co_sendv_recvv(int sockfd, struct iovec *iov, 393 unsigned iov_cnt, size_t offset, 394 size_t bytes, bool do_send); 395 #define qemu_co_recvv(sockfd, iov, iov_cnt, offset, bytes) \ 396 qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, false) 397 #define qemu_co_sendv(sockfd, iov, iov_cnt, offset, bytes) \ 398 qemu_co_sendv_recvv(sockfd, iov, iov_cnt, offset, bytes, true) 399 400 /** 401 * The same as above, but with just a single buffer 402 */ 403 ssize_t coroutine_fn qemu_co_send_recv(int sockfd, void *buf, size_t bytes, 404 bool do_send); 405 #define qemu_co_recv(sockfd, buf, bytes) \ 406 qemu_co_send_recv(sockfd, buf, bytes, false) 407 #define qemu_co_send(sockfd, buf, bytes) \ 408 qemu_co_send_recv(sockfd, buf, bytes, true) 409 410 #endif /* QEMU_COROUTINE_H */ 411