xref: /qemu/include/qemu/job.h (revision 29b62a10)
1 /*
2  * Declarations for background jobs
3  *
4  * Copyright (c) 2011 IBM Corp.
5  * Copyright (c) 2012, 2018 Red Hat, Inc.
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #ifndef JOB_H
27 #define JOB_H
28 
29 #include "qapi/qapi-types-job.h"
30 #include "qemu/queue.h"
31 #include "qemu/progress_meter.h"
32 #include "qemu/coroutine.h"
33 #include "block/aio.h"
34 
35 typedef struct JobDriver JobDriver;
36 typedef struct JobTxn JobTxn;
37 
38 
39 /**
40  * Long-running operation: state common to all background jobs.
41  */
42 typedef struct Job {
43 
44     /* Fields set at initialization (job_create), and never modified */
45 
46     /** The ID of the job. May be NULL for internal jobs. */
47     char *id;
48 
49     /**
50      * The type of this job.
51      * All callbacks are called with job_mutex *not* held.
52      */
53     const JobDriver *driver;
54 
55     /**
56      * The coroutine that executes the job.  If not NULL, it is reentered when
57      * busy is false and the job is cancelled.
58      * Initialized in job_start().
59      */
60     Coroutine *co;
61 
62     /** True if this job should automatically finalize itself */
63     bool auto_finalize;
64 
65     /** True if this job should automatically dismiss itself */
66     bool auto_dismiss;
67 
68     /**
69      * The completion function that will be called when the job completes.
70      * Called with AioContext lock held, since many callback implementations
71      * use bdrv_* functions that require the lock to be held.
72      */
73     BlockCompletionFunc *cb;
74 
75     /** The opaque value that is passed to the completion function.  */
76     void *opaque;
77 
78     /** Progress counter of the job; the ProgressMeter API is thread-safe */
79     ProgressMeter progress;
80 
81     /**
82      * AioContext to run the job coroutine in.
83      * The job AioContext can be read when holding *either*
84      * the BQL (so we are in the main loop) or the job_mutex.
85      * It can only be written when we hold *both* BQL
86      * and the job_mutex.
87      */
88     AioContext *aio_context;
89 
90 
91     /** Fields below are protected by job_mutex */
92 
93     /** Reference count of the job */
94     int refcnt;
95 
96     /** Current state; see @JobStatus for details. */
97     JobStatus status;
98 
99     /**
100      * Timer that is used by @job_sleep_ns. Accessed under job_mutex (in
101      * job.c).
102      */
103     QEMUTimer sleep_timer;
104 
105     /**
106      * Counter for pause requests. If non-zero, the job is either paused,
107      * or if busy == true will pause itself as soon as possible.
108      */
109     int pause_count;
110 
111     /**
112      * Set to false by the job while the coroutine has yielded and may be
113      * re-entered by job_enter(). There may still be I/O or event loop activity
114      * pending. Accessed under job_mutex.
115      *
116      * When the job is deferred to the main loop, busy is true as long as the
117      * bottom half is still pending.
118      */
119     bool busy;
120 
121     /**
122      * Set to true by the job while it is in a quiescent state, where
123      * no I/O or event loop activity is pending.
124      */
125     bool paused;
126 
127     /**
128      * Set to true if the job is paused by user.  Can be unpaused with the
129      * block-job-resume QMP command.
130      */
131     bool user_paused;
132 
133     /**
134      * Set to true if the job should cancel itself.  The flag must
135      * always be tested just before toggling the busy flag from false
136      * to true.  After a job has been cancelled, it should only yield
137      * if #aio_poll will ("sooner or later") reenter the coroutine.
138      */
139     bool cancelled;
140 
141     /**
142      * Set to true if the job should abort immediately without waiting
143      * for data to be in sync.
144      */
145     bool force_cancel;
146 
147     /** Set to true when the job has deferred work to the main loop. */
148     bool deferred_to_main_loop;
149 
150     /**
151      * Return code from @run and/or @prepare callback(s).
152      * Not final until the job has reached the CONCLUDED status.
153      * 0 on success, -errno on failure.
154      */
155     int ret;
156 
157     /**
158      * Error object for a failed job.
159      * If job->ret is nonzero and an error object was not set, it will be set
160      * to strerror(-job->ret) during job_completed.
161      */
162     Error *err;
163 
164     /** Notifiers called when a cancelled job is finalised */
165     NotifierList on_finalize_cancelled;
166 
167     /** Notifiers called when a successfully completed job is finalised */
168     NotifierList on_finalize_completed;
169 
170     /** Notifiers called when the job transitions to PENDING */
171     NotifierList on_pending;
172 
173     /** Notifiers called when the job transitions to READY */
174     NotifierList on_ready;
175 
176     /** Notifiers called when the job coroutine yields or terminates */
177     NotifierList on_idle;
178 
179     /** Element of the list of jobs */
180     QLIST_ENTRY(Job) job_list;
181 
182     /** Transaction this job is part of */
183     JobTxn *txn;
184 
185     /** Element of the list of jobs in a job transaction */
186     QLIST_ENTRY(Job) txn_list;
187 } Job;
188 
189 /**
190  * Callbacks and other information about a Job driver.
191  * All callbacks are invoked with job_mutex *not* held.
192  */
193 struct JobDriver {
194 
195     /*
196      * These fields are initialized when this object is created,
197      * and are never changed afterwards
198      */
199 
200     /** Derived Job struct size */
201     size_t instance_size;
202 
203     /** Enum describing the operation */
204     JobType job_type;
205 
206     /**
207      * Mandatory: Entrypoint for the Coroutine.
208      *
209      * This callback will be invoked when moving from CREATED to RUNNING.
210      *
211      * If this callback returns nonzero, the job transaction it is part of is
212      * aborted. If it returns zero, the job moves into the WAITING state. If it
213      * is the last job to complete in its transaction, all jobs in the
214      * transaction move from WAITING to PENDING.
215      *
216      * This callback must be run in the job's context.
217      */
218     int coroutine_fn (*run)(Job *job, Error **errp);
219 
220     /*
221      * Functions run without regard to the BQL that may run in any
222      * arbitrary thread. These functions do not need to be thread-safe
223      * because the caller ensures that they are invoked from one
224      * thread at a time.
225      */
226 
227     /**
228      * If the callback is not NULL, it will be invoked when the job transitions
229      * into the paused state.  Paused jobs must not perform any asynchronous
230      * I/O or event loop activity.  This callback is used to quiesce jobs.
231      */
232     void coroutine_fn (*pause)(Job *job);
233 
234     /**
235      * If the callback is not NULL, it will be invoked when the job transitions
236      * out of the paused state.  Any asynchronous I/O or event loop activity
237      * should be restarted from this callback.
238      */
239     void coroutine_fn (*resume)(Job *job);
240 
241     /*
242      * Global state (GS) API. These functions run under the BQL.
243      *
244      * See include/block/block-global-state.h for more information about
245      * the GS API.
246      */
247 
248     /**
249      * Called when the job is resumed by the user (i.e. user_paused becomes
250      * false). .user_resume is called before .resume.
251      */
252     void (*user_resume)(Job *job);
253 
254     /**
255      * Optional callback for job types whose completion must be triggered
256      * manually.
257      */
258     void (*complete)(Job *job, Error **errp);
259 
260     /**
261      * If the callback is not NULL, prepare will be invoked when all the jobs
262      * belonging to the same transaction complete; or upon this job's completion
263      * if it is not in a transaction.
264      *
265      * This callback will not be invoked if the job has already failed.
266      * If it fails, .abort() and then .clean() will be called.
267      *
268      * Called with AioContext lock held, since many callback implementations
269      * use bdrv_* functions that require the lock to be held.
270      */
271     int (*prepare)(Job *job);
272 
273     /**
274      * If the callback is not NULL, it will be invoked when all the jobs
275      * belonging to the same transaction complete; or upon this job's
276      * completion if it is not in a transaction. Skipped if NULL.
277      *
278      * All jobs will complete with a call to either .commit() or .abort() but
279      * never both.
280      *
281      * Called with AioContext lock held, since many callback implementations
282      * use bdrv_* functions that require the lock to be held.
283      */
284     void (*commit)(Job *job);
285 
286     /**
287      * If the callback is not NULL, it will be invoked when any job in the
288      * same transaction fails; or upon this job's failure (due to error or
289      * cancellation) if it is not in a transaction. Skipped if NULL.
290      *
291      * All jobs will complete with a call to either .commit() or .abort() but
292      * never both.
293      *
294      * Called with AioContext lock held, since many callback implementations
295      * use bdrv_* functions that require the lock to be held.
296      */
297     void (*abort)(Job *job);
298 
299     /**
300      * If the callback is not NULL, it will be invoked after a call to either
301      * .commit() or .abort(). Regardless of which callback is invoked after
302      * completion, .clean() will always be called, even if the job does not
303      * belong to a transaction group.
304      *
305      * Called with AioContext lock held, since many callback implementations
306      * use bdrv_* functions that require the lock to be held.
307      */
308     void (*clean)(Job *job);
309 
310     /**
311      * If the callback is not NULL, it will be invoked in job_cancel_async.
312      *
313      * This function must return true if the job will be cancelled
314      * immediately without any further I/O (mandatory if @force is
315      * true), and false otherwise.  This lets the generic job layer
316      * know whether a job has been truly (force-)cancelled, or whether
317      * it is just in a special completion mode (like mirror after
318      * READY).
319      * (If the callback is NULL, the job is assumed to terminate
320      * without I/O.)
321      *
322      * Called with AioContext lock held, since many callback implementations
323      * use bdrv_* functions that require the lock to be held.
324      */
325     bool (*cancel)(Job *job, bool force);
326 
327 
328     /**
329      * Called when the job is freed.
330      * Called with AioContext lock held, since many callback implementations
331      * use bdrv_* functions that require the lock to be held.
332      */
333     void (*free)(Job *job);
334 };
335 /* Creation flags for job_create(); each flag occupies a distinct bit. */
336 typedef enum JobCreateFlags {
337     /* Default behavior (none of the flags below is set) */
338     JOB_DEFAULT = 0x00,
339     /* Job is not QMP-created and should not send QMP events */
340     JOB_INTERNAL = 0x01,
341     /* Job requires a manual finalize step */
342     JOB_MANUAL_FINALIZE = 0x02,
343     /* Job requires a manual dismiss step */
344     JOB_MANUAL_DISMISS = 0x04,
345 } JobCreateFlags;
346 /* Mutex protecting the list of jobs and their status; see job_lock(). */
347 extern QemuMutex job_mutex;
348 /* Shorthand for QEMU_LOCK_GUARD() applied to job_mutex. */
349 #define JOB_LOCK_GUARD() QEMU_LOCK_GUARD(&job_mutex)
350 /* Shorthand for WITH_QEMU_LOCK_GUARD() applied to job_mutex. */
351 #define WITH_JOB_LOCK_GUARD() WITH_QEMU_LOCK_GUARD(&job_mutex)
352 
353 /**
354  * job_lock:
355  *
356  * Take the mutex protecting the list of jobs and their status.
357  * Most functions called by the monitor need to call job_lock
358  * and job_unlock manually.  On the other hand, functions called
359  * by the block jobs themselves and by the block layer will take the
360  * lock for you.
361  */
362 void job_lock(void);
363 
364 /**
365  * job_unlock:
366  *
367  * Release the mutex protecting the list of jobs and their status.
368  */
369 void job_unlock(void);
370 
371 /**
372  * Allocate and return a new job transaction. Jobs can be added to the
373  * transaction using job_txn_add_job().
374  *
375  * The transaction is automatically freed when the last job completes or is
376  * cancelled.
377  *
378  * All jobs in the transaction either complete successfully or fail/cancel as a
379  * group.  Jobs wait for each other before completing.  Cancelling one job
380  * cancels all jobs in the transaction.
381  */
382 JobTxn *job_txn_new(void);
383 
384 /**
385  * Release a reference that was previously acquired with job_txn_add_job or
386  * job_txn_new. If it's the last reference to the object, it will be freed.
387  *
388  * Called with job lock *not* held.
389  */
390 void job_txn_unref(JobTxn *txn);
391 
392 /*
393  * Same as job_txn_unref(), but called with job lock held.
394  * Might release the lock temporarily.
395  */
396 void job_txn_unref_locked(JobTxn *txn);
397 
398 /**
399  * Create a new long-running job and return it.
400  * Called with job_mutex *not* held.
401  *
402  * @job_id: The id of the newly-created job, or %NULL for internal jobs
403  * @driver: The class object for the newly-created job.
404  * @txn: The transaction this job belongs to, if any. %NULL otherwise.
405  * @ctx: The AioContext to run the job coroutine in.
406  * @flags: Creation flags for the job. See @JobCreateFlags.
407  * @cb: Completion function for the job.
408  * @opaque: Opaque pointer value passed to @cb.
409  * @errp: Error object.
410  */
411 void *job_create(const char *job_id, const JobDriver *driver, JobTxn *txn,
412                  AioContext *ctx, int flags, BlockCompletionFunc *cb,
413                  void *opaque, Error **errp);
414 
415 /**
416  * Add a reference to the job's refcnt.  It is dropped again with
417  * job_unref_locked(), which frees the job once the last reference is gone.
418  *
419  * Called with job lock held.
420  */
421 void job_ref_locked(Job *job);
422 
423 /**
424  * Release a reference that was previously acquired with job_ref_locked() or
425  * job_create(). If it's the last reference to the object, it will be freed.
426  *
427  * Takes AioContext lock internally to invoke a job->driver callback.
428  * Called with job lock held.
429  */
430 void job_unref_locked(Job *job);
431 
432 /**
433  * @job: The job that has made progress
434  * @done: How much progress the job made since the last call
435  *
436  * Updates the progress counter of the job.
437  *
438  * May be called with mutex held or not held.
439  */
440 void job_progress_update(Job *job, uint64_t done);
441 
442 /**
443  * @job: The job whose expected progress end value is set
444  * @remaining: Missing progress (on top of the current progress counter value)
445  *             until the new expected end value is reached
446  *
447  * Sets the expected end value of the progress counter of a job so that a
448  * completion percentage can be calculated when the progress is updated.
449  *
450  * May be called with mutex held or not held.
451  */
452 void job_progress_set_remaining(Job *job, uint64_t remaining);
453 
454 /**
455  * @job: The job whose expected progress end value is updated
456  * @delta: Value which is to be added to the current expected end
457  *         value
458  *
459  * Increases the expected end value of the progress counter of a job.
460  * This is useful for parenthesis operations: If a job has to
461  * conditionally perform a high-priority operation as part of its
462  * progress, it calls this function with the expected operation's
463  * length before, and job_progress_update() afterwards.
464  * (So the operation acts as a parenthesis in regards to the main job
465  * operation running in background.)
466  *
467  * May be called with mutex held or not held.
468  */
469 void job_progress_increase_remaining(Job *job, uint64_t delta);
470 
471 /**
472  * Conditionally enter the job coroutine if the job is ready to run, not
473  * already busy and fn() returns true. fn() is called while under the job_lock
474  * critical section.
475  *
476  * Called with job lock held, but might release it temporarily.
477  */
478 void job_enter_cond_locked(Job *job, bool(*fn)(Job *job));
479 
480 /**
481  * @job: A job that has not yet been started.
482  *
483  * Begins execution of a job.
484  * Takes ownership of one reference to the job object.
485  *
486  * Called with job_mutex *not* held.
487  */
488 void job_start(Job *job);
489 
490 /**
491  * @job: The job to enter.
492  *
493  * Continue the specified job by entering the coroutine.
494  * Called with job_mutex *not* held.
495  */
496 void job_enter(Job *job);
497 
498 /**
499  * @job: The job that is ready to pause.
500  *
501  * Pause now if job_pause() has been called. Jobs that perform lots of I/O
502  * must call this between requests so that the job can be paused.
503  *
504  * Called with job_mutex *not* held.
505  */
506 void coroutine_fn job_pause_point(Job *job);
507 
508 /**
509  * @job: The job that calls the function.
510  *
511  * Yield the job coroutine.
512  * Called with job_mutex *not* held.
513  */
514 void coroutine_fn job_yield(Job *job);
515 
516 /**
517  * @job: The job that calls the function.
518  * @ns: How many nanoseconds to stop for.
519  *
520  * Put the job to sleep (assuming that it wasn't canceled) for @ns
521  * %QEMU_CLOCK_REALTIME nanoseconds.  Canceling the job will immediately
522  * interrupt the wait.
523  *
524  * Called with job_mutex *not* held.
525  */
526 void coroutine_fn job_sleep_ns(Job *job, int64_t ns);
527 
528 /** Returns the JobType of a given Job. */
529 JobType job_type(const Job *job);
530 
531 /** Returns the enum string for the JobType of a given Job. */
532 const char *job_type_str(const Job *job);
533 
534 /** Returns true if the job should not be visible to the management layer. */
535 bool job_is_internal(Job *job);
536 
537 /**
538  * Returns whether the job is being cancelled.
539  * Called with job_mutex *not* held.
540  */
541 bool job_is_cancelled(Job *job);
542 
543 /* Same as job_is_cancelled(), but called with job lock held. */
544 bool job_is_cancelled_locked(Job *job);
545 
546 /**
547  * Returns whether the job is scheduled for cancellation (at an
548  * indefinite point).
549  * Called with job_mutex *not* held.
550  */
551 bool job_cancel_requested(Job *job);
552 
553 /**
554  * Returns whether the job is in a completed state.
555  * Called with job lock held.
556  */
557 bool job_is_completed_locked(Job *job);
558 
559 /**
560  * Returns whether the job is ready to be completed.
561  * Called with job_mutex *not* held.
562  */
563 bool job_is_ready(Job *job);
564 
565 /* Same as job_is_ready(), but called with job lock held. */
566 bool job_is_ready_locked(Job *job);
567 
568 /**
569  * Request @job to pause at the next pause point. Must be paired with
570  * job_resume(). If the job is supposed to be resumed by user action, call
571  * job_user_pause_locked() instead.
572  *
573  * Called with job lock *not* held.
574  */
575 void job_pause(Job *job);
576 
577 /* Same as job_pause(), but called with job lock held. */
578 void job_pause_locked(Job *job);
579 
580 /** Resumes a @job paused with job_pause. Called with job lock *not* held. */
581 void job_resume(Job *job);
582 
583 /*
584  * Same as job_resume(), but called with job lock held.
585  * Might release the lock temporarily.
586  */
587 void job_resume_locked(Job *job);
588 
589 /**
590  * Asynchronously pause the specified @job.
591  * Do not allow a resume until a matching call to job_user_resume_locked().
592  * Called with job lock held.
593  */
594 void job_user_pause_locked(Job *job, Error **errp);
595 
596 /**
597  * Returns true if the job is user-paused.
598  * Called with job lock held.
599  */
600 bool job_user_paused_locked(Job *job);
601 
602 /**
603  * Resume the specified @job.
604  * Must be paired with a preceding job_user_pause_locked.
605  * Called with job lock held, but might release it temporarily.
606  */
607 void job_user_resume_locked(Job *job, Error **errp);
608 
609 /**
610  * Get the next element from the list of jobs after @job, or the
611  * first one if @job is %NULL.
612  *
613  * Returns the requested job, or %NULL if there are no more jobs left.
614  * Called with job lock *not* held.
615  */
616 Job *job_next(Job *job);
617 
618 /* Same as job_next(), but called with job lock held. */
619 Job *job_next_locked(Job *job);
620 
621 /**
622  * Get the job identified by @id (which must not be %NULL).
623  *
624  * Returns the requested job, or %NULL if it doesn't exist.
625  * Called with job lock held.
626  */
627 Job *job_get_locked(const char *id);
628 
629 /**
630  * Check whether the verb @verb can be applied to @job in its current state.
631  * Returns 0 if the verb can be applied; otherwise errp is set and -EPERM
632  * returned.
633  *
634  * Called with job lock held.
635  */
636 int job_apply_verb_locked(Job *job, JobVerb verb, Error **errp);
637 
638 /**
639  * The @job could not be started, free it.
640  * Called with job_mutex *not* held.
641  */
642 void job_early_fail(Job *job);
643 
644 /**
645  * Moves the @job from RUNNING to READY.
646  * Called with job_mutex *not* held.
647  */
648 void job_transition_to_ready(Job *job);
649 
650 /**
651  * Asynchronously complete the specified @job.
652  * Called with job lock held, but might release it temporarily.
653  */
654 void job_complete_locked(Job *job, Error **errp);
655 
656 /**
657  * Asynchronously cancel the specified @job. If @force is true, the job should
658  * be cancelled immediately without waiting for a consistent state.
659  * Called with job lock held.
660  */
661 void job_cancel_locked(Job *job, bool force);
662 
663 /**
664  * Cancels the specified job like job_cancel_locked(), but may refuse
665  * to do so if the operation isn't meaningful in the current state of the job.
666  * Called with job lock held.
667  */
668 void job_user_cancel_locked(Job *job, bool force, Error **errp);
669 
670 /**
671  * Synchronously cancel the @job.  The completion callback is called
672  * before the function returns.  If @force is false, the job may
673  * actually complete instead of canceling itself; the circumstances
674  * under which this happens depend on the kind of job that is active.
675  *
676  * Returns the return value from the job if the job actually completed
677  * during the call, or -ECANCELED if it was canceled.
678  *
679  * Called with job_lock *not* held.
680  */
681 int job_cancel_sync(Job *job, bool force);
682 
683 /* Same as job_cancel_sync, but called with job lock held. */
684 int job_cancel_sync_locked(Job *job, bool force);
685 
686 /**
687  * Synchronously force-cancels all jobs using job_cancel_sync_locked().
688  *
689  * Called with job_lock *not* held.
690  */
691 void job_cancel_sync_all(void);
692 
693 /**
694  * @job: The job to be completed.
695  * @errp: Error object which may be set by job_complete_locked(); this is not
696  *        necessarily set on every error, the job return value has to be
697  *        checked as well.
698  *
699  * Synchronously complete the job.  The completion callback is called before the
700  * function returns, unless it is NULL (which is permissible when using this
701  * function).
702  *
703  * Returns the return value from the job.
704  * Called with job_lock held.
705  */
706 int job_complete_sync_locked(Job *job, Error **errp);
707 
708 /**
709  * For a @job that has finished its work and is pending awaiting explicit
710  * acknowledgement to commit its work, this will commit that work.
711  *
712  * FIXME: Make the below statement universally true:
713  * For jobs that support the manual workflow mode, all graph changes that occur
714  * as a result will occur after this command and before a successful reply.
715  *
716  * Called with job lock held.
717  */
718 void job_finalize_locked(Job *job, Error **errp);
719 
720 /**
721  * Remove the concluded @job from the query list and reset the passed pointer
722  * to %NULL. Returns an error if the job is not actually concluded.
723  *
724  * Called with job lock held.
725  */
726 void job_dismiss_locked(Job **job, Error **errp);
727 
728 /**
729  * Synchronously finishes the given @job. If @finish is given, it is called to
730  * trigger completion or cancellation of the job.
731  *
732  * Returns 0 if the job is successfully completed, -ECANCELED if the job was
733  * cancelled before completing, and -errno in other error cases.
734  *
735  * Called with job_lock held, but might release it temporarily.
736  */
737 int job_finish_sync_locked(Job *job, void (*finish)(Job *, Error **errp),
738                            Error **errp);
739 
740 /**
741  * Sets the @job->aio_context.
742  * Called with job_mutex *not* held.
743  *
744  * This function must run in the main thread to protect against
745  * concurrent read in job_finish_sync_locked(), takes the job_mutex
746  * lock to protect against the read in job_do_yield_locked(), and must
747  * be called when the job is quiescent.
748  */
749 void job_set_aio_context(Job *job, AioContext *ctx);
750 
751 #endif
752