/*
 * Dirty page rate limit implementation code
 *
 * Copyright (c) 2022 CHINA TELECOM CO.,LTD.
 *
 * Authors:
 *  Hyman Huang(黄勇) <huangy81@chinatelecom.cn>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or later.
 * See the COPYING file in the top-level directory.
 */

#include "qemu/osdep.h"
#include "qemu/main-loop.h"
#include "qapi/qapi-commands-migration.h"
#include "qapi/qmp/qdict.h"
#include "qapi/error.h"
#include "sysemu/dirtyrate.h"
#include "sysemu/dirtylimit.h"
#include "monitor/hmp.h"
#include "monitor/monitor.h"
#include "exec/memory.h"
#include "exec/target_page.h"
#include "hw/boards.h"
#include "sysemu/kvm.h"
#include "trace.h"
#include "migration/misc.h"
#include "migration/migration.h"

/*
 * Dirtylimit stops adjusting the throttle once the dirty page rate
 * error drops below DIRTYLIMIT_TOLERANCE_RANGE.
 */
#define DIRTYLIMIT_TOLERANCE_RANGE  25  /* MB/s */
/*
 * Increase or decrease the vcpu sleep time linearly if the dirty
 * page rate error percentage is over DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
 * otherwise, adjust the sleep time by a fixed step.
 */
#define DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT     50
/*
 * Max vcpu sleep time percentage during a cycle
 * composed of dirty ring full and sleep time.
 */
#define DIRTYLIMIT_THROTTLE_PCT_MAX 99

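/*
 * Global store for the most recent per-vcpu dirty page rate sample,
 * filled by the "dirtyrate-stat" thread below.
 */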
struct {
    VcpuStat stat;
    bool running;
    QemuThread thread;
} *vcpu_dirty_rate_stat;

typedef struct VcpuDirtyLimitState {
    int cpu_index;
    bool enabled;
    /*
     * Quota dirty page rate in MB/s;
     * zero if not enabled.
     */
    uint64_t quota;
} VcpuDirtyLimitState;

struct {
    VcpuDirtyLimitState *states;
    /* Max number of cpus configured by the user */
    int max_cpus;
    /* Number of vcpus under dirtylimit */
    int limited_nvcpu;
} *dirtylimit_state;

/* protect dirtylimit_state */
static QemuMutex dirtylimit_mutex;

/* dirtylimit_process() skips processing while dirtylimit_quit is true */
static bool dirtylimit_quit;

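/*
 * Sample the dirty page rate of every vcpu over one measurement period
 * and publish the results into vcpu_dirty_rate_stat. While migration is
 * running with the dirty-limit capability enabled, the period follows
 * the x-vcpu-dirty-limit-period migration parameter.
 */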
static void vcpu_dirty_rate_stat_collect(void)
{
    MigrationState *s = migrate_get_current();
    VcpuStat stat;
    int i = 0;
    int64_t period = DIRTYLIMIT_CALC_TIME_MS;

    if (migrate_dirty_limit() &&
        migration_is_active(s)) {
        period = s->parameters.x_vcpu_dirty_limit_period;
    }

    /* calculate vcpu dirtyrate */
    vcpu_calculate_dirtyrate(period,
                             &stat,
                             GLOBAL_DIRTY_LIMIT,
                             false);

    for (i = 0; i < stat.nvcpu; i++) {
        vcpu_dirty_rate_stat->stat.rates[i].id = i;
        vcpu_dirty_rate_stat->stat.rates[i].dirty_rate =
            stat.rates[i].dirty_rate;
    }

    g_free(stat.rates);
}

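/*
 * Body of the "dirtyrate-stat" thread: keep dirty logging enabled and
 * repeatedly collect per-vcpu dirty rates, letting dirtylimit_process()
 * readjust the throttle after each sample while the limit is in service.
 */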
static void *vcpu_dirty_rate_stat_thread(void *opaque)
{
    rcu_register_thread();

    /* start log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, true);

    while (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        vcpu_dirty_rate_stat_collect();
        if (dirtylimit_in_service()) {
            dirtylimit_process();
        }
    }

    /* stop log sync */
    global_dirty_log_change(GLOBAL_DIRTY_LIMIT, false);

    rcu_unregister_thread();
    return NULL;
}

int64_t vcpu_dirty_rate_get(int cpu_index)
{
    DirtyRateVcpu *rates = vcpu_dirty_rate_stat->stat.rates;
    return qatomic_read_i64(&rates[cpu_index].dirty_rate);
}

void vcpu_dirty_rate_stat_start(void)
{
    if (qatomic_read(&vcpu_dirty_rate_stat->running)) {
        return;
    }

    qatomic_set(&vcpu_dirty_rate_stat->running, 1);
    qemu_thread_create(&vcpu_dirty_rate_stat->thread,
                       "dirtyrate-stat",
                       vcpu_dirty_rate_stat_thread,
                       NULL,
                       QEMU_THREAD_JOINABLE);
}

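/*
 * Drop the dirtylimit lock and the BQL across the join: the stat thread
 * may be blocked inside dirtylimit_process() waiting for the same lock,
 * so joining while holding it could deadlock.
 */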
void vcpu_dirty_rate_stat_stop(void)
{
    qatomic_set(&vcpu_dirty_rate_stat->running, 0);
    dirtylimit_state_unlock();
    bql_unlock();
    qemu_thread_join(&vcpu_dirty_rate_stat->thread);
    bql_lock();
    dirtylimit_state_lock();
}

void vcpu_dirty_rate_stat_initialize(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;

    vcpu_dirty_rate_stat =
        g_malloc0(sizeof(*vcpu_dirty_rate_stat));

    vcpu_dirty_rate_stat->stat.nvcpu = max_cpus;
    vcpu_dirty_rate_stat->stat.rates =
        g_new0(DirtyRateVcpu, max_cpus);

    vcpu_dirty_rate_stat->running = false;
}

void vcpu_dirty_rate_stat_finalize(void)
{
    g_free(vcpu_dirty_rate_stat->stat.rates);
    vcpu_dirty_rate_stat->stat.rates = NULL;

    g_free(vcpu_dirty_rate_stat);
    vcpu_dirty_rate_stat = NULL;
}

void dirtylimit_state_lock(void)
{
    qemu_mutex_lock(&dirtylimit_mutex);
}

void dirtylimit_state_unlock(void)
{
    qemu_mutex_unlock(&dirtylimit_mutex);
}

static void
__attribute__((__constructor__)) dirtylimit_mutex_init(void)
{
    qemu_mutex_init(&dirtylimit_mutex);
}

static inline VcpuDirtyLimitState *dirtylimit_vcpu_get_state(int cpu_index)
{
    return &dirtylimit_state->states[cpu_index];
}

void dirtylimit_state_initialize(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;
    int i;

    dirtylimit_state = g_malloc0(sizeof(*dirtylimit_state));

    dirtylimit_state->states =
            g_new0(VcpuDirtyLimitState, max_cpus);

    for (i = 0; i < max_cpus; i++) {
        dirtylimit_state->states[i].cpu_index = i;
    }

    dirtylimit_state->max_cpus = max_cpus;
    trace_dirtylimit_state_initialize(max_cpus);
}

void dirtylimit_state_finalize(void)
{
    g_free(dirtylimit_state->states);
    dirtylimit_state->states = NULL;

    g_free(dirtylimit_state);
    dirtylimit_state = NULL;

    trace_dirtylimit_state_finalize();
}

bool dirtylimit_in_service(void)
{
    return !!dirtylimit_state;
}

bool dirtylimit_vcpu_index_valid(int cpu_index)
{
    MachineState *ms = MACHINE(qdev_get_machine());

    return !(cpu_index < 0 ||
             cpu_index >= ms->smp.max_cpus);
}

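/*
 * Estimate how long, in microseconds, a vcpu takes to fill its dirty
 * ring at the given dirty page rate (MB/s). The peak rate observed so
 * far is used, which keeps the estimate from oscillating with the
 * sampled rate.
 */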
static uint64_t dirtylimit_dirty_ring_full_time(uint64_t dirtyrate)
{
    static uint64_t max_dirtyrate;
    uint64_t dirty_ring_size_MiB;

    dirty_ring_size_MiB = qemu_target_pages_to_MiB(kvm_dirty_ring_size());

    if (max_dirtyrate < dirtyrate) {
        max_dirtyrate = dirtyrate;
    }

    return dirty_ring_size_MiB * 1000000 / max_dirtyrate;
}

static inline bool dirtylimit_done(uint64_t quota,
                                   uint64_t current)
{
    uint64_t min, max;

    min = MIN(quota, current);
    max = MAX(quota, current);

    return (max - min) <= DIRTYLIMIT_TOLERANCE_RANGE;
}

static inline bool
dirtylimit_need_linear_adjustment(uint64_t quota,
                                  uint64_t current)
{
    uint64_t min, max;

    min = MIN(quota, current);
    max = MAX(quota, current);

    return ((max - min) * 100 / max) > DIRTYLIMIT_LINEAR_ADJUSTMENT_PCT;
}

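/*
 * A throttled vcpu alternates between running until its dirty ring is
 * full (ring_full_time_us) and sleeping (throttle_us_per_full), so the
 * achieved dirty rate scales by run_time / (run_time + sleep_time).
 * For the linear case, solving
 *
 *     quota = current * T / (T + throttle_us)
 *
 * for throttle_us with T = ring_full_time_us yields the
 * T * sleep_pct / (100 - sleep_pct) expression used below.
 */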
static void dirtylimit_set_throttle(CPUState *cpu,
                                    uint64_t quota,
                                    uint64_t current)
{
    int64_t ring_full_time_us = 0;
    uint64_t sleep_pct = 0;
    uint64_t throttle_us = 0;

    if (current == 0) {
        cpu->throttle_us_per_full = 0;
        return;
    }

    ring_full_time_us = dirtylimit_dirty_ring_full_time(current);

    if (dirtylimit_need_linear_adjustment(quota, current)) {
        if (quota < current) {
            sleep_pct = (current - quota) * 100 / current;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full += throttle_us;
        } else {
            sleep_pct = (quota - current) * 100 / quota;
            throttle_us =
                ring_full_time_us * sleep_pct / (double)(100 - sleep_pct);
            cpu->throttle_us_per_full -= throttle_us;
        }

        trace_dirtylimit_throttle_pct(cpu->cpu_index,
                                      sleep_pct,
                                      throttle_us);
    } else {
        if (quota < current) {
            cpu->throttle_us_per_full += ring_full_time_us / 10;
        } else {
            cpu->throttle_us_per_full -= ring_full_time_us / 10;
        }
    }

    /*
     * TODO: with a large kvm_dirty_ring_size (e.g. 65536), the current
     *       dirty page rate may never reach the quota; should we stop
     *       increasing the sleep time in that case?
     */
    cpu->throttle_us_per_full = MIN(cpu->throttle_us_per_full,
        ring_full_time_us * DIRTYLIMIT_THROTTLE_PCT_MAX);

    cpu->throttle_us_per_full = MAX(cpu->throttle_us_per_full, 0);
}

static void dirtylimit_adjust_throttle(CPUState *cpu)
{
    uint64_t quota = 0;
    uint64_t current = 0;
    int cpu_index = cpu->cpu_index;

    quota = dirtylimit_vcpu_get_state(cpu_index)->quota;
    current = vcpu_dirty_rate_get(cpu_index);

    if (!dirtylimit_done(quota, current)) {
        dirtylimit_set_throttle(cpu, quota, current);
    }
}

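/*
 * Run one throttle-adjustment pass from the stat thread: readjust the
 * sleep time of every vcpu that currently has a limit enabled.
 */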
void dirtylimit_process(void)
{
    CPUState *cpu;

    if (!qatomic_read(&dirtylimit_quit)) {
        dirtylimit_state_lock();

        if (!dirtylimit_in_service()) {
            dirtylimit_state_unlock();
            return;
        }

        CPU_FOREACH(cpu) {
            if (!dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
                continue;
            }
            dirtylimit_adjust_throttle(cpu);
        }
        dirtylimit_state_unlock();
    }
}

void dirtylimit_change(bool start)
{
    if (start) {
        qatomic_set(&dirtylimit_quit, 0);
    } else {
        qatomic_set(&dirtylimit_quit, 1);
    }
}

void dirtylimit_set_vcpu(int cpu_index,
                         uint64_t quota,
                         bool enable)
{
    trace_dirtylimit_set_vcpu(cpu_index, quota);

    if (enable) {
        dirtylimit_state->states[cpu_index].quota = quota;
        if (!dirtylimit_vcpu_get_state(cpu_index)->enabled) {
            dirtylimit_state->limited_nvcpu++;
        }
    } else {
        dirtylimit_state->states[cpu_index].quota = 0;
        if (dirtylimit_state->states[cpu_index].enabled) {
            dirtylimit_state->limited_nvcpu--;
        }
    }

    dirtylimit_state->states[cpu_index].enabled = enable;
}

void dirtylimit_set_all(uint64_t quota,
                        bool enable)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    int max_cpus = ms->smp.max_cpus;
    int i;

    for (i = 0; i < max_cpus; i++) {
        dirtylimit_set_vcpu(i, quota, enable);
    }
}

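/*
 * Called from a vcpu thread after its dirty ring fills up: if this vcpu
 * is being limited, sleep for the computed throttle time before
 * re-entering the guest.
 */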
void dirtylimit_vcpu_execute(CPUState *cpu)
{
    if (cpu->throttle_us_per_full) {
        dirtylimit_state_lock();

        if (dirtylimit_in_service() &&
            dirtylimit_vcpu_get_state(cpu->cpu_index)->enabled) {
            dirtylimit_state_unlock();
            trace_dirtylimit_vcpu_execute(cpu->cpu_index,
                    cpu->throttle_us_per_full);

            g_usleep(cpu->throttle_us_per_full);
            return;
        }

        dirtylimit_state_unlock();
    }
}

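/*
 * The facility is brought up and torn down lazily: state and the stat
 * thread are created on the first set request and destroyed once the
 * last vcpu limit is cancelled.
 */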
static void dirtylimit_init(void)
{
    dirtylimit_state_initialize();
    dirtylimit_change(true);
    vcpu_dirty_rate_stat_initialize();
    vcpu_dirty_rate_stat_start();
}

static void dirtylimit_cleanup(void)
{
    vcpu_dirty_rate_stat_stop();
    vcpu_dirty_rate_stat_finalize();
    dirtylimit_change(false);
    dirtylimit_state_finalize();
}

/*
 * The dirty page rate limit may not be set or cancelled while
 * migration is running with the dirty-limit capability enabled.
 */
static bool dirtylimit_is_allowed(void)
{
    MigrationState *ms = migrate_get_current();

    if (migration_is_running(ms->state) &&
        (!qemu_thread_is_self(&ms->thread)) &&
        migrate_dirty_limit() &&
        dirtylimit_in_service()) {
        return false;
    }
    return true;
}

void qmp_cancel_vcpu_dirty_limit(bool has_cpu_index,
                                 int64_t cpu_index,
                                 Error **errp)
{
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't cancel dirty page rate limit while"
                   " migration is running");
        return;
    }

    if (!dirtylimit_in_service()) {
        return;
    }

    dirtylimit_state_lock();

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, 0, false);
    } else {
        dirtylimit_set_all(0, false);
    }

    if (!dirtylimit_state->limited_nvcpu) {
        dirtylimit_cleanup();
    }

    dirtylimit_state_unlock();
}

void hmp_cancel_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    Error *err = NULL;

    qmp_cancel_vcpu_dirty_limit(cpu_index != -1, cpu_index, &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "[Please use 'info vcpu_dirty_limit' to query "
                   "dirty limit for virtual CPU]\n");
}

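/*
 * QMP usage sketch (values are illustrative): limit vcpu 0 to 200 MB/s
 * with
 *
 *   { "execute": "set-vcpu-dirty-limit",
 *     "arguments": { "cpu-index": 0, "dirty-rate": 200 } }
 *
 * Omit "cpu-index" to apply the limit to all vcpus; a dirty-rate of 0
 * (or cancel-vcpu-dirty-limit) cancels it.
 */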
void qmp_set_vcpu_dirty_limit(bool has_cpu_index,
                              int64_t cpu_index,
                              uint64_t dirty_rate,
                              Error **errp)
{
    if (!kvm_enabled() || !kvm_dirty_ring_enabled()) {
        error_setg(errp, "dirty page limit feature requires KVM with"
                   " accelerator property 'dirty-ring-size' set");
        return;
    }

    if (has_cpu_index && !dirtylimit_vcpu_index_valid(cpu_index)) {
        error_setg(errp, "incorrect cpu index specified");
        return;
    }

    if (!dirtylimit_is_allowed()) {
        error_setg(errp, "can't set dirty page rate limit while"
                   " migration is running");
        return;
    }

    if (!dirty_rate) {
        qmp_cancel_vcpu_dirty_limit(has_cpu_index, cpu_index, errp);
        return;
    }

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_init();
    }

    if (has_cpu_index) {
        dirtylimit_set_vcpu(cpu_index, dirty_rate, true);
    } else {
        dirtylimit_set_all(dirty_rate, true);
    }

    dirtylimit_state_unlock();
}

void hmp_set_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    int64_t dirty_rate = qdict_get_int(qdict, "dirty_rate");
    int64_t cpu_index = qdict_get_try_int(qdict, "cpu_index", -1);
    Error *err = NULL;

    if (dirty_rate < 0) {
        error_setg(&err, "invalid dirty page limit %" PRId64, dirty_rate);
        goto out;
    }

    qmp_set_vcpu_dirty_limit(cpu_index != -1, cpu_index, dirty_rate, &err);

out:
    hmp_handle_error(mon, err);
}

/* Return the max throttle time across all virtual CPUs */
uint64_t dirtylimit_throttle_time_per_round(void)
{
    CPUState *cpu;
    int64_t max = 0;

    CPU_FOREACH(cpu) {
        if (cpu->throttle_us_per_full > max) {
            max = cpu->throttle_us_per_full;
        }
    }

    return max;
}

/*
 * Estimate the average dirty ring full time of each virtual CPU.
 * Return 0 if the guest doesn't dirty memory.
 */
uint64_t dirtylimit_ring_full_time(void)
{
    CPUState *cpu;
    uint64_t curr_rate = 0;
    int nvcpus = 0;

    CPU_FOREACH(cpu) {
        if (cpu->running) {
            nvcpus++;
            curr_rate += vcpu_dirty_rate_get(cpu->cpu_index);
        }
    }

    if (!curr_rate || !nvcpus) {
        return 0;
    }

    return dirtylimit_dirty_ring_full_time(curr_rate / nvcpus);
}

static struct DirtyLimitInfo *dirtylimit_query_vcpu(int cpu_index)
{
    DirtyLimitInfo *info = NULL;

    info = g_malloc0(sizeof(*info));
    info->cpu_index = cpu_index;
    info->limit_rate = dirtylimit_vcpu_get_state(cpu_index)->quota;
    info->current_rate = vcpu_dirty_rate_get(cpu_index);

    return info;
}

static struct DirtyLimitInfoList *dirtylimit_query_all(void)
{
    int i, index;
    DirtyLimitInfo *info = NULL;
    DirtyLimitInfoList *head = NULL, **tail = &head;

    dirtylimit_state_lock();

    if (!dirtylimit_in_service()) {
        dirtylimit_state_unlock();
        return NULL;
    }

    for (i = 0; i < dirtylimit_state->max_cpus; i++) {
        index = dirtylimit_state->states[i].cpu_index;
        if (dirtylimit_vcpu_get_state(index)->enabled) {
            info = dirtylimit_query_vcpu(index);
            QAPI_LIST_APPEND(tail, info);
        }
    }

    dirtylimit_state_unlock();

    return head;
}

struct DirtyLimitInfoList *qmp_query_vcpu_dirty_limit(Error **errp)
{
    return dirtylimit_query_all();
}

void hmp_info_vcpu_dirty_limit(Monitor *mon, const QDict *qdict)
{
    DirtyLimitInfoList *info;
    g_autoptr(DirtyLimitInfoList) head = NULL;
    Error *err = NULL;

    if (!dirtylimit_in_service()) {
        monitor_printf(mon, "Dirty page limit not enabled!\n");
        return;
    }

    head = qmp_query_vcpu_dirty_limit(&err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    for (info = head; info != NULL; info = info->next) {
        monitor_printf(mon, "vcpu[%"PRIi64"], limit rate %"PRIi64 " (MB/s),"
                            " current rate %"PRIi64 " (MB/s)\n",
                            info->value->cpu_index,
                            info->value->limit_rate,
                            info->value->current_rate);
    }
}
682