1 /*
2 * Dirtyrate implement code
3 *
4 * Copyright (c) 2020 HUAWEI TECHNOLOGIES CO.,LTD.
5 *
6 * Authors:
7 * Chuan Zheng <zhengchuan@huawei.com>
8 *
9 * This work is licensed under the terms of the GNU GPL, version 2 or later.
10 * See the COPYING file in the top-level directory.
11 */
12
13 #include "qemu/osdep.h"
14 #include "qemu/error-report.h"
15 #include <zlib.h>
16 #include "hw/core/cpu.h"
17 #include "qapi/error.h"
18 #include "exec/ramblock.h"
19 #include "exec/target_page.h"
20 #include "qemu/rcu_queue.h"
21 #include "qemu/main-loop.h"
22 #include "qapi/qapi-commands-migration.h"
23 #include "ram.h"
24 #include "trace.h"
25 #include "dirtyrate.h"
26 #include "monitor/hmp.h"
27 #include "monitor/monitor.h"
28 #include "qapi/qmp/qdict.h"
29 #include "sysemu/kvm.h"
30 #include "sysemu/runstate.h"
31 #include "exec/memory.h"
32 #include "qemu/xxhash.h"
33
/*
 * total_dirty_pages is protected by the BQL and is used to count the
 * pages dirtied between two calls to memory_global_dirty_log_sync.
 */
uint64_t total_dirty_pages;

/*
 * Pair of dirty page counter snapshots: one taken when a measurement
 * starts and one taken when it ends.
 */
typedef struct DirtyPageRecord {
    uint64_t start_pages;
    uint64_t end_pages;
} DirtyPageRecord;

/* Measurement state; changed through dirtyrate_set_state(). */
static int CalculatingState = DIRTY_RATE_STATUS_UNSTARTED;
/* Parameters and results of the measurement; see init_dirtyrate_stat(). */
static struct DirtyRateStat DirtyStat;
/* Mode of the last calc-dirty-rate request; set in qmp_calc_dirty_rate(). */
static DirtyRateMeasureMode dirtyrate_mode =
    DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
50
dirty_stat_wait(int64_t msec,int64_t initial_time)51 static int64_t dirty_stat_wait(int64_t msec, int64_t initial_time)
52 {
53 int64_t current_time;
54
55 current_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
56 if ((current_time - initial_time) >= msec) {
57 msec = current_time - initial_time;
58 } else {
59 g_usleep((msec + initial_time - current_time) * 1000);
60 /* g_usleep may overshoot */
61 msec = qemu_clock_get_ms(QEMU_CLOCK_REALTIME) - initial_time;
62 }
63
64 return msec;
65 }
66
/*
 * Copy @cpu's dirty page counter into the entry of @dirty_pages selected
 * by cpu->cpu_index. The start_pages field is written when @start is
 * true, the end_pages field otherwise.
 */
static inline void record_dirtypages(DirtyPageRecord *dirty_pages,
                                     CPUState *cpu, bool start)
{
    DirtyPageRecord *slot = &dirty_pages[cpu->cpu_index];

    if (!start) {
        slot->end_pages = cpu->dirty_pages;
        return;
    }

    slot->start_pages = cpu->dirty_pages;
}
76
/*
 * Turn the page counter difference recorded in @dirty_pages into a rate:
 * the number of newly dirtied pages, converted with
 * qemu_target_pages_to_MiB(), per second of the @calc_time_ms interval.
 */
static int64_t do_calculate_dirtyrate(DirtyPageRecord dirty_pages,
                                      int64_t calc_time_ms)
{
    uint64_t new_pages = dirty_pages.end_pages - dirty_pages.start_pages;

    /*
     * Scale by 1000 ms/s first and convert to MiB afterwards, so that
     * the conversion does not throw away precision.
     */
    uint64_t scaled_pages = new_pages * 1000;

    return qemu_target_pages_to_MiB(scaled_pages) / calc_time_ms;
}
90
global_dirty_log_change(unsigned int flag,bool start)91 void global_dirty_log_change(unsigned int flag, bool start)
92 {
93 Error *local_err = NULL;
94 bool ret;
95
96 bql_lock();
97 if (start) {
98 ret = memory_global_dirty_log_start(flag, &local_err);
99 if (!ret) {
100 error_report_err(local_err);
101 }
102 } else {
103 memory_global_dirty_log_stop(flag);
104 }
105 bql_unlock();
106 }
107
/*
 * global_dirty_log_sync
 *
 * With the BQL held:
 * 1. run memory_global_dirty_log_sync()
 * 2. when @one_shot is set, also stop the dirty tracking named by @flag
 */
static void global_dirty_log_sync(unsigned int flag, bool one_shot)
{
    bql_lock();

    memory_global_dirty_log_sync(false);

    /* one-shot users are done with dirty tracking after this sync */
    if (one_shot) {
        memory_global_dirty_log_stop(flag);
    }

    bql_unlock();
}
122
vcpu_dirty_stat_alloc(VcpuStat * stat)123 static DirtyPageRecord *vcpu_dirty_stat_alloc(VcpuStat *stat)
124 {
125 CPUState *cpu;
126 int nvcpu = 0;
127
128 CPU_FOREACH(cpu) {
129 nvcpu++;
130 }
131
132 stat->nvcpu = nvcpu;
133 stat->rates = g_new0(DirtyRateVcpu, nvcpu);
134
135 return g_new0(DirtyPageRecord, nvcpu);
136 }
137
/*
 * Snapshot the dirty page counter of every CPU into @records.
 * @start selects whether the start or the end snapshot is written.
 */
static void vcpu_dirty_stat_collect(DirtyPageRecord *records,
                                    bool start)
{
    CPUState *cs;

    CPU_FOREACH(cs) {
        record_dirtypages(records, cs, start);
    }
}
147
/*
 * Measure the dirty rate of each vCPU over an interval of at least
 * @calc_time_ms milliseconds.
 *
 * The per-vCPU dirty page counters are snapshotted under
 * qemu_cpu_list_lock, the function waits for the interval to pass, syncs
 * the dirty log via global_dirty_log_sync(@flag, @one_shot) and then
 * takes the second snapshot. If the CPU list generation id changed in
 * between, both arrays are thrown away and the measurement starts over.
 *
 * On return stat->nvcpu and stat->rates describe the result; stat->rates
 * is allocated here and is not freed by this function.
 *
 * NOTE(review): with @one_shot set, global_dirty_log_sync() has already
 * stopped dirty tracking by the time a retry happens - confirm that a
 * second pass is still meaningful in that case.
 *
 * Returns the measured interval in milliseconds.
 */
int64_t vcpu_calculate_dirtyrate(int64_t calc_time_ms,
                                 VcpuStat *stat,
                                 unsigned int flag,
                                 bool one_shot)
{
    DirtyPageRecord *records;
    int64_t init_time_ms;
    int64_t duration;
    int64_t dirtyrate;
    int i = 0;
    unsigned int gen_id;

retry:
    init_time_ms = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);

    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
        gen_id = cpu_list_generation_id_get();
        records = vcpu_dirty_stat_alloc(stat);
        vcpu_dirty_stat_collect(records, true);
    }

    duration = dirty_stat_wait(calc_time_ms, init_time_ms);

    global_dirty_log_sync(flag, one_shot);

    WITH_QEMU_LOCK_GUARD(&qemu_cpu_list_lock) {
        if (gen_id != cpu_list_generation_id_get()) {
            g_free(records);
            g_free(stat->rates);
            stat->rates = NULL;
            /*
             * Do not unlock by hand here: the lock guard releases
             * qemu_cpu_list_lock when the goto leaves its scope, so an
             * explicit cpu_list_unlock() would unlock the mutex twice.
             */
            goto retry;
        }
        vcpu_dirty_stat_collect(records, false);
    }

    for (i = 0; i < stat->nvcpu; i++) {
        dirtyrate = do_calculate_dirtyrate(records[i], duration);

        stat->rates[i].id = i;
        stat->rates[i].dirty_rate = dirtyrate;

        trace_dirtyrate_do_calculate_vcpu(i, dirtyrate);
    }

    g_free(records);

    return duration;
}
196
is_calc_time_valid(int64_t msec)197 static bool is_calc_time_valid(int64_t msec)
198 {
199 if ((msec < MIN_CALC_TIME_MS) || (msec > MAX_CALC_TIME_MS)) {
200 return false;
201 }
202
203 return true;
204 }
205
is_sample_pages_valid(int64_t pages)206 static bool is_sample_pages_valid(int64_t pages)
207 {
208 return pages >= MIN_SAMPLE_PAGE_COUNT &&
209 pages <= MAX_SAMPLE_PAGE_COUNT;
210 }
211
dirtyrate_set_state(int * state,int old_state,int new_state)212 static int dirtyrate_set_state(int *state, int old_state, int new_state)
213 {
214 assert(new_state < DIRTY_RATE_STATUS__MAX);
215 trace_dirtyrate_set_state(DirtyRateStatus_str(new_state));
216 if (qatomic_cmpxchg(state, old_state, new_state) == old_state) {
217 return 0;
218 } else {
219 return -1;
220 }
221 }
222
223 /* Decimal power of given time unit relative to one second */
time_unit_to_power(TimeUnit time_unit)224 static int time_unit_to_power(TimeUnit time_unit)
225 {
226 switch (time_unit) {
227 case TIME_UNIT_SECOND:
228 return 0;
229 case TIME_UNIT_MILLISECOND:
230 return -3;
231 default:
232 assert(false); /* unreachable */
233 return 0;
234 }
235 }
236
/*
 * Convert @value from @unit_from to @unit_to.
 *
 * Converting to a coarser unit divides by ten once per decimal step, so
 * the result is truncated towards zero. Converting to a finer unit
 * multiplies by ten once per step; if that would overflow int64_t the
 * result saturates at INT64_MAX or INT64_MIN instead of invoking
 * undefined behaviour. Callers that range-check the result then see an
 * out-of-range value rather than a wrapped one.
 */
static int64_t convert_time_unit(int64_t value, TimeUnit unit_from,
                                 TimeUnit unit_to)
{
    int power = time_unit_to_power(unit_from) -
                time_unit_to_power(unit_to);

    while (power < 0) {
        value /= 10;
        power += 1;
    }

    while (power > 0) {
        /* signed overflow is undefined, so check before multiplying */
        if (value > INT64_MAX / 10) {
            return INT64_MAX;
        }
        if (value < INT64_MIN / 10) {
            return INT64_MIN;
        }
        value *= 10;
        power -= 1;
    }

    return value;
}
252
253
254 static struct DirtyRateInfo *
query_dirty_rate_info(TimeUnit calc_time_unit)255 query_dirty_rate_info(TimeUnit calc_time_unit)
256 {
257 int i;
258 int64_t dirty_rate = DirtyStat.dirty_rate;
259 struct DirtyRateInfo *info = g_new0(DirtyRateInfo, 1);
260 DirtyRateVcpuList *head = NULL, **tail = &head;
261
262 info->status = CalculatingState;
263 info->start_time = DirtyStat.start_time;
264 info->calc_time = convert_time_unit(DirtyStat.calc_time_ms,
265 TIME_UNIT_MILLISECOND,
266 calc_time_unit);
267 info->calc_time_unit = calc_time_unit;
268 info->sample_pages = DirtyStat.sample_pages;
269 info->mode = dirtyrate_mode;
270
271 if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURED) {
272 info->has_dirty_rate = true;
273 info->dirty_rate = dirty_rate;
274
275 if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
276 /*
277 * set sample_pages with 0 to indicate page sampling
278 * isn't enabled
279 **/
280 info->sample_pages = 0;
281 info->has_vcpu_dirty_rate = true;
282 for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
283 DirtyRateVcpu *rate = g_new0(DirtyRateVcpu, 1);
284 rate->id = DirtyStat.dirty_ring.rates[i].id;
285 rate->dirty_rate = DirtyStat.dirty_ring.rates[i].dirty_rate;
286 QAPI_LIST_APPEND(tail, rate);
287 }
288 info->vcpu_dirty_rate = head;
289 }
290
291 if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
292 info->sample_pages = 0;
293 }
294 }
295
296 trace_query_dirty_rate_info(DirtyRateStatus_str(CalculatingState));
297
298 return info;
299 }
300
init_dirtyrate_stat(struct DirtyRateConfig config)301 static void init_dirtyrate_stat(struct DirtyRateConfig config)
302 {
303 DirtyStat.dirty_rate = -1;
304 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
305 DirtyStat.calc_time_ms = config.calc_time_ms;
306 DirtyStat.sample_pages = config.sample_pages_per_gigabytes;
307
308 switch (config.mode) {
309 case DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING:
310 DirtyStat.page_sampling.total_dirty_samples = 0;
311 DirtyStat.page_sampling.total_sample_count = 0;
312 DirtyStat.page_sampling.total_block_mem_MB = 0;
313 break;
314 case DIRTY_RATE_MEASURE_MODE_DIRTY_RING:
315 DirtyStat.dirty_ring.nvcpu = -1;
316 DirtyStat.dirty_ring.rates = NULL;
317 break;
318 default:
319 break;
320 }
321 }
322
cleanup_dirtyrate_stat(struct DirtyRateConfig config)323 static void cleanup_dirtyrate_stat(struct DirtyRateConfig config)
324 {
325 /* last calc-dirty-rate qmp use dirty ring mode */
326 if (dirtyrate_mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
327 free(DirtyStat.dirty_ring.rates);
328 DirtyStat.dirty_ring.rates = NULL;
329 }
330 }
331
update_dirtyrate_stat(struct RamblockDirtyInfo * info)332 static void update_dirtyrate_stat(struct RamblockDirtyInfo *info)
333 {
334 DirtyStat.page_sampling.total_dirty_samples += info->sample_dirty_count;
335 DirtyStat.page_sampling.total_sample_count += info->sample_pages_count;
336 /* size of total pages in MB */
337 DirtyStat.page_sampling.total_block_mem_MB +=
338 qemu_target_pages_to_MiB(info->ramblock_pages);
339 }
340
update_dirtyrate(uint64_t msec)341 static void update_dirtyrate(uint64_t msec)
342 {
343 uint64_t dirtyrate;
344 uint64_t total_dirty_samples = DirtyStat.page_sampling.total_dirty_samples;
345 uint64_t total_sample_count = DirtyStat.page_sampling.total_sample_count;
346 uint64_t total_block_mem_MB = DirtyStat.page_sampling.total_block_mem_MB;
347
348 dirtyrate = total_dirty_samples * total_block_mem_MB *
349 1000 / (total_sample_count * msec);
350
351 DirtyStat.dirty_rate = dirtyrate;
352 }
353
354 /*
355 * Compute hash of a single page of size TARGET_PAGE_SIZE.
356 */
compute_page_hash(void * ptr)357 static uint32_t compute_page_hash(void *ptr)
358 {
359 size_t page_size = qemu_target_page_size();
360 uint32_t i;
361 uint64_t v1, v2, v3, v4;
362 uint64_t res;
363 const uint64_t *p = ptr;
364
365 v1 = QEMU_XXHASH_SEED + XXH_PRIME64_1 + XXH_PRIME64_2;
366 v2 = QEMU_XXHASH_SEED + XXH_PRIME64_2;
367 v3 = QEMU_XXHASH_SEED + 0;
368 v4 = QEMU_XXHASH_SEED - XXH_PRIME64_1;
369 for (i = 0; i < page_size / 8; i += 4) {
370 v1 = XXH64_round(v1, p[i + 0]);
371 v2 = XXH64_round(v2, p[i + 1]);
372 v3 = XXH64_round(v3, p[i + 2]);
373 v4 = XXH64_round(v4, p[i + 3]);
374 }
375 res = XXH64_mergerounds(v1, v2, v3, v4);
376 res += page_size;
377 res = XXH64_avalanche(res);
378 return (uint32_t)(res & UINT32_MAX);
379 }
380
381
382 /*
383 * get hash result for the sampled memory with length of TARGET_PAGE_SIZE
384 * in ramblock, which starts from ramblock base address.
385 */
get_ramblock_vfn_hash(struct RamblockDirtyInfo * info,uint64_t vfn)386 static uint32_t get_ramblock_vfn_hash(struct RamblockDirtyInfo *info,
387 uint64_t vfn)
388 {
389 uint32_t hash;
390
391 hash = compute_page_hash(info->ramblock_addr +
392 vfn * qemu_target_page_size());
393
394 trace_get_ramblock_vfn_hash(info->idstr, vfn, hash);
395 return hash;
396 }
397
save_ramblock_hash(struct RamblockDirtyInfo * info)398 static bool save_ramblock_hash(struct RamblockDirtyInfo *info)
399 {
400 unsigned int sample_pages_count;
401 int i;
402 GRand *rand;
403
404 sample_pages_count = info->sample_pages_count;
405
406 /* ramblock size less than one page, return success to skip this ramblock */
407 if (unlikely(info->ramblock_pages == 0 || sample_pages_count == 0)) {
408 return true;
409 }
410
411 info->hash_result = g_try_malloc0_n(sample_pages_count,
412 sizeof(uint32_t));
413 if (!info->hash_result) {
414 return false;
415 }
416
417 info->sample_page_vfn = g_try_malloc0_n(sample_pages_count,
418 sizeof(uint64_t));
419 if (!info->sample_page_vfn) {
420 g_free(info->hash_result);
421 return false;
422 }
423
424 rand = g_rand_new();
425 for (i = 0; i < sample_pages_count; i++) {
426 info->sample_page_vfn[i] = g_rand_int_range(rand, 0,
427 info->ramblock_pages - 1);
428 info->hash_result[i] = get_ramblock_vfn_hash(info,
429 info->sample_page_vfn[i]);
430 }
431 g_rand_free(rand);
432
433 return true;
434 }
435
/*
 * Fill @info with the identity and size of @block and with the number
 * of pages to sample from it, derived from
 * config->sample_pages_per_gigabytes.
 */
static void get_ramblock_dirty_info(RAMBlock *block,
                                    struct RamblockDirtyInfo *info,
                                    struct DirtyRateConfig *config)
{
    uint64_t per_gigabyte = config->sample_pages_per_gigabytes;

    info->ramblock_addr = qemu_ram_get_host_addr(block);
    /* NOTE(review): unbounded copy - assumes info->idstr is big enough */
    strcpy(info->idstr, qemu_ram_get_idstr(block));

    /* used length in bytes * samples per GB, shifted by 30 bits (1 GB) */
    info->sample_pages_count =
        (qemu_ram_get_used_length(block) * per_gigabyte) >> 30;

    /* used length in bytes shifted by the page bits gives the page count */
    info->ramblock_pages =
        qemu_ram_get_used_length(block) >> qemu_target_page_bits();
}
451
free_ramblock_dirty_info(struct RamblockDirtyInfo * infos,int count)452 static void free_ramblock_dirty_info(struct RamblockDirtyInfo *infos, int count)
453 {
454 int i;
455
456 if (!infos) {
457 return;
458 }
459
460 for (i = 0; i < count; i++) {
461 g_free(infos[i].sample_page_vfn);
462 g_free(infos[i].hash_result);
463 }
464 g_free(infos);
465 }
466
/*
 * Decide whether @block is left out of page sampling.
 *
 * Returns true, after emitting a trace event, when the used length of
 * the block is below MIN_RAMBLOCK_SIZE << 10; false otherwise.
 */
static bool skip_sample_ramblock(RAMBlock *block)
{
    bool too_small =
        qemu_ram_get_used_length(block) < (MIN_RAMBLOCK_SIZE << 10);

    if (too_small) {
        trace_skip_sample_ramblock(block->idstr,
                                   qemu_ram_get_used_length(block));
    }

    return too_small;
}
480
record_ramblock_hash_info(struct RamblockDirtyInfo ** block_dinfo,struct DirtyRateConfig config,int * block_count)481 static bool record_ramblock_hash_info(struct RamblockDirtyInfo **block_dinfo,
482 struct DirtyRateConfig config,
483 int *block_count)
484 {
485 struct RamblockDirtyInfo *info = NULL;
486 struct RamblockDirtyInfo *dinfo = NULL;
487 RAMBlock *block = NULL;
488 int total_count = 0;
489 int index = 0;
490 bool ret = false;
491
492 RAMBLOCK_FOREACH_MIGRATABLE(block) {
493 if (skip_sample_ramblock(block)) {
494 continue;
495 }
496 total_count++;
497 }
498
499 dinfo = g_try_malloc0_n(total_count, sizeof(struct RamblockDirtyInfo));
500 if (dinfo == NULL) {
501 goto out;
502 }
503
504 RAMBLOCK_FOREACH_MIGRATABLE(block) {
505 if (skip_sample_ramblock(block)) {
506 continue;
507 }
508 if (index >= total_count) {
509 break;
510 }
511 info = &dinfo[index];
512 get_ramblock_dirty_info(block, info, &config);
513 if (!save_ramblock_hash(info)) {
514 goto out;
515 }
516 index++;
517 }
518 ret = true;
519
520 out:
521 *block_count = index;
522 *block_dinfo = dinfo;
523 return ret;
524 }
525
calc_page_dirty_rate(struct RamblockDirtyInfo * info)526 static void calc_page_dirty_rate(struct RamblockDirtyInfo *info)
527 {
528 uint32_t hash;
529 int i;
530
531 for (i = 0; i < info->sample_pages_count; i++) {
532 hash = get_ramblock_vfn_hash(info, info->sample_page_vfn[i]);
533 if (hash != info->hash_result[i]) {
534 trace_calc_page_dirty_rate(info->idstr, hash, info->hash_result[i]);
535 info->sample_dirty_count++;
536 }
537 }
538 }
539
540 static struct RamblockDirtyInfo *
find_block_matched(RAMBlock * block,int count,struct RamblockDirtyInfo * infos)541 find_block_matched(RAMBlock *block, int count,
542 struct RamblockDirtyInfo *infos)
543 {
544 int i;
545
546 for (i = 0; i < count; i++) {
547 if (!strcmp(infos[i].idstr, qemu_ram_get_idstr(block))) {
548 break;
549 }
550 }
551
552 if (i == count) {
553 return NULL;
554 }
555
556 if (infos[i].ramblock_addr != qemu_ram_get_host_addr(block) ||
557 infos[i].ramblock_pages !=
558 (qemu_ram_get_used_length(block) >> qemu_target_page_bits())) {
559 trace_find_page_matched(block->idstr);
560 return NULL;
561 }
562
563 return &infos[i];
564 }
565
compare_page_hash_info(struct RamblockDirtyInfo * info,int block_count)566 static bool compare_page_hash_info(struct RamblockDirtyInfo *info,
567 int block_count)
568 {
569 struct RamblockDirtyInfo *block_dinfo = NULL;
570 RAMBlock *block = NULL;
571
572 RAMBLOCK_FOREACH_MIGRATABLE(block) {
573 if (skip_sample_ramblock(block)) {
574 continue;
575 }
576 block_dinfo = find_block_matched(block, block_count, info);
577 if (block_dinfo == NULL) {
578 continue;
579 }
580 calc_page_dirty_rate(block_dinfo);
581 update_dirtyrate_stat(block_dinfo);
582 }
583
584 if (DirtyStat.page_sampling.total_sample_count == 0) {
585 return false;
586 }
587
588 return true;
589 }
590
/*
 * Store the current value of total_dirty_pages in @dirty_pages:
 * in start_pages when @start is true, in end_pages otherwise.
 */
static inline void record_dirtypages_bitmap(DirtyPageRecord *dirty_pages,
                                            bool start)
{
    if (!start) {
        dirty_pages->end_pages = total_dirty_pages;
        return;
    }

    dirty_pages->start_pages = total_dirty_pages;
}
600
dirtyrate_manual_reset_protect(void)601 static inline void dirtyrate_manual_reset_protect(void)
602 {
603 RAMBlock *block = NULL;
604
605 WITH_RCU_READ_LOCK_GUARD() {
606 RAMBLOCK_FOREACH_MIGRATABLE(block) {
607 memory_region_clear_dirty_bitmap(block->mr, 0,
608 block->used_length);
609 }
610 }
611 }
612
calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)613 static void calculate_dirtyrate_dirty_bitmap(struct DirtyRateConfig config)
614 {
615 int64_t start_time;
616 DirtyPageRecord dirty_pages;
617 Error *local_err = NULL;
618
619 bql_lock();
620 if (!memory_global_dirty_log_start(GLOBAL_DIRTY_DIRTY_RATE, &local_err)) {
621 error_report_err(local_err);
622 }
623
624 /*
625 * 1'round of log sync may return all 1 bits with
626 * KVM_DIRTY_LOG_INITIALLY_SET enable
627 * skip it unconditionally and start dirty tracking
628 * from 2'round of log sync
629 */
630 memory_global_dirty_log_sync(false);
631
632 /*
633 * reset page protect manually and unconditionally.
634 * this make sure kvm dirty log be cleared if
635 * KVM_DIRTY_LOG_MANUAL_PROTECT_ENABLE cap is enabled.
636 */
637 dirtyrate_manual_reset_protect();
638 bql_unlock();
639
640 record_dirtypages_bitmap(&dirty_pages, true);
641
642 start_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
643 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
644
645 DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms, start_time);
646
647 /*
648 * do two things.
649 * 1. fetch dirty bitmap from kvm
650 * 2. stop dirty tracking
651 */
652 global_dirty_log_sync(GLOBAL_DIRTY_DIRTY_RATE, true);
653
654 record_dirtypages_bitmap(&dirty_pages, false);
655
656 DirtyStat.dirty_rate = do_calculate_dirtyrate(dirty_pages,
657 DirtyStat.calc_time_ms);
658 }
659
calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)660 static void calculate_dirtyrate_dirty_ring(struct DirtyRateConfig config)
661 {
662 uint64_t dirtyrate = 0;
663 uint64_t dirtyrate_sum = 0;
664 int i = 0;
665
666 /* start log sync */
667 global_dirty_log_change(GLOBAL_DIRTY_DIRTY_RATE, true);
668
669 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
670
671 /* calculate vcpu dirtyrate */
672 DirtyStat.calc_time_ms = vcpu_calculate_dirtyrate(config.calc_time_ms,
673 &DirtyStat.dirty_ring,
674 GLOBAL_DIRTY_DIRTY_RATE,
675 true);
676
677 /* calculate vm dirtyrate */
678 for (i = 0; i < DirtyStat.dirty_ring.nvcpu; i++) {
679 dirtyrate = DirtyStat.dirty_ring.rates[i].dirty_rate;
680 DirtyStat.dirty_ring.rates[i].dirty_rate = dirtyrate;
681 dirtyrate_sum += dirtyrate;
682 }
683
684 DirtyStat.dirty_rate = dirtyrate_sum;
685 }
686
calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)687 static void calculate_dirtyrate_sample_vm(struct DirtyRateConfig config)
688 {
689 struct RamblockDirtyInfo *block_dinfo = NULL;
690 int block_count = 0;
691 int64_t initial_time;
692
693 rcu_read_lock();
694 initial_time = qemu_clock_get_ms(QEMU_CLOCK_REALTIME);
695 DirtyStat.start_time = qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000;
696 if (!record_ramblock_hash_info(&block_dinfo, config, &block_count)) {
697 goto out;
698 }
699 rcu_read_unlock();
700
701 DirtyStat.calc_time_ms = dirty_stat_wait(config.calc_time_ms,
702 initial_time);
703
704 rcu_read_lock();
705 if (!compare_page_hash_info(block_dinfo, block_count)) {
706 goto out;
707 }
708
709 update_dirtyrate(DirtyStat.calc_time_ms);
710
711 out:
712 rcu_read_unlock();
713 free_ramblock_dirty_info(block_dinfo, block_count);
714 }
715
calculate_dirtyrate(struct DirtyRateConfig config)716 static void calculate_dirtyrate(struct DirtyRateConfig config)
717 {
718 if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) {
719 calculate_dirtyrate_dirty_bitmap(config);
720 } else if (config.mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) {
721 calculate_dirtyrate_dirty_ring(config);
722 } else {
723 calculate_dirtyrate_sample_vm(config);
724 }
725
726 trace_dirtyrate_calculate(DirtyStat.dirty_rate);
727 }
728
get_dirtyrate_thread(void * arg)729 void *get_dirtyrate_thread(void *arg)
730 {
731 struct DirtyRateConfig config = *(struct DirtyRateConfig *)arg;
732 int ret;
733 rcu_register_thread();
734
735 ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_UNSTARTED,
736 DIRTY_RATE_STATUS_MEASURING);
737 if (ret == -1) {
738 error_report("change dirtyrate state failed.");
739 return NULL;
740 }
741
742 calculate_dirtyrate(config);
743
744 ret = dirtyrate_set_state(&CalculatingState, DIRTY_RATE_STATUS_MEASURING,
745 DIRTY_RATE_STATUS_MEASURED);
746 if (ret == -1) {
747 error_report("change dirtyrate state failed.");
748 }
749
750 rcu_unregister_thread();
751 return NULL;
752 }
753
qmp_calc_dirty_rate(int64_t calc_time,bool has_calc_time_unit,TimeUnit calc_time_unit,bool has_sample_pages,int64_t sample_pages,bool has_mode,DirtyRateMeasureMode mode,Error ** errp)754 void qmp_calc_dirty_rate(int64_t calc_time,
755 bool has_calc_time_unit,
756 TimeUnit calc_time_unit,
757 bool has_sample_pages,
758 int64_t sample_pages,
759 bool has_mode,
760 DirtyRateMeasureMode mode,
761 Error **errp)
762 {
763 static struct DirtyRateConfig config;
764 QemuThread thread;
765 int ret;
766
767 /*
768 * If the dirty rate is already being measured, don't attempt to start.
769 */
770 if (qatomic_read(&CalculatingState) == DIRTY_RATE_STATUS_MEASURING) {
771 error_setg(errp, "the dirty rate is already being measured.");
772 return;
773 }
774
775 int64_t calc_time_ms = convert_time_unit(
776 calc_time,
777 has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND,
778 TIME_UNIT_MILLISECOND
779 );
780
781 if (!is_calc_time_valid(calc_time_ms)) {
782 error_setg(errp, "Calculation time is out of range [%dms, %dms].",
783 MIN_CALC_TIME_MS, MAX_CALC_TIME_MS);
784 return;
785 }
786
787 if (!has_mode) {
788 mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
789 }
790
791 if (has_sample_pages && mode != DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
792 error_setg(errp, "sample-pages is used only in page-sampling mode");
793 return;
794 }
795
796 if (has_sample_pages) {
797 if (!is_sample_pages_valid(sample_pages)) {
798 error_setg(errp, "sample-pages is out of range[%d, %d].",
799 MIN_SAMPLE_PAGE_COUNT,
800 MAX_SAMPLE_PAGE_COUNT);
801 return;
802 }
803 } else {
804 sample_pages = DIRTYRATE_DEFAULT_SAMPLE_PAGES;
805 }
806
807 /*
808 * dirty ring mode only works when kvm dirty ring is enabled.
809 * on the contrary, dirty bitmap mode is not.
810 */
811 if (((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_RING) &&
812 !kvm_dirty_ring_enabled()) ||
813 ((mode == DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP) &&
814 kvm_dirty_ring_enabled())) {
815 error_setg(errp, "mode %s is not enabled, use other method instead.",
816 DirtyRateMeasureMode_str(mode));
817 return;
818 }
819
820 /*
821 * Init calculation state as unstarted.
822 */
823 ret = dirtyrate_set_state(&CalculatingState, CalculatingState,
824 DIRTY_RATE_STATUS_UNSTARTED);
825 if (ret == -1) {
826 error_setg(errp, "init dirty rate calculation state failed.");
827 return;
828 }
829
830 config.calc_time_ms = calc_time_ms;
831 config.sample_pages_per_gigabytes = sample_pages;
832 config.mode = mode;
833
834 cleanup_dirtyrate_stat(config);
835
836 /*
837 * update dirty rate mode so that we can figure out what mode has
838 * been used in last calculation
839 **/
840 dirtyrate_mode = mode;
841
842 init_dirtyrate_stat(config);
843
844 qemu_thread_create(&thread, "get_dirtyrate", get_dirtyrate_thread,
845 (void *)&config, QEMU_THREAD_DETACHED);
846 }
847
848
qmp_query_dirty_rate(bool has_calc_time_unit,TimeUnit calc_time_unit,Error ** errp)849 struct DirtyRateInfo *qmp_query_dirty_rate(bool has_calc_time_unit,
850 TimeUnit calc_time_unit,
851 Error **errp)
852 {
853 return query_dirty_rate_info(
854 has_calc_time_unit ? calc_time_unit : TIME_UNIT_SECOND);
855 }
856
/*
 * HMP handler printing the state and, once measured, the result of the
 * dirty rate measurement to @mon. @qdict is not used.
 */
void hmp_info_dirty_rate(Monitor *mon, const QDict *qdict)
{
    DirtyRateInfo *info = query_dirty_rate_info(TIME_UNIT_SECOND);

    monitor_printf(mon, "Status: %s\n",
                   DirtyRateStatus_str(info->status));
    /*
     * start_time is recorded as qemu_clock_get_ms(QEMU_CLOCK_HOST) / 1000,
     * i.e. in seconds, so label it accordingly.
     */
    monitor_printf(mon, "Start Time: %"PRIi64" (sec)\n",
                   info->start_time);
    if (info->mode == DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING) {
        monitor_printf(mon, "Sample Pages: %"PRIu64" (per GB)\n",
                       info->sample_pages);
    }
    monitor_printf(mon, "Period: %"PRIi64" (sec)\n",
                   info->calc_time);
    monitor_printf(mon, "Mode: %s\n",
                   DirtyRateMeasureMode_str(info->mode));
    monitor_printf(mon, "Dirty rate: ");
    if (info->has_dirty_rate) {
        monitor_printf(mon, "%"PRIi64" (MB/s)\n", info->dirty_rate);
        if (info->has_vcpu_dirty_rate) {
            DirtyRateVcpuList *rate, *head = info->vcpu_dirty_rate;
            for (rate = head; rate != NULL; rate = rate->next) {
                monitor_printf(mon, "vcpu[%"PRIi64"], Dirty rate: %"PRIi64
                               " (MB/s)\n", rate->value->id,
                               rate->value->dirty_rate);
            }
        }
    } else {
        monitor_printf(mon, "(not ready)\n");
    }

    /* the per-vCPU list is released separately from the info itself */
    qapi_free_DirtyRateVcpuList(info->vcpu_dirty_rate);
    g_free(info);
}
891
/*
 * HMP handler starting a dirty rate measurement.
 *
 * Reads "second", "sample_pages_per_GB", "dirty_ring" and "dirty_bitmap"
 * from @qdict, rejects a zero period and the combination of both mode
 * flags, and hands the request to qmp_calc_dirty_rate() with the period
 * in seconds. A sample page count of -1 stands for "not given".
 */
void hmp_calc_dirty_rate(Monitor *mon, const QDict *qdict)
{
    int64_t period_sec = qdict_get_try_int(qdict, "second", 0);
    int64_t sample_pages = qdict_get_try_int(qdict, "sample_pages_per_GB", -1);
    bool use_ring = qdict_get_try_bool(qdict, "dirty_ring", false);
    bool use_bitmap = qdict_get_try_bool(qdict, "dirty_bitmap", false);
    DirtyRateMeasureMode mode;
    Error *err = NULL;

    if (period_sec == 0) {
        monitor_printf(mon, "Incorrect period length specified!\n");
        return;
    }

    if (use_ring && use_bitmap) {
        monitor_printf(mon, "Either dirty ring or dirty bitmap "
                       "can be specified!\n");
        return;
    }

    if (use_bitmap) {
        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_BITMAP;
    } else if (use_ring) {
        mode = DIRTY_RATE_MEASURE_MODE_DIRTY_RING;
    } else {
        mode = DIRTY_RATE_MEASURE_MODE_PAGE_SAMPLING;
    }

    qmp_calc_dirty_rate(period_sec, /* calc-time */
                        false, TIME_UNIT_SECOND, /* calc-time-unit */
                        sample_pages != -1, sample_pages,
                        true, mode,
                        &err);
    if (err) {
        hmp_handle_error(mon, err);
        return;
    }

    monitor_printf(mon, "Starting dirty rate measurement with period %"PRIi64
                   " seconds\n", period_sec);
    monitor_printf(mon, "[Please use 'info dirty_rate' to check results]\n");
}
933