xref: /qemu/include/exec/ram_addr.h (revision ab9056ff)
/*
 * Declarations for cpu physical memory functions
 *
 * Copyright 2011 Red Hat, Inc. and/or its affiliates
 *
 * Authors:
 *  Avi Kivity <avi@redhat.com>
 *
 * This work is licensed under the terms of the GNU GPL, version 2 or
 * later.  See the COPYING file in the top-level directory.
 *
 */

/*
 * This header is for use by exec.c and memory.c ONLY.  Do not include it.
 * The functions declared here will be removed soon.
 */

#ifndef RAM_ADDR_H
#define RAM_ADDR_H

#ifndef CONFIG_USER_ONLY
#include "cpu.h"
#include "hw/xen/xen.h"
#include "sysemu/tcg.h"
#include "exec/ramlist.h"

struct RAMBlock {
    struct rcu_head rcu;
    struct MemoryRegion *mr;
    uint8_t *host;
    uint8_t *colo_cache; /* For colo, VM's ram cache */
    ram_addr_t offset;
    ram_addr_t used_length;
    ram_addr_t max_length;
    void (*resized)(const char*, uint64_t length, void *host);
    uint32_t flags;
    /* Protected by iothread lock.  */
    char idstr[256];
    /* RCU-enabled, writes protected by the ramlist lock */
    QLIST_ENTRY(RAMBlock) next;
    QLIST_HEAD(, RAMBlockNotifier) ramblock_notifiers;
    int fd;
    size_t page_size;
    /* dirty bitmap used during migration */
    unsigned long *bmap;
    /* bitmap of already received pages in postcopy */
    unsigned long *receivedmap;

    /*
     * bitmap to track already cleared dirty bitmap.  When the bit is
     * set, it means the corresponding memory chunk needs a log-clear.
     * Set this to non-NULL to enable the capability to postpone and
     * split the clearing of the dirty bitmap on the remote node (e.g.,
     * KVM).  The bitmap will be set only when doing a global sync.
     *
     * NOTE: this bitmap is different from the other bitmaps in that
     * one bit can represent multiple guest pages (the ratio is decided
     * by the `clear_bmap_shift' field below).  On the destination
     * side, this should always be NULL, and the field
     * `clear_bmap_shift' is meaningless.
     */
    unsigned long *clear_bmap;
    uint8_t clear_bmap_shift;
};

/**
 * clear_bmap_size: calculate clear bitmap size
 *
 * @pages: number of guest pages
 * @shift: guest page number shift
 *
 * Returns: number of bits for the clear bitmap
 */
static inline long clear_bmap_size(uint64_t pages, uint8_t shift)
{
    return DIV_ROUND_UP(pages, 1UL << shift);
}
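/*
 * Illustrative worked example (the numbers are hypothetical, not taken
 * from this header): one bit of clear_bmap covers 2^clear_bmap_shift
 * guest pages, so a block of 2^20 guest pages with a shift of 18 needs
 * four clear_bmap bits:
 *
 *     uint64_t pages = 1ULL << 20;              guest pages in the block
 *     uint8_t shift = 18;                       hypothetical clear_bmap_shift
 *     long bits = clear_bmap_size(pages, shift);
 *     (bits == DIV_ROUND_UP(1 << 20, 1 << 18) == 4)
 */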

/**
 * clear_bmap_set: set clear bitmap for the page range
 *
 * @rb: the ramblock to operate on
 * @start: the start page number
 * @npages: number of pages to set in the bitmap
 *
 * Returns: None
 */
static inline void clear_bmap_set(RAMBlock *rb, uint64_t start,
                                  uint64_t npages)
{
    uint8_t shift = rb->clear_bmap_shift;

    bitmap_set_atomic(rb->clear_bmap, start >> shift,
                      clear_bmap_size(npages, shift));
}

/**
 * clear_bmap_test_and_clear: test clear bitmap for the page, clear if set
 *
 * @rb: the ramblock to operate on
 * @page: the page number to check
 *
 * Returns: true if the bit was set, false otherwise
 */
static inline bool clear_bmap_test_and_clear(RAMBlock *rb, uint64_t page)
{
    uint8_t shift = rb->clear_bmap_shift;

    return bitmap_test_and_clear_atomic(rb->clear_bmap, page >> shift, 1);
}
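/*
 * Illustrative sketch, not part of this header: the intended pairing is
 * that the migration sync path marks chunks with clear_bmap_set() and the
 * sender, just before transmitting pages from a chunk, does roughly the
 * following (local variable names here are hypothetical):
 *
 *     if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) {
 *         uint8_t shift = rb->clear_bmap_shift;
 *         hwaddr size  = 1ULL << (TARGET_PAGE_BITS + shift);
 *         hwaddr start = ((ram_addr_t)page << TARGET_PAGE_BITS) & (-size);
 *         memory_region_clear_dirty_bitmap(rb->mr, start, size);
 *     }
 */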

static inline bool offset_in_ramblock(RAMBlock *b, ram_addr_t offset)
{
    return b && b->host && offset < b->used_length;
}

static inline void *ramblock_ptr(RAMBlock *block, ram_addr_t offset)
{
    assert(offset_in_ramblock(block, offset));
    return (char *)block->host + offset;
}
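/*
 * Illustrative usage, assuming a valid RAMBlock obtained elsewhere (block,
 * offset, data and len below are caller-supplied and hypothetical):
 * translate an offset within the block into a host virtual address before
 * touching guest memory.
 *
 *     if (offset_in_ramblock(block, offset)) {
 *         void *hva = ramblock_ptr(block, offset);
 *         memcpy(hva, data, len);
 *     }
 */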

static inline unsigned long int ramblock_recv_bitmap_offset(void *host_addr,
                                                            RAMBlock *rb)
{
    uint64_t host_addr_offset =
            (uint64_t)(uintptr_t)(host_addr - (void *)rb->host);
    return host_addr_offset >> TARGET_PAGE_BITS;
}

bool ramblock_is_pmem(RAMBlock *rb);

long qemu_minrampagesize(void);
long qemu_maxrampagesize(void);

/**
 * qemu_ram_alloc_from_file,
 * qemu_ram_alloc_from_fd:  Allocate a ram block from the specified backing
 *                          file or device
 *
 * Parameters:
 *  @size: the size in bytes of the ram block
 *  @mr: the memory region where the ram block is
 *  @ram_flags: specify the properties of the ram block, which can be one
 *              of the following values, or a bit-OR combination of them
 *              - RAM_SHARED: mmap the backing file or device with MAP_SHARED
 *              - RAM_PMEM: the backend @mem_path or @fd is persistent memory
 *              Other bits are ignored.
 *  @mem_path or @fd: specify the backing file or device
 *  @errp: pointer to Error*, to store an error if one occurs
 *
 * Return:
 *  On success, return a pointer to the ram block.
 *  On failure, return NULL.
 */
RAMBlock *qemu_ram_alloc_from_file(ram_addr_t size, MemoryRegion *mr,
                                   uint32_t ram_flags, const char *mem_path,
                                   Error **errp);
RAMBlock *qemu_ram_alloc_from_fd(ram_addr_t size, MemoryRegion *mr,
                                 uint32_t ram_flags, int fd,
                                 Error **errp);
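/*
 * Illustrative call, assuming an already-initialized MemoryRegion and a
 * local Error pointer (the size, region and path below are hypothetical):
 *
 *     Error *err = NULL;
 *     RAMBlock *rb = qemu_ram_alloc_from_file(size, mr, RAM_SHARED,
 *                                             "/dev/dax0.0", &err);
 *     if (!rb) {
 *         error_report_err(err);
 *     }
 */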

RAMBlock *qemu_ram_alloc_from_ptr(ram_addr_t size, void *host,
                                  MemoryRegion *mr, Error **errp);
RAMBlock *qemu_ram_alloc(ram_addr_t size, bool share, MemoryRegion *mr,
                         Error **errp);
RAMBlock *qemu_ram_alloc_resizeable(ram_addr_t size, ram_addr_t max_size,
                                    void (*resized)(const char*,
                                                    uint64_t length,
                                                    void *host),
                                    MemoryRegion *mr, Error **errp);
void qemu_ram_free(RAMBlock *block);

int qemu_ram_resize(RAMBlock *block, ram_addr_t newsize, Error **errp);

#define DIRTY_CLIENTS_ALL     ((1 << DIRTY_MEMORY_NUM) - 1)
#define DIRTY_CLIENTS_NOCODE  (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE))
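/*
 * Illustrative expansion, assuming the usual three dirty memory clients
 * (DIRTY_MEMORY_VGA, DIRTY_MEMORY_CODE, DIRTY_MEMORY_MIGRATION) defined
 * elsewhere, so that DIRTY_MEMORY_NUM == 3 and DIRTY_MEMORY_CODE == 1:
 *
 *     DIRTY_CLIENTS_ALL    == (1 << 3) - 1           == 0b111
 *     DIRTY_CLIENTS_NOCODE == 0b111 & ~(1 << 1)      == 0b101
 */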

void tb_invalidate_phys_range(ram_addr_t start, ram_addr_t end);

static inline bool cpu_physical_memory_get_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = false;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
            unsigned long num = next - base;
            unsigned long found = find_next_bit(blocks->blocks[idx],
                                                num, offset);
            if (found < num) {
                dirty = true;
                break;
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    return dirty;
}
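/*
 * Illustrative note on the block arithmetic used here and in the helpers
 * below: each client's global dirty bitmap is split into DirtyMemoryBlocks
 * of DIRTY_MEMORY_BLOCK_SIZE pages, so for a given page number
 *
 *     idx    = page / DIRTY_MEMORY_BLOCK_SIZE;    which block to look in
 *     offset = page % DIRTY_MEMORY_BLOCK_SIZE;    bit index inside that block
 *
 * A range that crosses a block boundary is handled one block per loop
 * iteration: "next" is capped at the end of the current block and "offset"
 * restarts at 0 for the following block.
 */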

static inline bool cpu_physical_memory_all_dirty(ram_addr_t start,
                                                 ram_addr_t length,
                                                 unsigned client)
{
    DirtyMemoryBlocks *blocks;
    unsigned long end, page;
    unsigned long idx, offset, base;
    bool dirty = true;

    assert(client < DIRTY_MEMORY_NUM);

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    RCU_READ_LOCK_GUARD();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;
    base = page - offset;
    while (page < end) {
        unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long num = next - base;
        unsigned long found = find_next_zero_bit(blocks->blocks[idx], num, offset);
        if (found < num) {
            dirty = false;
            break;
        }

        page = next;
        idx++;
        offset = 0;
        base += DIRTY_MEMORY_BLOCK_SIZE;
    }

    return dirty;
}

static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    return cpu_physical_memory_get_dirty(addr, 1, client);
}

static inline bool cpu_physical_memory_is_clean(ram_addr_t addr)
{
    bool vga = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_VGA);
    bool code = cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_CODE);
    bool migration =
        cpu_physical_memory_get_dirty_flag(addr, DIRTY_MEMORY_MIGRATION);
    return !(vga && code && migration);
}

static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start,
                                                               ram_addr_t length,
                                                               uint8_t mask)
{
    uint8_t ret = 0;

    if (mask & (1 << DIRTY_MEMORY_VGA) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) {
        ret |= (1 << DIRTY_MEMORY_VGA);
    }
    if (mask & (1 << DIRTY_MEMORY_CODE) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) {
        ret |= (1 << DIRTY_MEMORY_CODE);
    }
    if (mask & (1 << DIRTY_MEMORY_MIGRATION) &&
        !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) {
        ret |= (1 << DIRTY_MEMORY_MIGRATION);
    }
    return ret;
}
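/*
 * Illustrative caller-side sketch (simplified; the starting mask of
 * DIRTY_CLIENTS_ALL and the addr/len variables are hypothetical): before a
 * guest-memory write, find out which clients still consider part of the
 * range clean, so that only those clients need their dirty bits set
 * afterwards.
 *
 *     uint8_t dirty_log_mask = DIRTY_CLIENTS_ALL;
 *     dirty_log_mask =
 *         cpu_physical_memory_range_includes_clean(addr, len, dirty_log_mask);
 *     if (dirty_log_mask) {
 *         cpu_physical_memory_set_dirty_range(addr, len, dirty_log_mask);
 *     }
 */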

static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr,
                                                      unsigned client)
{
    unsigned long page, idx, offset;
    DirtyMemoryBlocks *blocks;

    assert(client < DIRTY_MEMORY_NUM);

    page = addr >> TARGET_PAGE_BITS;
    idx = page / DIRTY_MEMORY_BLOCK_SIZE;
    offset = page % DIRTY_MEMORY_BLOCK_SIZE;

    RCU_READ_LOCK_GUARD();

    blocks = atomic_rcu_read(&ram_list.dirty_memory[client]);

    set_bit_atomic(offset, blocks->blocks[idx]);
}

static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start,
                                                       ram_addr_t length,
                                                       uint8_t mask)
{
    DirtyMemoryBlocks *blocks[DIRTY_MEMORY_NUM];
    unsigned long end, page;
    unsigned long idx, offset, base;
    int i;

    if (!mask && !xen_enabled()) {
        return;
    }

    end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS;
    page = start >> TARGET_PAGE_BITS;

    WITH_RCU_READ_LOCK_GUARD() {
        for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
            blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i]);
        }

        idx = page / DIRTY_MEMORY_BLOCK_SIZE;
        offset = page % DIRTY_MEMORY_BLOCK_SIZE;
        base = page - offset;
        while (page < end) {
            unsigned long next = MIN(end, base + DIRTY_MEMORY_BLOCK_SIZE);

            if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_MIGRATION]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_VGA]->blocks[idx],
                                  offset, next - page);
            }
            if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) {
                bitmap_set_atomic(blocks[DIRTY_MEMORY_CODE]->blocks[idx],
                                  offset, next - page);
            }

            page = next;
            idx++;
            offset = 0;
            base += DIRTY_MEMORY_BLOCK_SIZE;
        }
    }

    xen_hvm_modified_memory(start, length);
}
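/*
 * Illustrative usage (sketch; addr and size come from the caller): mark a
 * range dirty for every client except the TCG code client, e.g. after a
 * device DMA write when no translated code needs invalidating:
 *
 *     cpu_physical_memory_set_dirty_range(addr, size, DIRTY_CLIENTS_NOCODE);
 */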

#if !defined(_WIN32)
static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap,
                                                          ram_addr_t start,
                                                          ram_addr_t pages)
{
    unsigned long i, j;
    unsigned long page_number, c;
    hwaddr addr;
    ram_addr_t ram_addr;
    unsigned long len = (pages + HOST_LONG_BITS - 1) / HOST_LONG_BITS;
    unsigned long hpratio = qemu_real_host_page_size / TARGET_PAGE_SIZE;
    unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

    /* start address is aligned at the start of a word? */
    if ((((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) &&
        (hpratio == 1)) {
        unsigned long **blocks[DIRTY_MEMORY_NUM];
        unsigned long idx;
        unsigned long offset;
        long k;
        long nr = BITS_TO_LONGS(pages);

        idx = (start >> TARGET_PAGE_BITS) / DIRTY_MEMORY_BLOCK_SIZE;
        offset = BIT_WORD((start >> TARGET_PAGE_BITS) %
                          DIRTY_MEMORY_BLOCK_SIZE);

        WITH_RCU_READ_LOCK_GUARD() {
            for (i = 0; i < DIRTY_MEMORY_NUM; i++) {
                blocks[i] = atomic_rcu_read(&ram_list.dirty_memory[i])->blocks;
            }

            for (k = 0; k < nr; k++) {
                if (bitmap[k]) {
                    unsigned long temp = leul_to_cpu(bitmap[k]);

                    atomic_or(&blocks[DIRTY_MEMORY_VGA][idx][offset], temp);

                    if (global_dirty_log) {
                        atomic_or(&blocks[DIRTY_MEMORY_MIGRATION][idx][offset],
                                  temp);
                    }

                    if (tcg_enabled()) {
                        atomic_or(&blocks[DIRTY_MEMORY_CODE][idx][offset],
                                  temp);
                    }
                }

                if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                    offset = 0;
                    idx++;
                }
            }
        }

        xen_hvm_modified_memory(start, pages << TARGET_PAGE_BITS);
    } else {
        uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE;

        if (!global_dirty_log) {
            clients &= ~(1 << DIRTY_MEMORY_MIGRATION);
        }

        /*
         * Traversing the bitmap is faster than traversing memory address
         * by address, especially when most of the memory is not dirty.
         */
        for (i = 0; i < len; i++) {
            if (bitmap[i] != 0) {
                c = leul_to_cpu(bitmap[i]);
                do {
                    j = ctzl(c);
                    c &= ~(1ul << j);
                    page_number = (i * HOST_LONG_BITS + j) * hpratio;
                    addr = page_number * TARGET_PAGE_SIZE;
                    ram_addr = start + addr;
                    cpu_physical_memory_set_dirty_range(ram_addr,
                                       TARGET_PAGE_SIZE * hpratio, clients);
                } while (c != 0);
            }
        }
    }
}
#endif /* not _WIN32 */
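/*
 * Illustrative note (numbers hypothetical): hpratio compensates for host
 * pages being larger than target pages.  With a 64 KiB
 * qemu_real_host_page_size and a 4 KiB TARGET_PAGE_SIZE:
 *
 *     unsigned long hpratio = 65536 / 4096;    equals 16
 *
 * so a set bit at word i, bit j of the kernel-provided bitmap dirties the
 * 16 target pages starting at page (i * HOST_LONG_BITS + j) * 16, which is
 * exactly the TARGET_PAGE_SIZE * hpratio range passed to
 * cpu_physical_memory_set_dirty_range() above.
 */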

bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start,
                                              ram_addr_t length,
                                              unsigned client);

DirtyBitmapSnapshot *cpu_physical_memory_snapshot_and_clear_dirty
    (MemoryRegion *mr, hwaddr offset, hwaddr length, unsigned client);

bool cpu_physical_memory_snapshot_get_dirty(DirtyBitmapSnapshot *snap,
                                            ram_addr_t start,
                                            ram_addr_t length);

static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start,
                                                         ram_addr_t length)
{
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA);
    cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE);
}


/* Called with RCU critical section */
static inline
uint64_t cpu_physical_memory_sync_dirty_bitmap(RAMBlock *rb,
                                               ram_addr_t start,
                                               ram_addr_t length,
                                               uint64_t *real_dirty_pages)
{
    ram_addr_t addr;
    unsigned long word = BIT_WORD((start + rb->offset) >> TARGET_PAGE_BITS);
    uint64_t num_dirty = 0;
    unsigned long *dest = rb->bmap;

    /* start address and length are aligned at the start of a word? */
    if (((word * BITS_PER_LONG) << TARGET_PAGE_BITS) ==
         (start + rb->offset) &&
        !(length & ((BITS_PER_LONG << TARGET_PAGE_BITS) - 1))) {
        int k;
        int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS);
        unsigned long * const *src;
        unsigned long idx = (word * BITS_PER_LONG) / DIRTY_MEMORY_BLOCK_SIZE;
        unsigned long offset = BIT_WORD((word * BITS_PER_LONG) %
                                        DIRTY_MEMORY_BLOCK_SIZE);
        unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS);

        src = atomic_rcu_read(
                &ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION])->blocks;

        for (k = page; k < page + nr; k++) {
            if (src[idx][offset]) {
                unsigned long bits = atomic_xchg(&src[idx][offset], 0);
                unsigned long new_dirty;
                *real_dirty_pages += ctpopl(bits);
                new_dirty = ~dest[k];
                dest[k] |= bits;
                new_dirty &= bits;
                num_dirty += ctpopl(new_dirty);
            }

            if (++offset >= BITS_TO_LONGS(DIRTY_MEMORY_BLOCK_SIZE)) {
                offset = 0;
                idx++;
            }
        }

        if (rb->clear_bmap) {
            /*
             * Postpone the dirty bitmap clear to the point right before
             * we really send the pages; we will also split the clearing
             * of the dirty bitmap into smaller chunks.
             */
            clear_bmap_set(rb, start >> TARGET_PAGE_BITS,
                           length >> TARGET_PAGE_BITS);
        } else {
            /* Slow path: still clear the dirty bitmap, but in one huge chunk */
            memory_region_clear_dirty_bitmap(rb->mr, start, length);
        }
    } else {
        ram_addr_t offset = rb->offset;

        for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) {
            if (cpu_physical_memory_test_and_clear_dirty(
                        start + addr + offset,
                        TARGET_PAGE_SIZE,
                        DIRTY_MEMORY_MIGRATION)) {
                *real_dirty_pages += 1;
                long k = (start + addr) >> TARGET_PAGE_BITS;
                if (!test_and_set_bit(k, dest)) {
                    num_dirty++;
                }
            }
        }
    }

    return num_dirty;
}
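/*
 * Illustrative note (values hypothetical): the fast path above requires
 * the range to start and end on a whole bitmap word of target pages.  With
 * 4 KiB target pages and 64-bit longs, one word covers 64 pages, i.e.
 * 256 KiB, so start == 0 with length == 1 MiB qualifies (4 words are
 * harvested with atomic_xchg), while start == 4 KiB or length == 260 KiB
 * falls back to the page-by-page loop; rb->offset is assumed to be 256 KiB
 * aligned in this example.
 */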
#endif
#endif