xref: /qemu/hw/mem/memory-device.c (revision 766aa0a6)
1 /*
2  * Memory Device Interface
3  *
4  * Copyright ProfitBricks GmbH 2012
5  * Copyright (C) 2014 Red Hat Inc
6  * Copyright (c) 2018 Red Hat Inc
7  *
8  * This work is licensed under the terms of the GNU GPL, version 2 or later.
9  * See the COPYING file in the top-level directory.
10  */
11 
12 #include "qemu/osdep.h"
13 #include "qemu/error-report.h"
14 #include "hw/mem/memory-device.h"
15 #include "qapi/error.h"
16 #include "hw/boards.h"
17 #include "qemu/range.h"
18 #include "hw/virtio/vhost.h"
19 #include "sysemu/kvm.h"
20 #include "exec/address-spaces.h"
21 #include "trace.h"
22 
23 static gint memory_device_addr_sort(gconstpointer a, gconstpointer b)
24 {
25     const MemoryDeviceState *md_a = MEMORY_DEVICE(a);
26     const MemoryDeviceState *md_b = MEMORY_DEVICE(b);
27     const MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a);
28     const MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b);
29     const uint64_t addr_a = mdc_a->get_addr(md_a);
30     const uint64_t addr_b = mdc_b->get_addr(md_b);
31 
32     if (addr_a > addr_b) {
33         return 1;
34     } else if (addr_a < addr_b) {
35         return -1;
36     }
37     return 0;
38 }
39 
40 static int memory_device_build_list(Object *obj, void *opaque)
41 {
42     GSList **list = opaque;
43 
44     if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) {
45         DeviceState *dev = DEVICE(obj);
46         if (dev->realized) { /* only realized memory devices matter */
47             *list = g_slist_insert_sorted(*list, dev, memory_device_addr_sort);
48         }
49     }
50 
51     object_child_foreach(obj, memory_device_build_list, opaque);
52     return 0;
53 }
54 
55 static unsigned int memory_device_get_memslots(MemoryDeviceState *md)
56 {
57     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
58 
59     if (mdc->get_memslots) {
60         return mdc->get_memslots(md);
61     }
62     return 1;
63 }
64 
65 /*
66  * Memslots that are reserved by memory devices (required but still reported
67  * as free from KVM / vhost).
68  */
69 static unsigned int get_reserved_memslots(MachineState *ms)
70 {
71     if (ms->device_memory->used_memslots >
72         ms->device_memory->required_memslots) {
73         /* This is unexpected, and we warned already in the memory notifier. */
74         return 0;
75     }
76     return ms->device_memory->required_memslots -
77            ms->device_memory->used_memslots;
78 }
79 
80 unsigned int memory_devices_get_reserved_memslots(void)
81 {
82     if (!current_machine->device_memory) {
83         return 0;
84     }
85     return get_reserved_memslots(current_machine);
86 }
87 
88 static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md,
89                                         MemoryRegion *mr, Error **errp)
90 {
91     const uint64_t used_region_size = ms->device_memory->used_region_size;
92     const uint64_t size = memory_region_size(mr);
93     const unsigned int required_memslots = memory_device_get_memslots(md);
94     const unsigned int reserved_memslots = get_reserved_memslots(ms);
95 
96     /* we will need memory slots for kvm and vhost */
97     if (kvm_enabled() &&
98         kvm_get_free_memslots() < required_memslots + reserved_memslots) {
99         error_setg(errp, "hypervisor has not enough free memory slots left");
100         return;
101     }
102     if (vhost_get_free_memslots() < required_memslots + reserved_memslots) {
103         error_setg(errp, "a used vhost backend has not enough free memory slots left");
104         return;
105     }
106 
107     /* will we exceed the total amount of memory specified */
108     if (used_region_size + size < used_region_size ||
109         used_region_size + size > ms->maxram_size - ms->ram_size) {
110         error_setg(errp, "not enough space, currently 0x%" PRIx64
111                    " in use of total space for memory devices 0x" RAM_ADDR_FMT,
112                    used_region_size, ms->maxram_size - ms->ram_size);
113         return;
114     }
115 
116 }
117 
/*
 * Find a free address for a memory device of @size bytes in the machine's
 * device memory region.
 *
 * If @hint is non-NULL, only that exact address is considered and an error
 * is set if it is unusable (misaligned, out of range, or conflicting with
 * an existing device). If @hint is NULL, the lowest suitably aligned gap
 * between already-plugged devices is chosen.
 *
 * Returns the selected address, or 0 with @errp set on failure.
 */
static uint64_t memory_device_get_free_addr(MachineState *ms,
                                            const uint64_t *hint,
                                            uint64_t align, uint64_t size,
                                            Error **errp)
{
    GSList *list = NULL, *item;
    Range as, new = range_empty;

    /* "as" covers the whole device-memory area we may place devices in. */
    range_init_nofail(&as, ms->device_memory->base,
                      memory_region_size(&ms->device_memory->mr));

    /* start of address space indicates the maximum alignment we expect */
    if (!QEMU_IS_ALIGNED(range_lob(&as), align)) {
        warn_report("the alignment (0x%" PRIx64 ") exceeds the expected"
                    " maximum alignment, memory will get fragmented and not"
                    " all 'maxmem' might be usable for memory devices.",
                    align);
    }

    if (hint && !QEMU_IS_ALIGNED(*hint, align)) {
        error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes",
                   align);
        return 0;
    }

    if (!QEMU_IS_ALIGNED(size, align)) {
        error_setg(errp, "backend memory size must be multiple of 0x%"
                   PRIx64, align);
        return 0;
    }

    /*
     * Seed the candidate range "new": either exactly at the hint, or at
     * the aligned start of the area. range_init() fails on overflow.
     */
    if (hint) {
        if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) {
            error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64
                       "], usable range for memory devices [0x%" PRIx64 ":0x%"
                       PRIx64 "]", *hint, size, range_lob(&as),
                       range_size(&as));
            return 0;
        }
    } else {
        if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) {
            error_setg(errp, "can't add memory device, device too big");
            return 0;
        }
    }

    /* find address range that will fit new memory device */
    object_child_foreach(OBJECT(ms), memory_device_build_list, &list);
    /* list is sorted by address (see memory_device_addr_sort) */
    for (item = list; item; item = g_slist_next(item)) {
        const MemoryDeviceState *md = item->data;
        const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(OBJECT(md));
        uint64_t next_addr;
        Range tmp;

        /* tmp = the address range already occupied by this device */
        range_init_nofail(&tmp, mdc->get_addr(md),
                          memory_device_get_region_size(md, &error_abort));

        if (range_overlaps_range(&tmp, &new)) {
            if (hint) {
                /* A fixed address was requested; a conflict is fatal. */
                const DeviceState *d = DEVICE(md);
                error_setg(errp, "address range conflicts with memory device"
                           " id='%s'", d->id ? d->id : "(unnamed)");
                goto out;
            }

            /* Retry just past the conflicting device, re-aligned. */
            next_addr = QEMU_ALIGN_UP(range_upb(&tmp) + 1, align);
            if (!next_addr || range_init(&new, next_addr, range_size(&new))) {
                /* Overflowed the address space: no fit exists. */
                range_make_empty(&new);
                break;
            }
        } else if (range_lob(&tmp) > range_upb(&new)) {
            /*
             * We found a gap before this device that fits; since the list
             * is sorted, no later device can conflict either.
             */
            break;
        }
    }

    /* An empty or out-of-area "new" fails this containment check. */
    if (!range_contains_range(&as, &new)) {
        error_setg(errp, "could not find position in guest address space for "
                   "memory device - memory fragmented due to alignments");
    }
out:
    g_slist_free(list);
    return range_lob(&new);
}
201 
202 MemoryDeviceInfoList *qmp_memory_device_list(void)
203 {
204     GSList *devices = NULL, *item;
205     MemoryDeviceInfoList *list = NULL, **tail = &list;
206 
207     object_child_foreach(qdev_get_machine(), memory_device_build_list,
208                          &devices);
209 
210     for (item = devices; item; item = g_slist_next(item)) {
211         const MemoryDeviceState *md = MEMORY_DEVICE(item->data);
212         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data);
213         MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
214 
215         mdc->fill_device_info(md, info);
216 
217         QAPI_LIST_APPEND(tail, info);
218     }
219 
220     g_slist_free(devices);
221 
222     return list;
223 }
224 
225 static int memory_device_plugged_size(Object *obj, void *opaque)
226 {
227     uint64_t *size = opaque;
228 
229     if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) {
230         const DeviceState *dev = DEVICE(obj);
231         const MemoryDeviceState *md = MEMORY_DEVICE(obj);
232         const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj);
233 
234         if (dev->realized) {
235             *size += mdc->get_plugged_size(md, &error_abort);
236         }
237     }
238 
239     object_child_foreach(obj, memory_device_plugged_size, opaque);
240     return 0;
241 }
242 
243 uint64_t get_plugged_memory_size(void)
244 {
245     uint64_t size = 0;
246 
247     memory_device_plugged_size(qdev_get_machine(), &size);
248 
249     return size;
250 }
251 
252 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms,
253                             const uint64_t *legacy_align, Error **errp)
254 {
255     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
256     Error *local_err = NULL;
257     uint64_t addr, align = 0;
258     MemoryRegion *mr;
259 
260     if (!ms->device_memory) {
261         error_setg(errp, "the configuration is not prepared for memory devices"
262                          " (e.g., for memory hotplug), consider specifying the"
263                          " maxmem option");
264         return;
265     }
266 
267     mr = mdc->get_memory_region(md, &local_err);
268     if (local_err) {
269         goto out;
270     }
271 
272     memory_device_check_addable(ms, md, mr, &local_err);
273     if (local_err) {
274         goto out;
275     }
276 
277     if (legacy_align) {
278         align = *legacy_align;
279     } else {
280         if (mdc->get_min_alignment) {
281             align = mdc->get_min_alignment(md);
282         }
283         align = MAX(align, memory_region_get_alignment(mr));
284     }
285     addr = mdc->get_addr(md);
286     addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align,
287                                        memory_region_size(mr), &local_err);
288     if (local_err) {
289         goto out;
290     }
291     mdc->set_addr(md, addr, &local_err);
292     if (!local_err) {
293         trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "",
294                                      addr);
295     }
296 out:
297     error_propagate(errp, local_err);
298 }
299 
300 void memory_device_plug(MemoryDeviceState *md, MachineState *ms)
301 {
302     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
303     const uint64_t addr = mdc->get_addr(md);
304     MemoryRegion *mr;
305 
306     /*
307      * We expect that a previous call to memory_device_pre_plug() succeeded, so
308      * it can't fail at this point.
309      */
310     mr = mdc->get_memory_region(md, &error_abort);
311     g_assert(ms->device_memory);
312 
313     ms->device_memory->used_region_size += memory_region_size(mr);
314     ms->device_memory->required_memslots += memory_device_get_memslots(md);
315     memory_region_add_subregion(&ms->device_memory->mr,
316                                 addr - ms->device_memory->base, mr);
317     trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr);
318 }
319 
320 void memory_device_unplug(MemoryDeviceState *md, MachineState *ms)
321 {
322     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
323     MemoryRegion *mr;
324 
325     /*
326      * We expect that a previous call to memory_device_pre_plug() succeeded, so
327      * it can't fail at this point.
328      */
329     mr = mdc->get_memory_region(md, &error_abort);
330     g_assert(ms->device_memory);
331 
332     memory_region_del_subregion(&ms->device_memory->mr, mr);
333     ms->device_memory->used_region_size -= memory_region_size(mr);
334     ms->device_memory->required_memslots -= memory_device_get_memslots(md);
335     trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "",
336                                mdc->get_addr(md));
337 }
338 
339 uint64_t memory_device_get_region_size(const MemoryDeviceState *md,
340                                        Error **errp)
341 {
342     const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md);
343     MemoryRegion *mr;
344 
345     /* dropping const here is fine as we don't touch the memory region */
346     mr = mdc->get_memory_region((MemoryDeviceState *)md, errp);
347     if (!mr) {
348         return 0;
349     }
350 
351     return memory_region_size(mr);
352 }
353 
354 static void memory_devices_region_mod(MemoryListener *listener,
355                                       MemoryRegionSection *mrs, bool add)
356 {
357     DeviceMemoryState *dms = container_of(listener, DeviceMemoryState,
358                                           listener);
359 
360     if (!memory_region_is_ram(mrs->mr)) {
361         warn_report("Unexpected memory region mapped into device memory region.");
362         return;
363     }
364 
365     /*
366      * The expectation is that each distinct RAM memory region section in
367      * our region for memory devices consumes exactly one memslot in KVM
368      * and in vhost. For vhost, this is true, except:
369      * * ROM memory regions don't consume a memslot. These get used very
370      *   rarely for memory devices (R/O NVDIMMs).
371      * * Memslots without a fd (memory-backend-ram) don't necessarily
372      *   consume a memslot. Such setups are quite rare and possibly bogus:
373      *   the memory would be inaccessible by such vhost devices.
374      *
375      * So for vhost, in corner cases we might over-estimate the number of
376      * memslots that are currently used or that might still be reserved
377      * (required - used).
378      */
379     dms->used_memslots += add ? 1 : -1;
380 
381     if (dms->used_memslots > dms->required_memslots) {
382         warn_report("Memory devices use more memory slots than indicated as required.");
383     }
384 }
385 
386 static void memory_devices_region_add(MemoryListener *listener,
387                                       MemoryRegionSection *mrs)
388 {
389     return memory_devices_region_mod(listener, mrs, true);
390 }
391 
392 static void memory_devices_region_del(MemoryListener *listener,
393                                       MemoryRegionSection *mrs)
394 {
395     return memory_devices_region_mod(listener, mrs, false);
396 }
397 
398 void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size)
399 {
400     g_assert(size);
401     g_assert(!ms->device_memory);
402     ms->device_memory = g_new0(DeviceMemoryState, 1);
403     ms->device_memory->base = base;
404 
405     memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory",
406                        size);
407     address_space_init(&ms->device_memory->as, &ms->device_memory->mr,
408                        "device-memory");
409     memory_region_add_subregion(get_system_memory(), ms->device_memory->base,
410                                 &ms->device_memory->mr);
411 
412     /* Track the number of memslots used by memory devices. */
413     ms->device_memory->listener.region_add = memory_devices_region_add;
414     ms->device_memory->listener.region_del = memory_devices_region_del;
415     memory_listener_register(&ms->device_memory->listener,
416                              &ms->device_memory->as);
417 }
418 
419 static const TypeInfo memory_device_info = {
420     .name          = TYPE_MEMORY_DEVICE,
421     .parent        = TYPE_INTERFACE,
422     .class_size = sizeof(MemoryDeviceClass),
423 };
424 
425 static void memory_device_register_types(void)
426 {
427     type_register_static(&memory_device_info);
428 }
429 
430 type_init(memory_device_register_types)
431