1 /* 2 * Memory Device Interface 3 * 4 * Copyright ProfitBricks GmbH 2012 5 * Copyright (C) 2014 Red Hat Inc 6 * Copyright (c) 2018 Red Hat Inc 7 * 8 * This work is licensed under the terms of the GNU GPL, version 2 or later. 9 * See the COPYING file in the top-level directory. 10 */ 11 12 #include "qemu/osdep.h" 13 #include "qemu/error-report.h" 14 #include "hw/mem/memory-device.h" 15 #include "qapi/error.h" 16 #include "hw/boards.h" 17 #include "qemu/range.h" 18 #include "hw/virtio/vhost.h" 19 #include "sysemu/kvm.h" 20 #include "exec/address-spaces.h" 21 #include "trace.h" 22 23 static gint memory_device_addr_sort(gconstpointer a, gconstpointer b) 24 { 25 const MemoryDeviceState *md_a = MEMORY_DEVICE(a); 26 const MemoryDeviceState *md_b = MEMORY_DEVICE(b); 27 const MemoryDeviceClass *mdc_a = MEMORY_DEVICE_GET_CLASS(a); 28 const MemoryDeviceClass *mdc_b = MEMORY_DEVICE_GET_CLASS(b); 29 const uint64_t addr_a = mdc_a->get_addr(md_a); 30 const uint64_t addr_b = mdc_b->get_addr(md_b); 31 32 if (addr_a > addr_b) { 33 return 1; 34 } else if (addr_a < addr_b) { 35 return -1; 36 } 37 return 0; 38 } 39 40 static int memory_device_build_list(Object *obj, void *opaque) 41 { 42 GSList **list = opaque; 43 44 if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { 45 DeviceState *dev = DEVICE(obj); 46 if (dev->realized) { /* only realized memory devices matter */ 47 *list = g_slist_insert_sorted(*list, dev, memory_device_addr_sort); 48 } 49 } 50 51 object_child_foreach(obj, memory_device_build_list, opaque); 52 return 0; 53 } 54 55 static unsigned int memory_device_get_memslots(MemoryDeviceState *md) 56 { 57 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); 58 59 if (mdc->get_memslots) { 60 return mdc->get_memslots(md); 61 } 62 return 1; 63 } 64 65 /* 66 * Memslots that are reserved by memory devices (required but still reported 67 * as free from KVM / vhost). 68 */ 69 static unsigned int get_reserved_memslots(MachineState *ms) 70 { 71 if (ms->device_memory->used_memslots > 72 ms->device_memory->required_memslots) { 73 /* This is unexpected, and we warned already in the memory notifier. */ 74 return 0; 75 } 76 return ms->device_memory->required_memslots - 77 ms->device_memory->used_memslots; 78 } 79 80 unsigned int memory_devices_get_reserved_memslots(void) 81 { 82 if (!current_machine->device_memory) { 83 return 0; 84 } 85 return get_reserved_memslots(current_machine); 86 } 87 88 static void memory_device_check_addable(MachineState *ms, MemoryDeviceState *md, 89 MemoryRegion *mr, Error **errp) 90 { 91 const uint64_t used_region_size = ms->device_memory->used_region_size; 92 const uint64_t size = memory_region_size(mr); 93 const unsigned int required_memslots = memory_device_get_memslots(md); 94 const unsigned int reserved_memslots = get_reserved_memslots(ms); 95 96 /* we will need memory slots for kvm and vhost */ 97 if (kvm_enabled() && 98 kvm_get_free_memslots() < required_memslots + reserved_memslots) { 99 error_setg(errp, "hypervisor has not enough free memory slots left"); 100 return; 101 } 102 if (vhost_get_free_memslots() < required_memslots + reserved_memslots) { 103 error_setg(errp, "a used vhost backend has not enough free memory slots left"); 104 return; 105 } 106 107 /* will we exceed the total amount of memory specified */ 108 if (used_region_size + size < used_region_size || 109 used_region_size + size > ms->maxram_size - ms->ram_size) { 110 error_setg(errp, "not enough space, currently 0x%" PRIx64 111 " in use of total space for memory devices 0x" RAM_ADDR_FMT, 112 used_region_size, ms->maxram_size - ms->ram_size); 113 return; 114 } 115 116 } 117 118 static uint64_t memory_device_get_free_addr(MachineState *ms, 119 const uint64_t *hint, 120 uint64_t align, uint64_t size, 121 Error **errp) 122 { 123 GSList *list = NULL, *item; 124 Range as, new = range_empty; 125 126 range_init_nofail(&as, ms->device_memory->base, 127 memory_region_size(&ms->device_memory->mr)); 128 129 /* start of address space indicates the maximum alignment we expect */ 130 if (!QEMU_IS_ALIGNED(range_lob(&as), align)) { 131 warn_report("the alignment (0x%" PRIx64 ") exceeds the expected" 132 " maximum alignment, memory will get fragmented and not" 133 " all 'maxmem' might be usable for memory devices.", 134 align); 135 } 136 137 if (hint && !QEMU_IS_ALIGNED(*hint, align)) { 138 error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes", 139 align); 140 return 0; 141 } 142 143 if (!QEMU_IS_ALIGNED(size, align)) { 144 error_setg(errp, "backend memory size must be multiple of 0x%" 145 PRIx64, align); 146 return 0; 147 } 148 149 if (hint) { 150 if (range_init(&new, *hint, size) || !range_contains_range(&as, &new)) { 151 error_setg(errp, "can't add memory device [0x%" PRIx64 ":0x%" PRIx64 152 "], usable range for memory devices [0x%" PRIx64 ":0x%" 153 PRIx64 "]", *hint, size, range_lob(&as), 154 range_size(&as)); 155 return 0; 156 } 157 } else { 158 if (range_init(&new, QEMU_ALIGN_UP(range_lob(&as), align), size)) { 159 error_setg(errp, "can't add memory device, device too big"); 160 return 0; 161 } 162 } 163 164 /* find address range that will fit new memory device */ 165 object_child_foreach(OBJECT(ms), memory_device_build_list, &list); 166 for (item = list; item; item = g_slist_next(item)) { 167 const MemoryDeviceState *md = item->data; 168 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(OBJECT(md)); 169 uint64_t next_addr; 170 Range tmp; 171 172 range_init_nofail(&tmp, mdc->get_addr(md), 173 memory_device_get_region_size(md, &error_abort)); 174 175 if (range_overlaps_range(&tmp, &new)) { 176 if (hint) { 177 const DeviceState *d = DEVICE(md); 178 error_setg(errp, "address range conflicts with memory device" 179 " id='%s'", d->id ? d->id : "(unnamed)"); 180 goto out; 181 } 182 183 next_addr = QEMU_ALIGN_UP(range_upb(&tmp) + 1, align); 184 if (!next_addr || range_init(&new, next_addr, range_size(&new))) { 185 range_make_empty(&new); 186 break; 187 } 188 } else if (range_lob(&tmp) > range_upb(&new)) { 189 break; 190 } 191 } 192 193 if (!range_contains_range(&as, &new)) { 194 error_setg(errp, "could not find position in guest address space for " 195 "memory device - memory fragmented due to alignments"); 196 } 197 out: 198 g_slist_free(list); 199 return range_lob(&new); 200 } 201 202 MemoryDeviceInfoList *qmp_memory_device_list(void) 203 { 204 GSList *devices = NULL, *item; 205 MemoryDeviceInfoList *list = NULL, **tail = &list; 206 207 object_child_foreach(qdev_get_machine(), memory_device_build_list, 208 &devices); 209 210 for (item = devices; item; item = g_slist_next(item)) { 211 const MemoryDeviceState *md = MEMORY_DEVICE(item->data); 212 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(item->data); 213 MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1); 214 215 mdc->fill_device_info(md, info); 216 217 QAPI_LIST_APPEND(tail, info); 218 } 219 220 g_slist_free(devices); 221 222 return list; 223 } 224 225 static int memory_device_plugged_size(Object *obj, void *opaque) 226 { 227 uint64_t *size = opaque; 228 229 if (object_dynamic_cast(obj, TYPE_MEMORY_DEVICE)) { 230 const DeviceState *dev = DEVICE(obj); 231 const MemoryDeviceState *md = MEMORY_DEVICE(obj); 232 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(obj); 233 234 if (dev->realized) { 235 *size += mdc->get_plugged_size(md, &error_abort); 236 } 237 } 238 239 object_child_foreach(obj, memory_device_plugged_size, opaque); 240 return 0; 241 } 242 243 uint64_t get_plugged_memory_size(void) 244 { 245 uint64_t size = 0; 246 247 memory_device_plugged_size(qdev_get_machine(), &size); 248 249 return size; 250 } 251 252 void memory_device_pre_plug(MemoryDeviceState *md, MachineState *ms, 253 const uint64_t *legacy_align, Error **errp) 254 { 255 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); 256 Error *local_err = NULL; 257 uint64_t addr, align = 0; 258 MemoryRegion *mr; 259 260 if (!ms->device_memory) { 261 error_setg(errp, "the configuration is not prepared for memory devices" 262 " (e.g., for memory hotplug), consider specifying the" 263 " maxmem option"); 264 return; 265 } 266 267 mr = mdc->get_memory_region(md, &local_err); 268 if (local_err) { 269 goto out; 270 } 271 272 memory_device_check_addable(ms, md, mr, &local_err); 273 if (local_err) { 274 goto out; 275 } 276 277 if (legacy_align) { 278 align = *legacy_align; 279 } else { 280 if (mdc->get_min_alignment) { 281 align = mdc->get_min_alignment(md); 282 } 283 align = MAX(align, memory_region_get_alignment(mr)); 284 } 285 addr = mdc->get_addr(md); 286 addr = memory_device_get_free_addr(ms, !addr ? NULL : &addr, align, 287 memory_region_size(mr), &local_err); 288 if (local_err) { 289 goto out; 290 } 291 mdc->set_addr(md, addr, &local_err); 292 if (!local_err) { 293 trace_memory_device_pre_plug(DEVICE(md)->id ? DEVICE(md)->id : "", 294 addr); 295 } 296 out: 297 error_propagate(errp, local_err); 298 } 299 300 void memory_device_plug(MemoryDeviceState *md, MachineState *ms) 301 { 302 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); 303 const uint64_t addr = mdc->get_addr(md); 304 MemoryRegion *mr; 305 306 /* 307 * We expect that a previous call to memory_device_pre_plug() succeeded, so 308 * it can't fail at this point. 309 */ 310 mr = mdc->get_memory_region(md, &error_abort); 311 g_assert(ms->device_memory); 312 313 ms->device_memory->used_region_size += memory_region_size(mr); 314 ms->device_memory->required_memslots += memory_device_get_memslots(md); 315 memory_region_add_subregion(&ms->device_memory->mr, 316 addr - ms->device_memory->base, mr); 317 trace_memory_device_plug(DEVICE(md)->id ? DEVICE(md)->id : "", addr); 318 } 319 320 void memory_device_unplug(MemoryDeviceState *md, MachineState *ms) 321 { 322 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); 323 MemoryRegion *mr; 324 325 /* 326 * We expect that a previous call to memory_device_pre_plug() succeeded, so 327 * it can't fail at this point. 328 */ 329 mr = mdc->get_memory_region(md, &error_abort); 330 g_assert(ms->device_memory); 331 332 memory_region_del_subregion(&ms->device_memory->mr, mr); 333 ms->device_memory->used_region_size -= memory_region_size(mr); 334 ms->device_memory->required_memslots -= memory_device_get_memslots(md); 335 trace_memory_device_unplug(DEVICE(md)->id ? DEVICE(md)->id : "", 336 mdc->get_addr(md)); 337 } 338 339 uint64_t memory_device_get_region_size(const MemoryDeviceState *md, 340 Error **errp) 341 { 342 const MemoryDeviceClass *mdc = MEMORY_DEVICE_GET_CLASS(md); 343 MemoryRegion *mr; 344 345 /* dropping const here is fine as we don't touch the memory region */ 346 mr = mdc->get_memory_region((MemoryDeviceState *)md, errp); 347 if (!mr) { 348 return 0; 349 } 350 351 return memory_region_size(mr); 352 } 353 354 static void memory_devices_region_mod(MemoryListener *listener, 355 MemoryRegionSection *mrs, bool add) 356 { 357 DeviceMemoryState *dms = container_of(listener, DeviceMemoryState, 358 listener); 359 360 if (!memory_region_is_ram(mrs->mr)) { 361 warn_report("Unexpected memory region mapped into device memory region."); 362 return; 363 } 364 365 /* 366 * The expectation is that each distinct RAM memory region section in 367 * our region for memory devices consumes exactly one memslot in KVM 368 * and in vhost. For vhost, this is true, except: 369 * * ROM memory regions don't consume a memslot. These get used very 370 * rarely for memory devices (R/O NVDIMMs). 371 * * Memslots without a fd (memory-backend-ram) don't necessarily 372 * consume a memslot. Such setups are quite rare and possibly bogus: 373 * the memory would be inaccessible by such vhost devices. 374 * 375 * So for vhost, in corner cases we might over-estimate the number of 376 * memslots that are currently used or that might still be reserved 377 * (required - used). 378 */ 379 dms->used_memslots += add ? 1 : -1; 380 381 if (dms->used_memslots > dms->required_memslots) { 382 warn_report("Memory devices use more memory slots than indicated as required."); 383 } 384 } 385 386 static void memory_devices_region_add(MemoryListener *listener, 387 MemoryRegionSection *mrs) 388 { 389 return memory_devices_region_mod(listener, mrs, true); 390 } 391 392 static void memory_devices_region_del(MemoryListener *listener, 393 MemoryRegionSection *mrs) 394 { 395 return memory_devices_region_mod(listener, mrs, false); 396 } 397 398 void machine_memory_devices_init(MachineState *ms, hwaddr base, uint64_t size) 399 { 400 g_assert(size); 401 g_assert(!ms->device_memory); 402 ms->device_memory = g_new0(DeviceMemoryState, 1); 403 ms->device_memory->base = base; 404 405 memory_region_init(&ms->device_memory->mr, OBJECT(ms), "device-memory", 406 size); 407 address_space_init(&ms->device_memory->as, &ms->device_memory->mr, 408 "device-memory"); 409 memory_region_add_subregion(get_system_memory(), ms->device_memory->base, 410 &ms->device_memory->mr); 411 412 /* Track the number of memslots used by memory devices. */ 413 ms->device_memory->listener.region_add = memory_devices_region_add; 414 ms->device_memory->listener.region_del = memory_devices_region_del; 415 memory_listener_register(&ms->device_memory->listener, 416 &ms->device_memory->as); 417 } 418 419 static const TypeInfo memory_device_info = { 420 .name = TYPE_MEMORY_DEVICE, 421 .parent = TYPE_INTERFACE, 422 .class_size = sizeof(MemoryDeviceClass), 423 }; 424 425 static void memory_device_register_types(void) 426 { 427 type_register_static(&memory_device_info); 428 } 429 430 type_init(memory_device_register_types) 431