xref: /qemu/backends/hostmem.c (revision 2c533c54)
1 /*
2  * QEMU Host Memory Backend
3  *
4  * Copyright (C) 2013-2014 Red Hat Inc
5  *
6  * Authors:
7  *   Igor Mammedov <imammedo@redhat.com>
8  *
9  * This work is licensed under the terms of the GNU GPL, version 2 or later.
10  * See the COPYING file in the top-level directory.
11  */
12 #include "qemu/osdep.h"
13 #include "sysemu/hostmem.h"
14 #include "hw/boards.h"
15 #include "qapi/error.h"
16 #include "qapi/visitor.h"
17 #include "qapi-types.h"
18 #include "qapi-visit.h"
19 #include "qemu/config-file.h"
20 #include "qom/object_interfaces.h"
21 
22 #ifdef CONFIG_NUMA
23 #include <numaif.h>
24 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_DEFAULT != MPOL_DEFAULT);
25 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_PREFERRED != MPOL_PREFERRED);
26 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_BIND != MPOL_BIND);
27 QEMU_BUILD_BUG_ON(HOST_MEM_POLICY_INTERLEAVE != MPOL_INTERLEAVE);
28 #endif
29 
30 static void
31 host_memory_backend_get_size(Object *obj, Visitor *v, const char *name,
32                              void *opaque, Error **errp)
33 {
34     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
35     uint64_t value = backend->size;
36 
37     visit_type_size(v, name, &value, errp);
38 }
39 
40 static void
41 host_memory_backend_set_size(Object *obj, Visitor *v, const char *name,
42                              void *opaque, Error **errp)
43 {
44     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
45     Error *local_err = NULL;
46     uint64_t value;
47 
48     if (memory_region_size(&backend->mr)) {
49         error_setg(&local_err, "cannot change property value");
50         goto out;
51     }
52 
53     visit_type_size(v, name, &value, &local_err);
54     if (local_err) {
55         goto out;
56     }
57     if (!value) {
58         error_setg(&local_err, "Property '%s.%s' doesn't take value '%"
59                    PRIu64 "'", object_get_typename(obj), name, value);
60         goto out;
61     }
62     backend->size = value;
63 out:
64     error_propagate(errp, local_err);
65 }
66 
67 static uint16List **host_memory_append_node(uint16List **node,
68                                             unsigned long value)
69 {
70      *node = g_malloc0(sizeof(**node));
71      (*node)->value = value;
72      return &(*node)->next;
73 }
74 
75 static void
76 host_memory_backend_get_host_nodes(Object *obj, Visitor *v, const char *name,
77                                    void *opaque, Error **errp)
78 {
79     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
80     uint16List *host_nodes = NULL;
81     uint16List **node = &host_nodes;
82     unsigned long value;
83 
84     value = find_first_bit(backend->host_nodes, MAX_NODES);
85 
86     node = host_memory_append_node(node, value);
87 
88     if (value == MAX_NODES) {
89         goto out;
90     }
91 
92     do {
93         value = find_next_bit(backend->host_nodes, MAX_NODES, value + 1);
94         if (value == MAX_NODES) {
95             break;
96         }
97 
98         node = host_memory_append_node(node, value);
99     } while (true);
100 
101 out:
102     visit_type_uint16List(v, name, &host_nodes, errp);
103 }
104 
105 static void
106 host_memory_backend_set_host_nodes(Object *obj, Visitor *v, const char *name,
107                                    void *opaque, Error **errp)
108 {
109 #ifdef CONFIG_NUMA
110     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
111     uint16List *l = NULL;
112 
113     visit_type_uint16List(v, name, &l, errp);
114 
115     while (l) {
116         bitmap_set(backend->host_nodes, l->value, 1);
117         l = l->next;
118     }
119 #else
120     error_setg(errp, "NUMA node binding are not supported by this QEMU");
121 #endif
122 }
123 
124 static int
125 host_memory_backend_get_policy(Object *obj, Error **errp G_GNUC_UNUSED)
126 {
127     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
128     return backend->policy;
129 }
130 
131 static void
132 host_memory_backend_set_policy(Object *obj, int policy, Error **errp)
133 {
134     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
135     backend->policy = policy;
136 
137 #ifndef CONFIG_NUMA
138     if (policy != HOST_MEM_POLICY_DEFAULT) {
139         error_setg(errp, "NUMA policies are not supported by this QEMU");
140     }
141 #endif
142 }
143 
144 static bool host_memory_backend_get_merge(Object *obj, Error **errp)
145 {
146     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
147 
148     return backend->merge;
149 }
150 
151 static void host_memory_backend_set_merge(Object *obj, bool value, Error **errp)
152 {
153     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
154 
155     if (!memory_region_size(&backend->mr)) {
156         backend->merge = value;
157         return;
158     }
159 
160     if (value != backend->merge) {
161         void *ptr = memory_region_get_ram_ptr(&backend->mr);
162         uint64_t sz = memory_region_size(&backend->mr);
163 
164         qemu_madvise(ptr, sz,
165                      value ? QEMU_MADV_MERGEABLE : QEMU_MADV_UNMERGEABLE);
166         backend->merge = value;
167     }
168 }
169 
170 static bool host_memory_backend_get_dump(Object *obj, Error **errp)
171 {
172     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
173 
174     return backend->dump;
175 }
176 
177 static void host_memory_backend_set_dump(Object *obj, bool value, Error **errp)
178 {
179     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
180 
181     if (!memory_region_size(&backend->mr)) {
182         backend->dump = value;
183         return;
184     }
185 
186     if (value != backend->dump) {
187         void *ptr = memory_region_get_ram_ptr(&backend->mr);
188         uint64_t sz = memory_region_size(&backend->mr);
189 
190         qemu_madvise(ptr, sz,
191                      value ? QEMU_MADV_DODUMP : QEMU_MADV_DONTDUMP);
192         backend->dump = value;
193     }
194 }
195 
196 static bool host_memory_backend_get_prealloc(Object *obj, Error **errp)
197 {
198     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
199 
200     return backend->prealloc || backend->force_prealloc;
201 }
202 
203 static void host_memory_backend_set_prealloc(Object *obj, bool value,
204                                              Error **errp)
205 {
206     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
207 
208     if (backend->force_prealloc) {
209         if (value) {
210             error_setg(errp,
211                        "remove -mem-prealloc to use the prealloc property");
212             return;
213         }
214     }
215 
216     if (!memory_region_size(&backend->mr)) {
217         backend->prealloc = value;
218         return;
219     }
220 
221     if (value && !backend->prealloc) {
222         int fd = memory_region_get_fd(&backend->mr);
223         void *ptr = memory_region_get_ram_ptr(&backend->mr);
224         uint64_t sz = memory_region_size(&backend->mr);
225 
226         os_mem_prealloc(fd, ptr, sz);
227         backend->prealloc = true;
228     }
229 }
230 
231 static void host_memory_backend_init(Object *obj)
232 {
233     HostMemoryBackend *backend = MEMORY_BACKEND(obj);
234     MachineState *machine = MACHINE(qdev_get_machine());
235 
236     backend->merge = machine_mem_merge(machine);
237     backend->dump = machine_dump_guest_core(machine);
238     backend->prealloc = mem_prealloc;
239 
240     object_property_add_bool(obj, "merge",
241                         host_memory_backend_get_merge,
242                         host_memory_backend_set_merge, NULL);
243     object_property_add_bool(obj, "dump",
244                         host_memory_backend_get_dump,
245                         host_memory_backend_set_dump, NULL);
246     object_property_add_bool(obj, "prealloc",
247                         host_memory_backend_get_prealloc,
248                         host_memory_backend_set_prealloc, NULL);
249     object_property_add(obj, "size", "int",
250                         host_memory_backend_get_size,
251                         host_memory_backend_set_size, NULL, NULL, NULL);
252     object_property_add(obj, "host-nodes", "int",
253                         host_memory_backend_get_host_nodes,
254                         host_memory_backend_set_host_nodes, NULL, NULL, NULL);
255     object_property_add_enum(obj, "policy", "HostMemPolicy",
256                              HostMemPolicy_lookup,
257                              host_memory_backend_get_policy,
258                              host_memory_backend_set_policy, NULL);
259 }
260 
261 MemoryRegion *
262 host_memory_backend_get_memory(HostMemoryBackend *backend, Error **errp)
263 {
264     return memory_region_size(&backend->mr) ? &backend->mr : NULL;
265 }
266 
267 void host_memory_backend_set_mapped(HostMemoryBackend *backend, bool mapped)
268 {
269     backend->is_mapped = mapped;
270 }
271 
272 bool host_memory_backend_is_mapped(HostMemoryBackend *backend)
273 {
274     return backend->is_mapped;
275 }
276 
277 static void
278 host_memory_backend_memory_complete(UserCreatable *uc, Error **errp)
279 {
280     HostMemoryBackend *backend = MEMORY_BACKEND(uc);
281     HostMemoryBackendClass *bc = MEMORY_BACKEND_GET_CLASS(uc);
282     Error *local_err = NULL;
283     void *ptr;
284     uint64_t sz;
285 
286     if (bc->alloc) {
287         bc->alloc(backend, &local_err);
288         if (local_err) {
289             error_propagate(errp, local_err);
290             return;
291         }
292 
293         ptr = memory_region_get_ram_ptr(&backend->mr);
294         sz = memory_region_size(&backend->mr);
295 
296         if (backend->merge) {
297             qemu_madvise(ptr, sz, QEMU_MADV_MERGEABLE);
298         }
299         if (!backend->dump) {
300             qemu_madvise(ptr, sz, QEMU_MADV_DONTDUMP);
301         }
302 #ifdef CONFIG_NUMA
303         unsigned long lastbit = find_last_bit(backend->host_nodes, MAX_NODES);
304         /* lastbit == MAX_NODES means maxnode = 0 */
305         unsigned long maxnode = (lastbit + 1) % (MAX_NODES + 1);
306         /* ensure policy won't be ignored in case memory is preallocated
307          * before mbind(). note: MPOL_MF_STRICT is ignored on hugepages so
308          * this doesn't catch hugepage case. */
309         unsigned flags = MPOL_MF_STRICT | MPOL_MF_MOVE;
310 
311         /* check for invalid host-nodes and policies and give more verbose
312          * error messages than mbind(). */
313         if (maxnode && backend->policy == MPOL_DEFAULT) {
314             error_setg(errp, "host-nodes must be empty for policy default,"
315                        " or you should explicitly specify a policy other"
316                        " than default");
317             return;
318         } else if (maxnode == 0 && backend->policy != MPOL_DEFAULT) {
319             error_setg(errp, "host-nodes must be set for policy %s",
320                        HostMemPolicy_lookup[backend->policy]);
321             return;
322         }
323 
324         /* We can have up to MAX_NODES nodes, but we need to pass maxnode+1
325          * as argument to mbind() due to an old Linux bug (feature?) which
326          * cuts off the last specified node. This means backend->host_nodes
327          * must have MAX_NODES+1 bits available.
328          */
329         assert(sizeof(backend->host_nodes) >=
330                BITS_TO_LONGS(MAX_NODES + 1) * sizeof(unsigned long));
331         assert(maxnode <= MAX_NODES);
332         if (mbind(ptr, sz, backend->policy,
333                   maxnode ? backend->host_nodes : NULL, maxnode + 1, flags)) {
334             if (backend->policy != MPOL_DEFAULT || errno != ENOSYS) {
335                 error_setg_errno(errp, errno,
336                                  "cannot bind memory to host NUMA nodes");
337                 return;
338             }
339         }
340 #endif
341         /* Preallocate memory after the NUMA policy has been instantiated.
342          * This is necessary to guarantee memory is allocated with
343          * specified NUMA policy in place.
344          */
345         if (backend->prealloc) {
346             os_mem_prealloc(memory_region_get_fd(&backend->mr), ptr, sz);
347         }
348     }
349 }
350 
351 static bool
352 host_memory_backend_can_be_deleted(UserCreatable *uc, Error **errp)
353 {
354     if (host_memory_backend_is_mapped(MEMORY_BACKEND(uc))) {
355         return false;
356     } else {
357         return true;
358     }
359 }
360 
361 static void
362 host_memory_backend_class_init(ObjectClass *oc, void *data)
363 {
364     UserCreatableClass *ucc = USER_CREATABLE_CLASS(oc);
365 
366     ucc->complete = host_memory_backend_memory_complete;
367     ucc->can_be_deleted = host_memory_backend_can_be_deleted;
368 }
369 
370 static const TypeInfo host_memory_backend_info = {
371     .name = TYPE_MEMORY_BACKEND,
372     .parent = TYPE_OBJECT,
373     .abstract = true,
374     .class_size = sizeof(HostMemoryBackendClass),
375     .class_init = host_memory_backend_class_init,
376     .instance_size = sizeof(HostMemoryBackend),
377     .instance_init = host_memory_backend_init,
378     .interfaces = (InterfaceInfo[]) {
379         { TYPE_USER_CREATABLE },
380         { }
381     }
382 };
383 
384 static void register_types(void)
385 {
386     type_register_static(&host_memory_backend_info);
387 }
388 
389 type_init(register_types);
390