xref: /qemu/hw/mem/pc-dimm.c (revision a75eb03b9fca3af291ec2c433ddda06121ae927d)
1  /*
2   * Dimm device for Memory Hotplug
3   *
4   * Copyright ProfitBricks GmbH 2012
5   * Copyright (C) 2014 Red Hat Inc
6   *
7   * This library is free software; you can redistribute it and/or
8   * modify it under the terms of the GNU Lesser General Public
9   * License as published by the Free Software Foundation; either
10   * version 2 of the License, or (at your option) any later version.
11   *
12   * This library is distributed in the hope that it will be useful,
13   * but WITHOUT ANY WARRANTY; without even the implied warranty of
14   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   * Lesser General Public License for more details.
16   *
17   * You should have received a copy of the GNU Lesser General Public
18   * License along with this library; if not, see <http://www.gnu.org/licenses/>
19   */
20  
21  #include "hw/mem/pc-dimm.h"
22  #include "qemu/config-file.h"
23  #include "qapi/visitor.h"
24  #include "qemu/range.h"
25  #include "sysemu/numa.h"
26  #include "sysemu/kvm.h"
27  #include "trace.h"
28  #include "hw/virtio/vhost.h"
29  
30  typedef struct pc_dimms_capacity {
31       uint64_t size;
32       Error    **errp;
33  } pc_dimms_capacity;
34  
35  void pc_dimm_memory_plug(DeviceState *dev, MemoryHotplugState *hpms,
36                           MemoryRegion *mr, uint64_t align, bool gap,
37                           Error **errp)
38  {
39      int slot;
40      MachineState *machine = MACHINE(qdev_get_machine());
41      PCDIMMDevice *dimm = PC_DIMM(dev);
42      Error *local_err = NULL;
43      uint64_t existing_dimms_capacity = 0;
44      uint64_t addr;
45  
46      addr = object_property_get_int(OBJECT(dimm), PC_DIMM_ADDR_PROP, &local_err);
47      if (local_err) {
48          goto out;
49      }
50  
51      addr = pc_dimm_get_free_addr(hpms->base,
52                                   memory_region_size(&hpms->mr),
53                                   !addr ? NULL : &addr, align, gap,
54                                   memory_region_size(mr), &local_err);
55      if (local_err) {
56          goto out;
57      }
58  
59      existing_dimms_capacity = pc_existing_dimms_capacity(&local_err);
60      if (local_err) {
61          goto out;
62      }
63  
64      if (existing_dimms_capacity + memory_region_size(mr) >
65          machine->maxram_size - machine->ram_size) {
66          error_setg(&local_err, "not enough space, currently 0x%" PRIx64
67                     " in use of total hot pluggable 0x" RAM_ADDR_FMT,
68                     existing_dimms_capacity,
69                     machine->maxram_size - machine->ram_size);
70          goto out;
71      }
72  
73      object_property_set_int(OBJECT(dev), addr, PC_DIMM_ADDR_PROP, &local_err);
74      if (local_err) {
75          goto out;
76      }
77      trace_mhp_pc_dimm_assigned_address(addr);
78  
79      slot = object_property_get_int(OBJECT(dev), PC_DIMM_SLOT_PROP, &local_err);
80      if (local_err) {
81          goto out;
82      }
83  
84      slot = pc_dimm_get_free_slot(slot == PC_DIMM_UNASSIGNED_SLOT ? NULL : &slot,
85                                   machine->ram_slots, &local_err);
86      if (local_err) {
87          goto out;
88      }
89      object_property_set_int(OBJECT(dev), slot, PC_DIMM_SLOT_PROP, &local_err);
90      if (local_err) {
91          goto out;
92      }
93      trace_mhp_pc_dimm_assigned_slot(slot);
94  
95      if (kvm_enabled() && !kvm_has_free_slot(machine)) {
96          error_setg(&local_err, "hypervisor has no free memory slots left");
97          goto out;
98      }
99  
100      if (!vhost_has_free_slot()) {
101          error_setg(&local_err, "a used vhost backend has no free"
102                                 " memory slots left");
103          goto out;
104      }
105  
106      memory_region_add_subregion(&hpms->mr, addr - hpms->base, mr);
107      vmstate_register_ram(mr, dev);
108      numa_set_mem_node_id(addr, memory_region_size(mr), dimm->node);
109  
110  out:
111      error_propagate(errp, local_err);
112  }
113  
114  void pc_dimm_memory_unplug(DeviceState *dev, MemoryHotplugState *hpms,
115                             MemoryRegion *mr)
116  {
117      PCDIMMDevice *dimm = PC_DIMM(dev);
118  
119      numa_unset_mem_node_id(dimm->addr, memory_region_size(mr), dimm->node);
120      memory_region_del_subregion(&hpms->mr, mr);
121      vmstate_unregister_ram(mr, dev);
122  }
123  
124  static int pc_existing_dimms_capacity_internal(Object *obj, void *opaque)
125  {
126      pc_dimms_capacity *cap = opaque;
127      uint64_t *size = &cap->size;
128  
129      if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
130          DeviceState *dev = DEVICE(obj);
131  
132          if (dev->realized) {
133              (*size) += object_property_get_int(obj, PC_DIMM_SIZE_PROP,
134                  cap->errp);
135          }
136  
137          if (cap->errp && *cap->errp) {
138              return 1;
139          }
140      }
141      object_child_foreach(obj, pc_existing_dimms_capacity_internal, opaque);
142      return 0;
143  }
144  
145  uint64_t pc_existing_dimms_capacity(Error **errp)
146  {
147      pc_dimms_capacity cap;
148  
149      cap.size = 0;
150      cap.errp = errp;
151  
152      pc_existing_dimms_capacity_internal(qdev_get_machine(), &cap);
153      return cap.size;
154  }
155  
156  int qmp_pc_dimm_device_list(Object *obj, void *opaque)
157  {
158      MemoryDeviceInfoList ***prev = opaque;
159  
160      if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
161          DeviceState *dev = DEVICE(obj);
162  
163          if (dev->realized) {
164              MemoryDeviceInfoList *elem = g_new0(MemoryDeviceInfoList, 1);
165              MemoryDeviceInfo *info = g_new0(MemoryDeviceInfo, 1);
166              PCDIMMDeviceInfo *di = g_new0(PCDIMMDeviceInfo, 1);
167              DeviceClass *dc = DEVICE_GET_CLASS(obj);
168              PCDIMMDevice *dimm = PC_DIMM(obj);
169  
170              if (dev->id) {
171                  di->has_id = true;
172                  di->id = g_strdup(dev->id);
173              }
174              di->hotplugged = dev->hotplugged;
175              di->hotpluggable = dc->hotpluggable;
176              di->addr = dimm->addr;
177              di->slot = dimm->slot;
178              di->node = dimm->node;
179              di->size = object_property_get_int(OBJECT(dimm), PC_DIMM_SIZE_PROP,
180                                                 NULL);
181              di->memdev = object_get_canonical_path(OBJECT(dimm->hostmem));
182  
183              info->dimm = di;
184              elem->value = info;
185              elem->next = NULL;
186              **prev = elem;
187              *prev = &elem->next;
188          }
189      }
190  
191      object_child_foreach(obj, qmp_pc_dimm_device_list, opaque);
192      return 0;
193  }
194  
195  ram_addr_t get_current_ram_size(void)
196  {
197      MemoryDeviceInfoList *info_list = NULL;
198      MemoryDeviceInfoList **prev = &info_list;
199      MemoryDeviceInfoList *info;
200      ram_addr_t size = ram_size;
201  
202      qmp_pc_dimm_device_list(qdev_get_machine(), &prev);
203      for (info = info_list; info; info = info->next) {
204          MemoryDeviceInfo *value = info->value;
205  
206          if (value) {
207              switch (value->kind) {
208              case MEMORY_DEVICE_INFO_KIND_DIMM:
209                  size += value->dimm->size;
210                  break;
211              default:
212                  break;
213              }
214          }
215      }
216      qapi_free_MemoryDeviceInfoList(info_list);
217  
218      return size;
219  }
220  
221  static int pc_dimm_slot2bitmap(Object *obj, void *opaque)
222  {
223      unsigned long *bitmap = opaque;
224  
225      if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
226          DeviceState *dev = DEVICE(obj);
227          if (dev->realized) { /* count only realized DIMMs */
228              PCDIMMDevice *d = PC_DIMM(obj);
229              set_bit(d->slot, bitmap);
230          }
231      }
232  
233      object_child_foreach(obj, pc_dimm_slot2bitmap, opaque);
234      return 0;
235  }
236  
237  int pc_dimm_get_free_slot(const int *hint, int max_slots, Error **errp)
238  {
239      unsigned long *bitmap = bitmap_new(max_slots);
240      int slot = 0;
241  
242      object_child_foreach(qdev_get_machine(), pc_dimm_slot2bitmap, bitmap);
243  
244      /* check if requested slot is not occupied */
245      if (hint) {
246          if (*hint >= max_slots) {
247              error_setg(errp, "invalid slot# %d, should be less than %d",
248                         *hint, max_slots);
249          } else if (!test_bit(*hint, bitmap)) {
250              slot = *hint;
251          } else {
252              error_setg(errp, "slot %d is busy", *hint);
253          }
254          goto out;
255      }
256  
257      /* search for free slot */
258      slot = find_first_zero_bit(bitmap, max_slots);
259      if (slot == max_slots) {
260          error_setg(errp, "no free slots available");
261      }
262  out:
263      g_free(bitmap);
264      return slot;
265  }
266  
267  static gint pc_dimm_addr_sort(gconstpointer a, gconstpointer b)
268  {
269      PCDIMMDevice *x = PC_DIMM(a);
270      PCDIMMDevice *y = PC_DIMM(b);
271      Int128 diff = int128_sub(int128_make64(x->addr), int128_make64(y->addr));
272  
273      if (int128_lt(diff, int128_zero())) {
274          return -1;
275      } else if (int128_gt(diff, int128_zero())) {
276          return 1;
277      }
278      return 0;
279  }
280  
281  static int pc_dimm_built_list(Object *obj, void *opaque)
282  {
283      GSList **list = opaque;
284  
285      if (object_dynamic_cast(obj, TYPE_PC_DIMM)) {
286          DeviceState *dev = DEVICE(obj);
287          if (dev->realized) { /* only realized DIMMs matter */
288              *list = g_slist_insert_sorted(*list, dev, pc_dimm_addr_sort);
289          }
290      }
291  
292      object_child_foreach(obj, pc_dimm_built_list, opaque);
293      return 0;
294  }
295  
296  uint64_t pc_dimm_get_free_addr(uint64_t address_space_start,
297                                 uint64_t address_space_size,
298                                 uint64_t *hint, uint64_t align, bool gap,
299                                 uint64_t size, Error **errp)
300  {
301      GSList *list = NULL, *item;
302      uint64_t new_addr, ret = 0;
303      uint64_t address_space_end = address_space_start + address_space_size;
304  
305      g_assert(QEMU_ALIGN_UP(address_space_start, align) == address_space_start);
306  
307      if (!address_space_size) {
308          error_setg(errp, "memory hotplug is not enabled, "
309                           "please add maxmem option");
310          goto out;
311      }
312  
313      if (hint && QEMU_ALIGN_UP(*hint, align) != *hint) {
314          error_setg(errp, "address must be aligned to 0x%" PRIx64 " bytes",
315                     align);
316          goto out;
317      }
318  
319      if (QEMU_ALIGN_UP(size, align) != size) {
320          error_setg(errp, "backend memory size must be multiple of 0x%"
321                     PRIx64, align);
322          goto out;
323      }
324  
325      assert(address_space_end > address_space_start);
326      object_child_foreach(qdev_get_machine(), pc_dimm_built_list, &list);
327  
328      if (hint) {
329          new_addr = *hint;
330      } else {
331          new_addr = address_space_start;
332      }
333  
334      /* find address range that will fit new DIMM */
335      for (item = list; item; item = g_slist_next(item)) {
336          PCDIMMDevice *dimm = item->data;
337          uint64_t dimm_size = object_property_get_int(OBJECT(dimm),
338                                                       PC_DIMM_SIZE_PROP,
339                                                       errp);
340          if (errp && *errp) {
341              goto out;
342          }
343  
344          if (ranges_overlap(dimm->addr, dimm_size, new_addr,
345                             size + (gap ? 1 : 0))) {
346              if (hint) {
347                  DeviceState *d = DEVICE(dimm);
348                  error_setg(errp, "address range conflicts with '%s'", d->id);
349                  goto out;
350              }
351              new_addr = QEMU_ALIGN_UP(dimm->addr + dimm_size + (gap ? 1 : 0),
352                                       align);
353          }
354      }
355      ret = new_addr;
356  
357      if (new_addr < address_space_start) {
358          error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
359                     "] at 0x%" PRIx64, new_addr, size, address_space_start);
360      } else if ((new_addr + size) > address_space_end) {
361          error_setg(errp, "can't add memory [0x%" PRIx64 ":0x%" PRIx64
362                     "] beyond 0x%" PRIx64, new_addr, size, address_space_end);
363      }
364  
365  out:
366      g_slist_free(list);
367      return ret;
368  }
369  
370  static Property pc_dimm_properties[] = {
371      DEFINE_PROP_UINT64(PC_DIMM_ADDR_PROP, PCDIMMDevice, addr, 0),
372      DEFINE_PROP_UINT32(PC_DIMM_NODE_PROP, PCDIMMDevice, node, 0),
373      DEFINE_PROP_INT32(PC_DIMM_SLOT_PROP, PCDIMMDevice, slot,
374                        PC_DIMM_UNASSIGNED_SLOT),
375      DEFINE_PROP_END_OF_LIST(),
376  };
377  
378  static void pc_dimm_get_size(Object *obj, Visitor *v, void *opaque,
379                            const char *name, Error **errp)
380  {
381      int64_t value;
382      MemoryRegion *mr;
383      PCDIMMDevice *dimm = PC_DIMM(obj);
384  
385      mr = host_memory_backend_get_memory(dimm->hostmem, errp);
386      value = memory_region_size(mr);
387  
388      visit_type_int(v, &value, name, errp);
389  }
390  
391  static void pc_dimm_check_memdev_is_busy(Object *obj, const char *name,
392                                        Object *val, Error **errp)
393  {
394      MemoryRegion *mr;
395  
396      mr = host_memory_backend_get_memory(MEMORY_BACKEND(val), errp);
397      if (memory_region_is_mapped(mr)) {
398          char *path = object_get_canonical_path_component(val);
399          error_setg(errp, "can't use already busy memdev: %s", path);
400          g_free(path);
401      } else {
402          qdev_prop_allow_set_link_before_realize(obj, name, val, errp);
403      }
404  }
405  
406  static void pc_dimm_init(Object *obj)
407  {
408      PCDIMMDevice *dimm = PC_DIMM(obj);
409  
410      object_property_add(obj, PC_DIMM_SIZE_PROP, "int", pc_dimm_get_size,
411                          NULL, NULL, NULL, &error_abort);
412      object_property_add_link(obj, PC_DIMM_MEMDEV_PROP, TYPE_MEMORY_BACKEND,
413                               (Object **)&dimm->hostmem,
414                               pc_dimm_check_memdev_is_busy,
415                               OBJ_PROP_LINK_UNREF_ON_RELEASE,
416                               &error_abort);
417  }
418  
419  static void pc_dimm_realize(DeviceState *dev, Error **errp)
420  {
421      PCDIMMDevice *dimm = PC_DIMM(dev);
422  
423      if (!dimm->hostmem) {
424          error_setg(errp, "'" PC_DIMM_MEMDEV_PROP "' property is not set");
425          return;
426      }
427      if (((nb_numa_nodes > 0) && (dimm->node >= nb_numa_nodes)) ||
428          (!nb_numa_nodes && dimm->node)) {
429          error_setg(errp, "'DIMM property " PC_DIMM_NODE_PROP " has value %"
430                     PRIu32 "' which exceeds the number of numa nodes: %d",
431                     dimm->node, nb_numa_nodes ? nb_numa_nodes : 1);
432          return;
433      }
434  }
435  
436  static MemoryRegion *pc_dimm_get_memory_region(PCDIMMDevice *dimm)
437  {
438      return host_memory_backend_get_memory(dimm->hostmem, &error_abort);
439  }
440  
441  static void pc_dimm_class_init(ObjectClass *oc, void *data)
442  {
443      DeviceClass *dc = DEVICE_CLASS(oc);
444      PCDIMMDeviceClass *ddc = PC_DIMM_CLASS(oc);
445  
446      dc->realize = pc_dimm_realize;
447      dc->props = pc_dimm_properties;
448      dc->desc = "DIMM memory module";
449  
450      ddc->get_memory_region = pc_dimm_get_memory_region;
451  }
452  
453  static TypeInfo pc_dimm_info = {
454      .name          = TYPE_PC_DIMM,
455      .parent        = TYPE_DEVICE,
456      .instance_size = sizeof(PCDIMMDevice),
457      .instance_init = pc_dimm_init,
458      .class_init    = pc_dimm_class_init,
459      .class_size    = sizeof(PCDIMMDeviceClass),
460  };
461  
462  static void pc_dimm_register_types(void)
463  {
464      type_register_static(&pc_dimm_info);
465  }
466  
467  type_init(pc_dimm_register_types)
468