1 /* 2 * QEMU PowerPC pSeries Logical Partition NUMA associativity handling 3 * 4 * Copyright IBM Corp. 2020 5 * 6 * Authors: 7 * Daniel Henrique Barboza <danielhb413@gmail.com> 8 * 9 * This work is licensed under the terms of the GNU GPL, version 2 or later. 10 * See the COPYING file in the top-level directory. 11 */ 12 13 #include "qemu/osdep.h" 14 #include "qemu-common.h" 15 #include "hw/ppc/spapr_numa.h" 16 #include "hw/pci-host/spapr.h" 17 #include "hw/ppc/fdt.h" 18 19 /* Moved from hw/ppc/spapr_pci_nvlink2.c */ 20 #define SPAPR_GPU_NUMA_ID (cpu_to_be32(1)) 21 22 void spapr_numa_associativity_init(SpaprMachineState *spapr, 23 MachineState *machine) 24 { 25 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 26 int nb_numa_nodes = machine->numa_state->num_nodes; 27 int i, j, max_nodes_with_gpus; 28 29 /* 30 * For all associativity arrays: first position is the size, 31 * position MAX_DISTANCE_REF_POINTS is always the numa_id, 32 * represented by the index 'i'. 33 * 34 * This will break on sparse NUMA setups, when/if QEMU starts 35 * to support it, because there will be no more guarantee that 36 * 'i' will be a valid node_id set by the user. 37 */ 38 for (i = 0; i < nb_numa_nodes; i++) { 39 spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); 40 spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); 41 } 42 43 /* 44 * Initialize NVLink GPU associativity arrays. We know that 45 * the first GPU will take the first available NUMA id, and 46 * we'll have a maximum of NVGPU_MAX_NUM GPUs in the machine. 47 * At this point we're not sure if there are GPUs or not, but 48 * let's initialize the associativity arrays and allow NVLink 49 * GPUs to be handled like regular NUMA nodes later on. 50 */ 51 max_nodes_with_gpus = nb_numa_nodes + NVGPU_MAX_NUM; 52 53 for (i = nb_numa_nodes; i < max_nodes_with_gpus; i++) { 54 spapr->numa_assoc_array[i][0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); 55 56 for (j = 1; j < MAX_DISTANCE_REF_POINTS; j++) { 57 uint32_t gpu_assoc = smc->pre_5_1_assoc_refpoints ? 58 SPAPR_GPU_NUMA_ID : cpu_to_be32(i); 59 spapr->numa_assoc_array[i][j] = gpu_assoc; 60 } 61 62 spapr->numa_assoc_array[i][MAX_DISTANCE_REF_POINTS] = cpu_to_be32(i); 63 } 64 } 65 66 void spapr_numa_write_associativity_dt(SpaprMachineState *spapr, void *fdt, 67 int offset, int nodeid) 68 { 69 _FDT((fdt_setprop(fdt, offset, "ibm,associativity", 70 spapr->numa_assoc_array[nodeid], 71 sizeof(spapr->numa_assoc_array[nodeid])))); 72 } 73 74 int spapr_numa_fixup_cpu_dt(SpaprMachineState *spapr, void *fdt, 75 int offset, PowerPCCPU *cpu) 76 { 77 int vcpu_assoc_size = NUMA_ASSOC_SIZE + 1; 78 uint32_t vcpu_assoc[vcpu_assoc_size]; 79 int index = spapr_get_vcpu_id(cpu); 80 int i; 81 82 /* 83 * VCPUs have an extra 'cpu_id' value in ibm,associativity 84 * compared to other resources. Increment the size at index 85 * 0, copy all associativity domains already set, then put 86 * cpu_id last. 87 */ 88 vcpu_assoc[0] = cpu_to_be32(MAX_DISTANCE_REF_POINTS + 1); 89 90 for (i = 1; i <= MAX_DISTANCE_REF_POINTS; i++) { 91 vcpu_assoc[i] = spapr->numa_assoc_array[cpu->node_id][i]; 92 } 93 94 vcpu_assoc[vcpu_assoc_size - 1] = cpu_to_be32(index); 95 96 /* Advertise NUMA via ibm,associativity */ 97 return fdt_setprop(fdt, offset, "ibm,associativity", 98 vcpu_assoc, sizeof(vcpu_assoc)); 99 } 100 101 102 int spapr_numa_write_assoc_lookup_arrays(SpaprMachineState *spapr, void *fdt, 103 int offset) 104 { 105 MachineState *machine = MACHINE(spapr); 106 int nb_numa_nodes = machine->numa_state->num_nodes; 107 int nr_nodes = nb_numa_nodes ? nb_numa_nodes : 1; 108 uint32_t *int_buf, *cur_index, buf_len; 109 int ret, i; 110 111 /* ibm,associativity-lookup-arrays */ 112 buf_len = (nr_nodes * MAX_DISTANCE_REF_POINTS + 2) * sizeof(uint32_t); 113 cur_index = int_buf = g_malloc0(buf_len); 114 int_buf[0] = cpu_to_be32(nr_nodes); 115 /* Number of entries per associativity list */ 116 int_buf[1] = cpu_to_be32(MAX_DISTANCE_REF_POINTS); 117 cur_index += 2; 118 for (i = 0; i < nr_nodes; i++) { 119 /* 120 * For the lookup-array we use the ibm,associativity array, 121 * from numa_assoc_array. without the first element (size). 122 */ 123 uint32_t *associativity = spapr->numa_assoc_array[i]; 124 memcpy(cur_index, ++associativity, 125 sizeof(uint32_t) * MAX_DISTANCE_REF_POINTS); 126 cur_index += MAX_DISTANCE_REF_POINTS; 127 } 128 ret = fdt_setprop(fdt, offset, "ibm,associativity-lookup-arrays", int_buf, 129 (cur_index - int_buf) * sizeof(uint32_t)); 130 g_free(int_buf); 131 132 return ret; 133 } 134 135 /* 136 * Helper that writes ibm,associativity-reference-points and 137 * max-associativity-domains in the RTAS pointed by @rtas 138 * in the DT @fdt. 139 */ 140 void spapr_numa_write_rtas_dt(SpaprMachineState *spapr, void *fdt, int rtas) 141 { 142 SpaprMachineClass *smc = SPAPR_MACHINE_GET_CLASS(spapr); 143 uint32_t refpoints[] = { 144 cpu_to_be32(0x4), 145 cpu_to_be32(0x4), 146 cpu_to_be32(0x2), 147 }; 148 uint32_t nr_refpoints = ARRAY_SIZE(refpoints); 149 uint32_t maxdomain = cpu_to_be32(spapr->gpu_numa_id > 1 ? 1 : 0); 150 uint32_t maxdomains[] = { 151 cpu_to_be32(4), 152 maxdomain, 153 maxdomain, 154 maxdomain, 155 cpu_to_be32(spapr->gpu_numa_id), 156 }; 157 158 if (smc->pre_5_1_assoc_refpoints) { 159 nr_refpoints = 2; 160 } 161 162 _FDT(fdt_setprop(fdt, rtas, "ibm,associativity-reference-points", 163 refpoints, nr_refpoints * sizeof(refpoints[0]))); 164 165 _FDT(fdt_setprop(fdt, rtas, "ibm,max-associativity-domains", 166 maxdomains, sizeof(maxdomains))); 167 } 168