/* Copyright 2013-2018 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <skiboot.h>
#include <io.h>
#include <timebase.h>
#include <pci-cfg.h>
#include <pci.h>
#include <pci-slot.h>
#include <pci-virt.h>
#include <opal.h>
#include <opal-api.h>
#include <cpu.h>
#include <device.h>
#include <ccan/str/str.h>
#include <ccan/array_size/array_size.h>
#include <affinity.h>
#include <npu2.h>
#include <lock.h>
#include <xscom.h>
#include <bitutils.h>
#include <chip.h>
#include <phys-map.h>
#include <nvram.h>
#include <xscom-p9-regs.h>
#include <phb4.h>

#define VENDOR_CAP_START	0x80
#define VENDOR_CAP_END		0x90
#define VENDOR_CAP_LEN		0x10
#define VENDOR_CAP_VERSION	0x01
#define VENDOR_CAP_PCI_DEV_OFFSET 0x0d

/*
 * NPU2 BAR layout definition. We have 3 stacks and each of them
 * contains 2 bricks. So every NPU2 has 6 bricks in total. There are 2
 * PHY BARs and each of them is shared by 3 bricks. Every brick has
 * one NTL BAR and two bricks share one GENID BAR. There is also a
 * global MMIO BAR. We only expose DL and GENID BARs to the OS and all
 * other BARs will be hidden in skiboot.
 *
 * Before the global MMIO BAR is configured, scom is the only way to
 * access the BAR registers. At NPU2 PHB probing time, we rely on scom
 * to assign all BARs until the global MMIO BAR is established.
 *
 * We need to access 4 SM registers in the same stack in order to
 * configure one particular BAR.
 */

/* Set a specific flag in the vendor config space */
void npu2_set_link_flag(struct npu2_dev *ndev, uint8_t flag)
{
	ndev->nvlink.link_flags |= flag;
	PCI_VIRT_CFG_INIT_RO(ndev->nvlink.pvd, VENDOR_CAP_START +
			     VENDOR_CAP_PCI_DEV_OFFSET, 1, ndev->nvlink.link_flags);
}

void npu2_clear_link_flag(struct npu2_dev *ndev, uint8_t flag)
{
	ndev->nvlink.link_flags &= ~flag;
	PCI_VIRT_CFG_INIT_RO(ndev->nvlink.pvd, VENDOR_CAP_START +
			     VENDOR_CAP_PCI_DEV_OFFSET, 1, ndev->nvlink.link_flags);
}

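/*
 * Select an IODA table and entry index in the ATS. With autoinc set,
 * the hardware advances the index after each access to the IODA data
 * register, so consecutive entries can be written back-to-back.
 */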
static inline void npu2_ioda_sel(struct npu2 *p, uint32_t table,
				 uint32_t index, bool autoinc)
{
	out_be64(p->regs + NPU2_ATS_IODA_TBL,
		 (autoinc ? NPU2_ATS_IODA_TBL_AUTOINC : 0ul) |
		 SETFIELD(NPU2_ATS_IODA_TBL_SELECT, 0ul, table) |
		 SETFIELD(NPU2_ATS_IODA_TBL_INDEX, 0ul, index));
}

static struct npu2_dev *npu2_bdf_to_dev(struct npu2 *p,
					uint32_t bdfn)
{
	struct pci_virt_device *pvd;

	/* All emulated devices are attached to the root bus */
	if (bdfn & ~0xff)
		return NULL;

	pvd = pci_virt_find_device(&p->phb_nvlink, bdfn);
	if (pvd)
		return pvd->data;

	return NULL;
}

static inline void npu2_get_bar(uint32_t gcid, struct npu2_bar *bar)
{
	phys_map_get(gcid, bar->type, bar->index, &bar->base, &bar->size);
}

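/*
 * Read a BAR's current base, size and enable state back from hardware.
 * Only the SM_0 copy of the register is read; all four SM copies are
 * kept in sync by npu2_write_bar() below.
 */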
static void npu2_read_bar(struct npu2 *p, struct npu2_bar *bar)
{
	uint64_t reg, val;
	int enabled;

	reg = NPU2_REG_OFFSET(0, NPU2_BLOCK_SM_0, bar->reg);
	val = npu2_read(p, reg);

	switch (NPU2_REG(bar->reg)) {
	case NPU2_PHY_BAR:
		bar->base = GETFIELD(NPU2_PHY_BAR_ADDR, val) << 21;
		enabled = GETFIELD(NPU2_PHY_BAR_ENABLE, val);

		if (NPU2_REG_STACK(reg) == NPU2_STACK_STCK_2)
			/* This is the global MMIO BAR */
			bar->size = 0x1000000;
		else
			bar->size = 0x200000;
		break;
	case NPU2_NTL0_BAR:
	case NPU2_NTL1_BAR:
		bar->base = GETFIELD(NPU2_NTL_BAR_ADDR, val) << 16;
		enabled = GETFIELD(NPU2_NTL_BAR_ENABLE, val);
		bar->size = 0x10000 << GETFIELD(NPU2_NTL_BAR_SIZE, val);
		break;
	case NPU2_GENID_BAR:
		bar->base = GETFIELD(NPU2_GENID_BAR_ADDR, val) << 16;
		enabled = GETFIELD(NPU2_GENID_BAR_ENABLE, val);
		bar->size = 0x20000;
		break;
	default:
		bar->base = 0ul;
		enabled = 0;
		bar->size = 0;
		break;
	}

	bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED, bar->flags, enabled);
}

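/*
 * Write a BAR to all four SM copies of its register. Before the global
 * MMIO BAR is up there is no 'struct npu2' to go through, so callers
 * pass p == NULL and we fall back to writing via SCOM.
 */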
static void npu2_write_bar(struct npu2 *p,
			   struct npu2_bar *bar,
			   uint32_t gcid,
			   uint32_t scom)
{
	uint64_t reg, val, enable = !!(bar->flags & NPU2_BAR_FLAG_ENABLED);
	int block;

	switch (NPU2_REG(bar->reg)) {
	case NPU2_PHY_BAR:
		val = SETFIELD(NPU2_PHY_BAR_ADDR, 0ul, bar->base >> 21);
		val = SETFIELD(NPU2_PHY_BAR_ENABLE, val, enable);
		break;
	case NPU2_NTL0_BAR:
	case NPU2_NTL1_BAR:
		val = SETFIELD(NPU2_NTL_BAR_ADDR, 0ul, bar->base >> 16);
		val = SETFIELD(NPU2_NTL_BAR_ENABLE, val, enable);
		val = SETFIELD(NPU2_NTL_BAR_SIZE, val, 1);
		break;
	case NPU2_GENID_BAR:
		val = SETFIELD(NPU2_GENID_BAR_ADDR, 0ul, bar->base >> 16);
		val = SETFIELD(NPU2_GENID_BAR_ENABLE, val, enable);
		break;
	default:
		val = 0ul;
	}

	for (block = NPU2_BLOCK_SM_0; block <= NPU2_BLOCK_SM_3; block++) {
		reg = NPU2_REG_OFFSET(0, block, bar->reg);
		if (p)
			npu2_write(p, reg, val);
		else
			npu2_scom_write(gcid, scom, reg, NPU2_MISC_DA_LEN_8B, val);
	}
}

/* Trap for the PCI command register (0x4) to enable or disable the device's BARs */
static int64_t npu2_cfg_write_cmd(void *dev,
				  struct pci_cfg_reg_filter *pcrf __unused,
				  uint32_t offset, uint32_t size,
				  uint32_t *data, bool write)
{
	struct pci_virt_device *pvd = dev;
	struct npu2_dev *ndev = pvd->data;
	struct npu2_bar *ntl_npu_bar, *genid_npu_bar;
	bool enabled;

	if (!write)
		return OPAL_PARTIAL;

	if (offset != PCI_CFG_CMD)
		return OPAL_PARAMETER;
	if (size != 1 && size != 2 && size != 4)
		return OPAL_PARAMETER;

	/*
	 * Enable or disable the NTL BAR. The GENID BAR, which two
	 * bricks share, is handled separately below.
	 */
	enabled = !!(*data & PCI_CFG_CMD_MEM_EN);
	ntl_npu_bar = &ndev->bars[0].npu2_bar;
	genid_npu_bar = &ndev->bars[1].npu2_bar;

	ntl_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED, ntl_npu_bar->flags, enabled);
	npu2_write_bar(ndev->npu, ntl_npu_bar, 0, 0);

	/*
	 * Enable/disable the GENID BAR. Two bricks share one GENID
	 * BAR which is exposed via the first brick, so we need to
	 * track the enables separately.
	 */
	if (NPU2DEV_BRICK(ndev))
		genid_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED1, genid_npu_bar->flags,
						enabled);
	else
		genid_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED0, genid_npu_bar->flags,
						enabled);

	/* Enable the BAR if either device requests it enabled, otherwise disable it */
	genid_npu_bar->flags = SETFIELD(NPU2_BAR_FLAG_ENABLED, genid_npu_bar->flags,
					!!(genid_npu_bar->flags & (NPU2_BAR_FLAG_ENABLED0 |
								   NPU2_BAR_FLAG_ENABLED1)));
	npu2_write_bar(ndev->npu, genid_npu_bar, 0, 0);

	return OPAL_PARTIAL;
}

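/*
 * Trap handlers emulating the standard PCI BAR sizing protocol: a
 * write of 0xffffffff arms the trap, and the following read returns
 * the BAR size (upper or lower 32 bits) instead of the base address.
 */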
static int64_t npu2_cfg_read_bar(struct npu2_dev *dev __unused,
				 struct pci_cfg_reg_filter *pcrf,
				 uint32_t offset, uint32_t size,
				 uint32_t *data)
{
	struct npu2_pcie_bar *bar = (struct npu2_pcie_bar *) pcrf->data;

	if (!(bar->flags & NPU2_PCIE_BAR_FLAG_TRAPPED))
		return OPAL_PARTIAL;

	if ((size != 4) ||
	    (offset != pcrf->start && offset != pcrf->start + 4))
		return OPAL_PARAMETER;

	if (bar->flags & NPU2_PCIE_BAR_FLAG_SIZE_HI)
		*data = bar->npu2_bar.size >> 32;
	else
		*data = bar->npu2_bar.size;
	bar->flags &= ~(NPU2_PCIE_BAR_FLAG_TRAPPED | NPU2_PCIE_BAR_FLAG_SIZE_HI);

	return OPAL_SUCCESS;
}

static int64_t npu2_cfg_write_bar(struct npu2_dev *dev,
				  struct pci_cfg_reg_filter *pcrf,
				  uint32_t offset, uint32_t size,
				  uint32_t data)
{
	struct npu2_pcie_bar *bar = (struct npu2_pcie_bar *) pcrf->data;
	struct npu2_bar old_bar, *npu2_bar = &bar->npu2_bar;

	if ((size != 4) ||
	    (offset != pcrf->start && offset != pcrf->start + 4))
		return OPAL_PARAMETER;

	/* Return BAR size on next read */
	if (data == 0xffffffff) {
		bar->flags |= NPU2_PCIE_BAR_FLAG_TRAPPED;
		if (offset == pcrf->start + 4)
			bar->flags |= NPU2_PCIE_BAR_FLAG_SIZE_HI;

		return OPAL_SUCCESS;
	}

	if (offset == pcrf->start) {
		npu2_bar->base &= 0xffffffff00000000UL;
		npu2_bar->base |= (data & 0xfffffff0);
	} else {
		npu2_bar->base &= 0x00000000ffffffffUL;
		npu2_bar->base |= ((uint64_t)data << 32);

		if (NPU2_REG(npu2_bar->reg) == NPU2_GENID_BAR && NPU2DEV_BRICK(dev))
			npu2_bar->base -= 0x10000;

		old_bar.reg = npu2_bar->reg;
		npu2_read_bar(dev->npu, &old_bar);

		/* Only allow changing the base address if the BAR is not enabled */
		if ((npu2_bar->flags & NPU2_BAR_FLAG_ENABLED) &&
		    (npu2_bar->base != old_bar.base)) {
			npu2_bar->base = old_bar.base;
			return OPAL_HARDWARE;
		}

		npu2_write_bar(dev->npu, &bar->npu2_bar, 0, 0);
	}

	/* To update the config cache */
	return OPAL_PARTIAL;
}

static int64_t npu2_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
				uint32_t offset, uint32_t len, uint32_t *data,
				bool write)
{
	struct pci_virt_device *pvd = dev;
	struct npu2_dev *ndev = (struct npu2_dev *) pvd->data;

	if (write)
		return npu2_cfg_write_bar(ndev, pcrf, offset, len, *data);

	return npu2_cfg_read_bar(ndev, pcrf, offset, len, data);
}

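/*
 * L2/L3 cache purging around GPU resets. The purge is kicked off on
 * every core first and only waited on afterwards, so the flushes run
 * in parallel. Called from the FLR and secondary bus reset traps
 * below, where stale cached GPU memory could otherwise cause data
 * integrity problems.
 */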
static int start_l2_purge(uint32_t chip_id, uint32_t core_id)
{
	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
	int rc;

	rc = xscom_write_mask(chip_id, addr, L2CAC_FLUSH,
			      L2_PRD_PURGE_CMD_TYPE_MASK);
	if (!rc)
		rc = xscom_write_mask(chip_id, addr, L2_PRD_PURGE_CMD_TRIGGER,
				      L2_PRD_PURGE_CMD_TRIGGER);
	if (rc)
		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write_mask "
		      "failed %i\n", core_id, rc);
	return rc;
}

static int wait_l2_purge(uint32_t chip_id, uint32_t core_id)
{
	uint64_t val;
	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L2_PRD_PURGE_CMD_REG);
	unsigned long now = mftb();
	unsigned long end = now + msecs_to_tb(L2_L3_PRD_PURGE_TIMEOUT_MS);
	int rc;

	while (1) {
		rc = xscom_read(chip_id, addr, &val);
		if (rc) {
			prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM read "
			      "failed %i\n", core_id, rc);
			break;
		}
		if (!(val & L2_PRD_PURGE_CMD_REG_BUSY))
			break;
		now = mftb();
		if (tb_compare(now, end) == TB_AAFTERB) {
			prlog(PR_ERR, "PURGE L2 on core 0x%x timed out %i\n",
			      core_id, rc);
			return OPAL_BUSY;
		}
	}

	/* We have to clear the trigger bit ourselves */
	val &= ~L2_PRD_PURGE_CMD_TRIGGER;
	rc = xscom_write(chip_id, addr, val);
	if (rc)
		prlog(PR_ERR, "PURGE L2 on core 0x%x: XSCOM write failed %i\n",
		      core_id, rc);
	return rc;
}

static int start_l3_purge(uint32_t chip_id, uint32_t core_id)
{
	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
	int rc;

	rc = xscom_write_mask(chip_id, addr, L3_FULL_PURGE,
			      L3_PRD_PURGE_TTYPE_MASK);
	if (!rc)
		rc = xscom_write_mask(chip_id, addr, L3_PRD_PURGE_REQ,
				      L3_PRD_PURGE_REQ);
	if (rc)
		prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM write_mask "
		      "failed %i\n", core_id, rc);
	return rc;
}

static int wait_l3_purge(uint32_t chip_id, uint32_t core_id)
{
	uint64_t val;
	uint64_t addr = XSCOM_ADDR_P9_EX(core_id, L3_PRD_PURGE_REG);
	unsigned long now = mftb();
	unsigned long end = now + msecs_to_tb(L2_L3_PRD_PURGE_TIMEOUT_MS);
	int rc;

	/* Unlike L2, the L3 request bit is automatically cleared when
	 * flushing is done, so there is no trigger to clear here. */
	while (1) {
		rc = xscom_read(chip_id, addr, &val);
		if (rc) {
			prlog(PR_ERR, "PURGE L3 on core 0x%x: XSCOM read "
			      "failed %i\n", core_id, rc);
			break;
		}
		if (!(val & L3_PRD_PURGE_REQ))
			break;
		now = mftb();
		if (tb_compare(now, end) == TB_AAFTERB) {
			prlog(PR_ERR, "PURGE L3 on core 0x%x timed out %i\n",
			      core_id, rc);
			return OPAL_BUSY;
		}
	}
	return rc;
}

static int64_t purge_l2_l3_caches(void)
{
	struct cpu_thread *t;
	uint64_t core_id, prev_core_id = (uint64_t)-1;
	int rc = OPAL_SUCCESS;
	unsigned long now = mftb();

	for_each_ungarded_cpu(t) {
		/* Only need to do it once per core chiplet */
		core_id = pir_to_core_id(t->pir);
		if (prev_core_id == core_id)
			continue;
		prev_core_id = core_id;
		rc = start_l2_purge(t->chip_id, core_id);
		if (rc)
			goto trace_exit;
		rc = start_l3_purge(t->chip_id, core_id);
		if (rc)
			goto trace_exit;
	}

	prev_core_id = (uint64_t)-1;
	for_each_ungarded_cpu(t) {
		/* Only need to do it once per core chiplet */
		core_id = pir_to_core_id(t->pir);
		if (prev_core_id == core_id)
			continue;
		prev_core_id = core_id;

		rc = wait_l2_purge(t->chip_id, core_id);
		if (rc)
			goto trace_exit;
		rc = wait_l3_purge(t->chip_id, core_id);
		if (rc)
			goto trace_exit;
	}

trace_exit:
	prlog(PR_TRACE, "L2/L3 purging took %ldus\n",
	      tb_to_usecs(mftb() - now));

	return rc;
}

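/*
 * Trap for writes to the PCIe Device Control register: a requested
 * Function Level Reset kicks off the NVLink reset procedure, after
 * which the L2/L3 caches are purged.
 */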
static int64_t npu2_dev_cfg_exp_devcap(void *dev,
				       struct pci_cfg_reg_filter *pcrf __unused,
				       uint32_t offset, uint32_t size,
				       uint32_t *data, bool write)
{
	struct pci_virt_device *pvd = dev;
	struct npu2_dev *ndev = pvd->data;
	int rc;

	assert(write);

	if ((size != 2) || (offset & 1)) {
		/* Short config writes are not supported */
		prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
		      ndev->nvlink.phb->opal_id);
		return OPAL_PARAMETER;
	}

	if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
		npu2_dev_procedure_reset(ndev);

	rc = purge_l2_l3_caches();
	if (rc)
		return rc;

	return OPAL_PARTIAL;
}

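/* Generate the 8/16/32-bit config accessors backing the emulated config space */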
#define NPU2_CFG_READ(size, type)					\
static int64_t npu2_cfg_read##size(struct phb *phb, uint32_t bdfn,	\
				   uint32_t offset, type *data)		\
{									\
	uint32_t val;							\
	int64_t ret;							\
									\
	ret = pci_virt_cfg_read(phb, bdfn, offset,			\
				sizeof(*data), &val);			\
	*data = (type)val;						\
	return ret;							\
}
#define NPU2_CFG_WRITE(size, type)					\
static int64_t npu2_cfg_write##size(struct phb *phb, uint32_t bdfn,	\
				    uint32_t offset, type data)		\
{									\
	uint32_t val = data;						\
	int64_t ret;							\
									\
	ret = pci_virt_cfg_write(phb, bdfn, offset,			\
				 sizeof(data), val);			\
	return ret;							\
}

NPU2_CFG_READ(8, u8);
NPU2_CFG_READ(16, u16);
NPU2_CFG_READ(32, u32);
NPU2_CFG_WRITE(8, u8);
NPU2_CFG_WRITE(16, u16);
NPU2_CFG_WRITE(32, u32);

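/*
 * pci_walk_dev() callback: match the emulated NVLink device to the real
 * NVIDIA GPU by comparing the GPU's slot location code with the NPU
 * device's slot label. Returns 1 to stop the walk on a match.
 */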
static int __npu2_dev_bind_pci_dev(struct phb *phb __unused,
				   struct pci_device *pd,
				   void *data)
{
	struct npu2_dev *dev = data;
	struct dt_node *pci_dt_node;
	char *pcislot;

	/* Ignore non-NVIDIA PCI devices */
	if ((pd->vdid & 0xffff) != 0x10de)
		return 0;

	/* Find the PCI device's slot location */
	for (pci_dt_node = pd->dn;
	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,loc-code");
	     pci_dt_node = pci_dt_node->parent);

	if (!pci_dt_node)
		return 0;

	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,loc-code");

	NPU2DEVDBG(dev, "Comparing GPU '%s' and NPU2 '%s'\n",
		   pcislot, dev->nvlink.slot_label);

	if (streq(pcislot, dev->nvlink.slot_label))
		return 1;

	return 0;
}

static int64_t npu2_gpu_bridge_sec_bus_reset(void *dev,
		struct pci_cfg_reg_filter *pcrf __unused,
		uint32_t offset, uint32_t len,
		uint32_t *data, bool write)
{
	struct pci_device *pd = dev;
	struct pci_device *gpu;
	struct phb *npphb;
	struct npu2 *npu;
	struct dt_node *np;
	struct npu2_dev *ndev;
	int i;

	assert(write);

	if ((len != 2) || (offset & 1)) {
		/* Short config writes are not supported */
		PCIERR(pd->phb, pd->bdfn,
		       "Unsupported write to bridge control register\n");
		return OPAL_PARAMETER;
	}

	gpu = list_top(&pd->children, struct pci_device, link);
	if (gpu && (*data & PCI_CFG_BRCTL_SECONDARY_RESET)) {
		int64_t rc;

		dt_for_each_compatible(dt_root, np, "ibm,power9-npu-pciex") {
			npphb = pci_get_phb(dt_prop_get_cell(np,
					"ibm,opal-phbid", 1));
			if (!npphb || npphb->phb_type != phb_type_npu_v2)
				continue;

			npu = phb_to_npu2_nvlink(npphb);
			for (i = 0; i < npu->total_devices; ++i) {
				ndev = &npu->devices[i];
				if (ndev->nvlink.pd == gpu)
					npu2_dev_procedure_reset(ndev);
			}
		}

		rc = purge_l2_l3_caches();
		if (rc)
			return rc;
	}

	return OPAL_PARTIAL;
}

static void npu2_dev_bind_pci_dev(struct npu2_dev *dev)
{
	struct phb *phb;
	uint32_t i;

	if (dev->nvlink.pd)
		return;

	for (i = 0; i < 64; i++) {
		if (dev->npu->phb_nvlink.opal_id == i)
			continue;

		phb = pci_get_phb(i);
		if (!phb)
			continue;

		dev->nvlink.pd = pci_walk_dev(phb, NULL, __npu2_dev_bind_pci_dev, dev);
		if (dev->nvlink.pd) {
			dev->nvlink.phb = phb;
			/* Found the device, set the bit in config space */
			npu2_set_link_flag(dev, NPU2_DEV_PCI_LINKED);

			/*
			 * We define a custom secondary bus reset handler
			 * for a slot with an NVLink-connected GPU to
			 * prevent HMIs which will otherwise happen if we
			 * reset the GPU before resetting the NVLinks.
			 */
			if (dev->nvlink.pd->parent &&
			    dev->nvlink.pd->parent->slot)
				pci_add_cfg_reg_filter(dev->nvlink.pd->parent,
						PCI_CFG_BRCTL, 2,
						PCI_REG_FLAG_WRITE,
						npu2_gpu_bridge_sec_bus_reset);
			return;
		}
	}

	NPU2DEVINF(dev, "No PCI device found for slot '%s'\n",
		   dev->nvlink.slot_label);
}

static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;

static void npu2_append_phandle(struct dt_node *dn,
				u32 phandle)
{
	struct dt_property *prop;
	uint32_t *npu_phandles;
	size_t len;

	/*
	 * Use a lock to make sure no one else has a reference to an
	 * ibm,npu property (this assumes this is the only function
	 * that holds a reference to it)
	 */
	lock(&pci_npu_phandle_lock);

	/* This function shouldn't be called unless ibm,npu exists */
	prop = (struct dt_property *)dt_require_property(dn, "ibm,npu", -1);

	/* Need to append to the properties */
	len = prop->len + sizeof(*npu_phandles);
	dt_resize_property(&prop, len);
	prop->len = len;

	npu_phandles = (uint32_t *)prop->prop;
	npu_phandles[len / sizeof(*npu_phandles) - 1] = phandle;
	unlock(&pci_npu_phandle_lock);
}

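/*
 * Create a memory node for coherent GPU memory. GPU memory has no real
 * chip, so a fake chip id is handed out, counting down from 255.
 */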
static struct dt_node *npu2_create_memory_dn(uint64_t addr, uint64_t size)
{
	struct dt_node *mem;
	static u32 chip_id = 255;

	mem = dt_find_by_name_addr(dt_root, "memory", addr);
	if (mem)
		return mem;

	mem = dt_new_addr(dt_root, "memory", addr);
	if (!mem)
		return NULL;
	dt_add_property_string(mem, "device_type", "memory");
	dt_add_property_string(mem, "compatible", "ibm,coherent-device-memory");
	dt_add_property_u64s(mem, "reg", addr, size);
	dt_add_property_cells(mem, "ibm,chip-id", chip_id);
	dt_add_property_u64s(mem, "linux,usable-memory", addr, 0);
	dt_add_property_cells(mem, "ibm,associativity", 4, chip_id, chip_id, chip_id, chip_id);
	chip_id--;

	assert(chip_id);
	return mem;
}

/* There are potentially multiple links per GPU, so look up the GPU memory
 * based on bdfn. */
static void npu2_get_gpu_base(struct npu2_dev *ndev, uint64_t *addr, uint64_t *size)
{
	struct npu2 *p = ndev->npu;
	int group;

	group = (ndev->bdfn >> 3) & 0x1f;
	phys_map_get(ndev->npu->chip_id, p->gpu_map_type, group, addr, size);
}

static void npu2_dn_fixup_gmb(struct dt_node *pd_dn, struct npu2_dev *ndev)
{
	uint64_t gpu_base, gpu_size, gta;
	struct dt_node *mem_dn;

	npu2_get_gpu_base(ndev, &gpu_base, &gpu_size);
	mem_dn = npu2_create_memory_dn(gpu_base, gpu_size);
	assert(mem_dn);
	dt_add_property_cells(pd_dn, "memory-region", mem_dn->phandle);

	/* Coral mode address compression. This is documented in Figure 3.5
	 * "P9->GPU RA Compression (Coral)" of the NPU2 workbook. */
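	/*
	 * RA bits 42:0 pass through unchanged, while RA bits 46:45 and
	 * 50:49 are packed down into GTA bits 44:43 and 46:45
	 * respectively.
	 */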
	gta = ((gpu_base >> 42) & 0x1) << 42;
	gta |= ((gpu_base >> 45) & 0x3) << 43;
	gta |= ((gpu_base >> 49) & 0x3) << 45;
	gta |= gpu_base & ((1UL << 43) - 1);

	dt_add_property_u64s(pd_dn, "ibm,device-tgt-addr", gta);
}

static int npu2_assign_gmb(struct npu2_dev *ndev)
{
	struct npu2 *p = ndev->npu;
	int peers, mode;
	uint32_t bdfn;
	uint64_t base, size, reg, val, gmb;

	/* Need to work out the number of link peers. This amounts to
	 * working out the maximum function number, so start at the
	 * highest bdfn (fn = 6) and count back until we find an
	 * npu2_dev. */
	for (bdfn = (ndev->bdfn & ~0x7) | NPU2_LINKS_PER_CHIP;
	     (bdfn & 0x7) != 0x7; bdfn = (bdfn & ~0x7) | ((bdfn & 0x7) - 1))
		if (npu2_bdf_to_dev(p, bdfn))
			break;
	peers = bdfn & 0x7;

	npu2_get_gpu_base(ndev, &base, &size);

	NPU2DBG(p, "Setting BAR region dt:%llx\n", base);
	val = SETFIELD(NPU2_MEM_BAR_EN, 0ULL, 1);
	val = SETFIELD(NPU2_MEM_BAR_SEL_MEM, val, base >> (63-14));
	val = SETFIELD(NPU2_MEM_BAR_GROUP, val, base >> (63-18));
	val = SETFIELD(NPU2_MEM_BAR_CHIP, val, base >> (63-21));
	val = SETFIELD(NPU2_MEM_BAR_NODE_ADDR, val, base >> (63-33));
	val = SETFIELD(NPU2_MEM_BAR_POISON, val, 1);
	val = SETFIELD(NPU2_MEM_BAR_GRANULE, val, 0);

	/* We don't know how much memory the GPU has, so we may as well just
	 * pass the whole aperture through at this point. */
	val = SETFIELD(NPU2_MEM_BAR_BAR_SIZE, val, ilog2(size >> 30));

	switch (peers) {
	case 0:
		mode = 0;
		break;
	case 1:
		mode = 1;
		break;
	case 2:
		mode = 3;
		break;
	case 3:
		mode = 6;
		break;
	case 5:
		mode = 10;
		break;
	default:
		/* Hardware does not support this configuration */
		assert(0);
	}

	mode += ndev->bdfn & 0x7;
	val = SETFIELD(NPU2_MEM_BAR_MODE, val, mode);

	gmb = NPU2_GPU0_MEM_BAR;
	if (NPU2DEV_BRICK(ndev))
		gmb = NPU2_GPU1_MEM_BAR;

	reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
			      NPU2_BLOCK_SM_0, gmb);

	npu2_write(p, reg, val);
	reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
			      NPU2_BLOCK_SM_1, gmb);
	npu2_write(p, reg, val);
	reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
			      NPU2_BLOCK_SM_2, gmb);
	npu2_write(p, reg, val);
	reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + NPU2DEV_STACK(ndev),
			      NPU2_BLOCK_SM_3, gmb);
	npu2_write(p, reg, val);

	return 0;
}

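/*
 * pci_walk_dev() callback run at PHB fixup time: program the GPU memory
 * BAR, add the GPU memory node and NVLink properties to the device
 * tree, and bind the emulated device to its real GPU.
 */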
static int npu2_dn_fixup(struct phb *phb,
			 struct pci_device *pd,
			 void *data __unused)
{
	struct npu2 *p = phb_to_npu2_nvlink(phb);
	struct npu2_dev *dev;
	uint32_t speed;
	const char *label;

	dev = npu2_bdf_to_dev(p, pd->bdfn);
	assert(dev);
	if (dev->nvlink.phb || dev->nvlink.pd)
		return 0;

	npu2_assign_gmb(dev);
	npu2_dn_fixup_gmb(pd->dn, dev);
	dt_add_property_cells(pd->dn, "ibm,nvlink", dev->dt_node->phandle);

	/*
	 * NVLink supports multiple speeds and device drivers need to know
	 * what speed has been set by firmware. Hostboot does the inits that
	 * set the link speed and tells us via HDAT, so we copy that from
	 * the link node.
	 */
	speed = dt_prop_get_u32_def(dev->dt_node, "nvidia,link-speed", 0xff);
	if (speed != 0xff)
		dt_add_property_cells(pd->dn, "ibm,nvlink-speed", speed);

	/*
	 * NPU2 devices have a slot label that indicates which GPU slot
	 * this NPU is connected to. Add a location code to the NVLink
	 * device node based on the slot label.
	 */
	label = dt_prop_get_def(dev->dt_node, "ibm,slot-label", NULL);
	if (!label) {
		/**
		 * @fwts-label NPUNoPHBSlotLabel
		 * @fwts-advice No GPU/NPU2 slot information was found.
		 * NVLink2 functionality will not work.
		 */
		prlog(PR_ERR, "NPU: Cannot find GPU slot information\n");
		return 0;
	}
	dt_add_property_string(pd->dn, "ibm,loc-code", label);

	dev->nvlink.slot_label = label;

	/*
	 * Bind the emulated PCI device with the real one, which can't
	 * be done until the PCI devices are populated. Once the real
	 * PCI device is identified, we also need to fix up its
	 * device-tree node.
	 */
	npu2_dev_bind_pci_dev(dev);
	if (dev->nvlink.phb && dev->nvlink.pd && dev->nvlink.pd->dn) {
		if (dt_find_property(dev->nvlink.pd->dn, "ibm,npu"))
			npu2_append_phandle(dev->nvlink.pd->dn, pd->dn->phandle);
		else
			dt_add_property_cells(dev->nvlink.pd->dn, "ibm,npu", pd->dn->phandle);

		dt_add_property_cells(pd->dn, "ibm,gpu", dev->nvlink.pd->dn->phandle);
		dev->nvlink.gpu_bdfn = dev->nvlink.pd->bdfn;
	}

	return 0;
}

static int npu2_links_per_gpu(struct phb *phb,
			      struct pci_device *pd,
			      void *data)
{
	struct npu2 *p = phb_to_npu2_nvlink(phb);
	struct npu2_dev *dev;
	int *nlinks = (int *)data;

	dev = npu2_bdf_to_dev(p, pd->bdfn);
	assert(dev);

	if (dev->nvlink.phb && dev->nvlink.pd && dev->nvlink.pd->dn) {
		const struct dt_property *prop;
		int n;

		/* The link count is the number of phandles in "ibm,npu" */
		prop = dt_find_property(dev->nvlink.pd->dn, "ibm,npu");
		if (!prop)
			return 0;

		/* Count could vary by GPU, so find the max */
		n = prop->len / sizeof(uint32_t);
		if (n > *nlinks)
			*nlinks = n;
	}

	return 0;
}

static void npu2_phb_fixup_scominit(struct dt_node *dn, int links_per_gpu)
{
	uint32_t gcid = dt_get_chip_id(dn);
	uint64_t val, mask;

	/*
	 * MRBSP settings for 2- and 3-link GPU systems. These can improve
	 * GPU peer-to-peer fully ordered write performance.
	 */
	if (links_per_gpu == 3) {
		val = PPC_BIT(30) | PPC_BIT(34) | PPC_BIT(36) | PPC_BIT(37) |
		      PPC_BIT(44) | PPC_BIT(45);
		mask = PPC_BITMASK(28,39) | PPC_BITMASK(44,47);
	} else if (links_per_gpu == 2) {
		val = PPC_BIT(46) | PPC_BIT(47);
		mask = PPC_BITMASK(44,47);
	} else
		return;

	xscom_write_mask(gcid, 0x50110c0, val, mask);
	xscom_write_mask(gcid, 0x50112c0, val, mask);
	xscom_write_mask(gcid, 0x50114c0, val, mask);
}

static void npu2_phb_final_fixup(struct phb *phb)
{
	int links_per_gpu = 0;
	struct dt_node *np;

	pci_walk_dev(phb, NULL, npu2_dn_fixup, NULL);

	/*
	 * Now that the emulated devices are bound to the real ones, we can
	 * determine links_per_gpu and do some final init.
	 */
	pci_walk_dev(phb, NULL, npu2_links_per_gpu, &links_per_gpu);
	dt_for_each_compatible(dt_root, np, "ibm,power9-npu")
		npu2_phb_fixup_scominit(np, links_per_gpu);
}

static void npu2_init_ioda_cache(struct npu2 *p)
{
	/* TVT */
	memset(p->tve_cache, 0, sizeof(p->tve_cache));
}

static int64_t npu2_ioda_reset(struct phb *phb, bool purge)
{
	struct npu2 *p = phb_to_npu2_nvlink(phb);
	uint32_t i;

	if (purge) {
		NPU2DBG(p, "Purging all IODA tables...\n");
		npu2_init_ioda_cache(p);
	}

	/* TVT */
	npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, 0, true);
	for (i = 0; i < ARRAY_SIZE(p->tve_cache); i++)
		out_be64(p->regs + NPU2_ATS_IODA_DATA, p->tve_cache[i]);

	return OPAL_SUCCESS;
}

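/*
 * Program one MCD bank. The hardware expresses both the address and the
 * size in 32MB (1 << 25) granules, with the size field holding
 * (number of granules - 1).
 */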
static void npu2_write_mcd(struct npu2 *p, uint64_t pcb_addr, uint64_t addr,
			   uint64_t size)
{
	uint64_t val;

	NPU2DBG(p, "Setting MCD addr:%llx\n", pcb_addr);
	assert(is_pow2(size));

	val = MCD_BANK_CN_VALID;
	val = SETFIELD(MCD_BANK_CN_SIZE, val, (size >> 25) - 1);
	val = SETFIELD(MCD_BANK_CN_ADDR, val, addr >> 25);
	xscom_write(p->chip_id, pcb_addr, val);
}

static void npu2_mcd_init(struct npu2 *p)
{
	int i;
	uint64_t size, addr, gpu_min_addr, gpu_max_addr, total_size;

	/* Init memory cache directory (MCD) registers. */
	phys_map_get(p->chip_id, p->gpu_map_type, NPU2_LINKS_PER_CHIP - 1,
		     &gpu_min_addr, NULL);
	phys_map_get(p->chip_id, p->gpu_map_type, 0, &gpu_max_addr, &size);
	gpu_max_addr += size;

	/* We assume GPU memory is contiguous from the first possible GPU to
	 * the last, and that every GPU is assigned the same size, so best to
	 * check that here. */
	for (i = 0; i < NPU2_LINKS_PER_CHIP; i++) {
		uint64_t tmp;
		phys_map_get(p->chip_id, p->gpu_map_type, i, &addr, &tmp);
		assert((addr >= gpu_min_addr) && (addr + tmp <= gpu_max_addr));
		assert(tmp == size);
	}

	/* We have two MCDs, so if necessary we can split the region covered
	 * across both if total_size is not a power of two. */
	total_size = gpu_max_addr - gpu_min_addr;
	size = 1ull << ilog2(total_size);

	/* Allocate the biggest chunk first as we assume gpu_max_addr has the
	 * highest alignment. */
	addr = gpu_max_addr - size;
	npu2_write_mcd(p, MCD0_BANK0_CN3, addr, size);
	total_size -= size;
	if (total_size) {
		/* total_size was not a power of two, but the remainder should
		 * be if all GPUs were assigned the same size. */
		assert(is_pow2(total_size));
		size = 1ull << ilog2(total_size);
		addr -= size;
		assert(addr <= gpu_min_addr);
		npu2_write_mcd(p, MCD1_BANK0_CN3, addr, size);
	}
}

static void npu2_hw_init(struct npu2 *p)
{
	uint64_t reg, val;
	int s, b;

	npu2_ioda_reset(&p->phb_nvlink, false);

	/* Enable XTS retry mode */
	val = npu2_read(p, NPU2_XTS_CFG);
	npu2_write(p, NPU2_XTS_CFG, val | NPU2_XTS_CFG_MMIOSD | NPU2_XTS_CFG_TRY_ATR_RO);

	val = npu2_read(p, NPU2_XTS_CFG2);
	npu2_write(p, NPU2_XTS_CFG2, val | NPU2_XTS_CFG2_NO_FLUSH_ENA);

	/*
	 * There are three different ways we configure the MCD and memory map.
	 * 1) Old way
	 *    Skiboot configures the MCD and puts GPUs at 4TB and below
	 * 2) New way with MCD
	 *    Hostboot configures the MCD and skiboot puts GPUs at 4TB and above
	 * 3) New way without MCD
	 *    No one configures the MCD and skiboot puts GPUs at 4TB and below
	 *
	 * 1) Will go away eventually as it's a configuration that can
	 *    cause an xstop or data integrity problems. We are keeping
	 *    it around to support existing hostboot, and print an error
	 *    message if it is used.
	 * 2) Is for smaller memory configurations and will be used
	 *    initially for GPUs on Witherspoon. Supports only up to
	 *    512GB of memory and 4 GPUs per socket.
	 * 3) Is for fully populated configurations of 4TB of memory
	 *    and 6 GPUs per socket. May have performance impacts.
	 *
	 * The different configurations can be detected via the following scoms:
	 * 1) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 0
	 * 2) 0x5011c0c bit 2 = 1, 0x5011c0a bits 42:48 = 7
	 * 3) 0x5011c0c bit 2 = 0, 0x5011c0a bits 42:48 = 0
	 */

	/* Get 0x05011c0c bit 2 = 1 */
	xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &val);
	if ((val & PB_CFG_CHG_RATE_GP_MASTER) != 0) {
		/* Get 0x05011c0a bits 42:48 */
		xscom_read(p->chip_id, PB_CENT_MODE, &val);
		if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 0) {
			/* 1) */
			NPU2DBG(p, "Using old memory map + MCD enabled in skiboot\n");
			NPU2ERR(p, "!!! Old firmware detected. Update hostboot for new MCD mapping !!!\n");
			p->gpu_map_type = GPU_MEM_4T_DOWN;
			npu2_mcd_init(p);
		} else if (GETFIELD(PB_CFG_CHIP_ADDR_EXTENSION_MASK_CENT, val) == 7) {
			/* 2) */
			NPU2DBG(p, "Using small memory map + MCD enabled\n");
			p->gpu_map_type = GPU_MEM_4T_UP;
		} else
			NPU2ERR(p, "!!! Unsupported NPU2 configuration. "
				"0x%llx!!!\n", val);
	} else {
		/* 3) */
		NPU2DBG(p, "Using large memory map + MCD disabled\n");
		p->gpu_map_type = GPU_MEM_4T_DOWN;
	}

	/* Static initialization of every relaxed-ordering cfg[2] register */
	val = NPU2_RELAXED_ORDERING_CMD_CL_DMA_W |
	      NPU2_RELAXED_ORDERING_CMD_CL_DMA_W_HP |
	      NPU2_RELAXED_ORDERING_CMD_CL_DMA_INJ |
	      NPU2_RELAXED_ORDERING_CMD_PR_DMA_INJ |
	      NPU2_RELAXED_ORDERING_CMD_DMA_PR_W |
	      NPU2_RELAXED_ORDERING_CMD_CL_RD_NC_F0 |
	      NPU2_RELAXED_ORDERING_SOURCE4_RDENA;

	for (s = NPU2_STACK_STCK_0; s <= NPU2_STACK_STCK_2; s++) {
		for (b = NPU2_BLOCK_SM_0; b <= NPU2_BLOCK_SM_3; b++) {
			reg = NPU2_REG_OFFSET(s, b, NPU2_RELAXED_ORDERING_CFG(2));
			npu2_write(p, reg, val);
		}
	}
}

static int64_t npu2_map_pe_dma_window_real(struct phb *phb,
					   uint64_t pe_num,
					   uint16_t window_id,
					   uint64_t pci_start_addr __unused,
					   uint64_t pci_mem_size __unused)
{
	struct npu2 *p = phb_to_npu2_nvlink(phb);
	uint64_t tve;

	/* Sanity check. Each PE has one corresponding TVE */
	if (pe_num >= NPU2_MAX_PE_NUM ||
	    window_id != pe_num)
		return OPAL_PARAMETER;

	if (pci_mem_size) {
		/* GPUs need to be able to access the MMIO memory space as
		 * well. On POWER9 this is above the top of RAM, so disable
		 * the TVT range check, allowing access to all memory
		 * addresses. */
		tve = 0;
	} else {
		/* Disable */
		tve = PPC_BIT(51);
	}

	npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, window_id, false);
	out_be64(p->regs + NPU2_ATS_IODA_DATA, tve);
	p->tve_cache[window_id] = tve;

	return OPAL_SUCCESS;
}

static int64_t npu2_map_pe_dma_window(struct phb *phb,
				      uint64_t pe_num,
				      uint16_t window_id,
				      uint16_t tce_levels,
				      uint64_t tce_table_addr,
				      uint64_t tce_table_size,
				      uint64_t tce_page_size)
{
	struct npu2 *p = phb_to_npu2_nvlink(phb);
	uint64_t tts_encoded;
	uint64_t data64 = 0;

	/* Sanity check. Each PE has one corresponding TVE */
	if (pe_num >= NPU2_MAX_PE_NUM ||
	    window_id != pe_num)
		return OPAL_PARAMETER;

	/*
	 * Special condition, zero TCE table size used to disable
	 * the TVE.
	 */
	if (!tce_table_size) {
		npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, window_id, false);
		out_be64(p->regs + NPU2_ATS_IODA_DATA, 0ul);
		p->tve_cache[window_id] = 0ul;
		return OPAL_SUCCESS;
	}

	/* Additional arguments validation */
	if (tce_levels < 1 ||
	    tce_levels > 4 ||
	    !is_pow2(tce_table_size) ||
	    tce_table_size < 0x1000)
		return OPAL_PARAMETER;

	/* TCE table size */
	data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_TTA, 0ul, tce_table_addr >> 12);
	tts_encoded = ilog2(tce_table_size) - 11;
	if (tts_encoded > 39)
		return OPAL_PARAMETER;
	data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_SIZE, data64, tts_encoded);

	/* TCE page size */
	switch (tce_page_size) {
	case 0x10000:		/* 64K */
		data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 5);
		break;
	case 0x1000000:		/* 16M */
		data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 13);
		break;
	case 0x10000000:	/* 256M */
		data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 17);
		break;
	case 0x1000:		/* 4K */
	default:
		data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_PSIZE, data64, 1);
	}

	/* Number of levels */
	data64 = SETFIELD(NPU2_ATS_IODA_TBL_TVT_LEVEL, data64, tce_levels - 1);

	/* Update to hardware */
	npu2_ioda_sel(p, NPU2_ATS_IODA_TBL_TVT, window_id, false);
	out_be64(p->regs + NPU2_ATS_IODA_DATA, data64);
	p->tve_cache[window_id] = data64;

	return OPAL_SUCCESS;
}

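/*
 * Map a bdfn to a PE number. Note that the BDF-to-PE maps (both the
 * per-brick CQ copy and the MISC copy) are programmed with the bdfn of
 * the real GPU, not that of the emulated device.
 */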
static int64_t npu2_set_pe(struct phb *phb,
			   uint64_t pe_num,
			   uint64_t bdfn,
			   uint8_t bcompare,
			   uint8_t dcompare,
			   uint8_t fcompare,
			   uint8_t action)
{
	struct npu2 *p;
	struct npu2_dev *dev;
	uint64_t reg, val;

	/* Sanity check */
	if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
		return OPAL_PARAMETER;
	if (pe_num >= NPU2_MAX_PE_NUM)
		return OPAL_PARAMETER;
	if (bdfn >> 8)
		return OPAL_PARAMETER;
	if (bcompare != OpalPciBusAll ||
	    dcompare != OPAL_COMPARE_RID_DEVICE_NUMBER ||
	    fcompare != OPAL_COMPARE_RID_FUNCTION_NUMBER)
		return OPAL_UNSUPPORTED;
	if (phb->phb_type != phb_type_npu_v2)
		return OPAL_PARAMETER;

	p = phb_to_npu2_nvlink(phb);
	if (!p)
		return OPAL_PARAMETER;

	dev = npu2_bdf_to_dev(p, bdfn);
	if (!dev)
		return OPAL_PARAMETER;

	val = NPU2_CQ_BRICK_BDF2PE_MAP_ENABLE;
	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_PE, val, pe_num);
	val = SETFIELD(NPU2_CQ_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);

	if (!NPU2DEV_BRICK(dev))
		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2,
				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK0_BDF2PE_MAP0);
	else
		reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0 + dev->brick_index/2,
				      NPU2_BLOCK_CTL, NPU2_CQ_BRICK1_BDF2PE_MAP0);

	npu2_write(p, reg, val);
	val = NPU2_MISC_BRICK_BDF2PE_MAP_ENABLE;
	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_PE, val, pe_num);
	val = SETFIELD(NPU2_MISC_BRICK_BDF2PE_MAP_BDF, val, dev->nvlink.gpu_bdfn);
	reg = NPU2_REG_OFFSET(NPU2_STACK_MISC, NPU2_BLOCK_MISC,
			      NPU2_MISC_BRICK0_BDF2PE_MAP0 + (dev->brick_index * 0x18));
	npu2_write(p, reg, val);

	return OPAL_SUCCESS;
}

static int64_t npu2_get_link_state(struct pci_slot *slot __unused, uint8_t *val)
{
	/*
	 * As we're emulating all PCI stuff, the link bandwidth
	 * isn't a big deal anyway.
	 */
	*val = OPAL_SHPC_LINK_UP_x1;
	return OPAL_SUCCESS;
}

static int64_t npu2_get_power_state(struct pci_slot *slot __unused, uint8_t *val)
{
	*val = PCI_SLOT_POWER_ON;
	return OPAL_SUCCESS;
}

static int64_t npu2_hreset(struct pci_slot *slot)
{
	struct npu2 *p;
	int i;
	struct npu2_dev *ndev;

	p = phb_to_npu2_nvlink(slot->phb);
	NPU2INF(p, "Hreset PHB state\n");

	for (i = 0; i < p->total_devices; i++) {
		ndev = &p->devices[i];
		if (ndev) {
			NPU2DEVINF(ndev, "Resetting device\n");
			reset_ntl(ndev);
		}
	}
	return purge_l2_l3_caches();
}

static int64_t npu2_freset(struct pci_slot *slot __unused)
{
	return OPAL_SUCCESS;
}

static int64_t npu2_creset(struct pci_slot *slot)
{
	struct npu2 *p;
	int i;
	struct npu2_dev *ndev;

	p = phb_to_npu2_nvlink(slot->phb);
	NPU2INF(p, "Creset PHB state\n");

	for (i = 0; i < p->total_devices; i++) {
		ndev = &p->devices[i];
		if (ndev) {
			NPU2DEVINF(ndev, "Resetting device\n");
			reset_ntl(ndev);
		}
	}
	return OPAL_SUCCESS;
}

static struct pci_slot *npu2_slot_create(struct phb *phb)
{
	struct pci_slot *slot;

	slot = pci_slot_alloc(phb, NULL);
	if (!slot)
		return slot;

	/* Elementary functions */
	slot->ops.get_presence_state  = NULL;
	slot->ops.get_link_state      = npu2_get_link_state;
	slot->ops.get_power_state     = npu2_get_power_state;
	slot->ops.get_attention_state = NULL;
	slot->ops.get_latch_state     = NULL;
	slot->ops.set_power_state     = NULL;
	slot->ops.set_attention_state = NULL;

	slot->ops.prepare_link_change = NULL;
	slot->ops.poll_link           = NULL;
	slot->ops.hreset              = npu2_hreset;
	slot->ops.freset              = npu2_freset;
	slot->ops.creset              = npu2_creset;

	return slot;
}

int64_t npu2_freeze_status(struct phb *phb __unused,
			   uint64_t pe_number __unused,
			   uint8_t *freeze_state,
			   uint16_t *pci_error_type,
			   uint16_t *severity)
{
	/*
	 * FIXME: When this is called by the skiboot PCI config accessor,
	 * the PE number is fixed to 0, which is incorrect. We need to
	 * introduce another PHB callback to translate it. For now, this
	 * keeps the skiboot PCI enumeration going.
	 */
	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
	*pci_error_type = OPAL_EEH_NO_ERROR;
	if (severity)
		*severity = OPAL_EEH_SEV_NO_ERROR;

	return OPAL_SUCCESS;
}

static int64_t npu2_eeh_next_error(struct phb *phb,
				   uint64_t *first_frozen_pe,
				   uint16_t *pci_error_type,
				   uint16_t *severity)
{
	struct npu2 *p = phb_to_npu2_nvlink(phb);
	int i;
	uint64_t result = 0;

	if (!first_frozen_pe || !pci_error_type || !severity)
		return OPAL_PARAMETER;

	*first_frozen_pe = -1;
	*pci_error_type = OPAL_EEH_NO_ERROR;
	*severity = OPAL_EEH_SEV_NO_ERROR;

	for (i = 0; i < NPU2_MAX_PE_NUM; i++) {
		result = npu2_read(p, NPU2_MISC_PESTB(i));
		if (result > 0) {
			*first_frozen_pe = i;
			*pci_error_type = OPAL_EEH_PE_ERROR;
			*severity = OPAL_EEH_SEV_PE_ER;
			break;
		}
	}

	return OPAL_SUCCESS;
}

static int64_t npu2_tce_kill(struct phb *phb, uint32_t kill_type,
			     uint64_t pe_number, uint32_t tce_size,
			     uint64_t dma_addr, uint32_t npages)
{
	struct npu2 *npu = phb_to_npu2_nvlink(phb);
	uint32_t tce_page_size;
	uint64_t val;

	if (pe_number > NPU2_MAX_PE_NUM)
		return OPAL_PARAMETER;

	sync();
	switch(kill_type) {
	case OPAL_PCI_TCE_KILL_PAGES:
		tce_page_size = 1ULL << (
				11 + GETFIELD(npu->tve_cache[pe_number],
					NPU2_ATS_IODA_TBL_TVT_PSIZE));
		if (tce_page_size != tce_size) {
			NPU2ERR(npu, "npu2_tce_kill: Unexpected TCE size (got 0x%x expected 0x%x)\n",
				tce_size, tce_page_size);
			return OPAL_PARAMETER;
		}

		while (npages--) {
			val = SETFIELD(NPU2_ATS_TCE_KILL_PENUM, dma_addr, pe_number);
			npu2_write(npu, NPU2_ATS_TCE_KILL, NPU2_ATS_TCE_KILL_ONE | val);
			dma_addr += tce_size;
		}
		break;
	case OPAL_PCI_TCE_KILL_PE:
		/*
		 * NPU2 doesn't support killing a PE so fall through
		 * and do a kill all instead.
		 */
	case OPAL_PCI_TCE_KILL_ALL:
		npu2_write(npu, NPU2_ATS_TCE_KILL, NPU2_ATS_TCE_KILL_ALL);
		break;
	default:
		return OPAL_PARAMETER;
	}

	return OPAL_SUCCESS;
}

static const struct phb_ops npu_ops = {
	.cfg_read8		= npu2_cfg_read8,
	.cfg_read16		= npu2_cfg_read16,
	.cfg_read32		= npu2_cfg_read32,
	.cfg_write8		= npu2_cfg_write8,
	.cfg_write16		= npu2_cfg_write16,
	.cfg_write32		= npu2_cfg_write32,
	.choose_bus		= NULL,
	.device_init		= NULL,
	.phb_final_fixup	= npu2_phb_final_fixup,
	.ioda_reset		= npu2_ioda_reset,
	.papr_errinjct_reset	= NULL,
	.pci_reinit		= NULL,
	.set_phb_mem_window	= NULL,
	.phb_mmio_enable	= NULL,
	.map_pe_mmio_window	= NULL,
	.map_pe_dma_window	= npu2_map_pe_dma_window,
	.map_pe_dma_window_real	= npu2_map_pe_dma_window_real,
	.pci_msi_eoi		= NULL,
	.set_xive_pe		= NULL,
	.get_msi_32		= NULL,
	.get_msi_64		= NULL,
	.set_pe			= npu2_set_pe,
	.set_peltv		= NULL,
	.eeh_freeze_status	= npu2_freeze_status,
	.eeh_freeze_clear	= NULL,
	.eeh_freeze_set		= NULL,
	.next_error		= npu2_eeh_next_error,
	.err_inject		= NULL,
	.get_diag_data2		= NULL,
	.set_capi_mode		= NULL,
	.set_capp_recovery	= NULL,
	.tce_kill		= npu2_tce_kill,
};

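/*
 * Assign all NPU2 BARs via SCOM at probe time, before the global MMIO
 * BAR is usable. reg[] returns the global MMIO BAR (base, size) and
 * mm_win[] the combined window covering the NTL and GENID BARs that is
 * exposed to the OS.
 */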
assign_mmio_bars(uint64_t gcid,uint32_t scom,uint64_t reg[2],uint64_t mm_win[2])1461 static void assign_mmio_bars(uint64_t gcid, uint32_t scom, uint64_t reg[2], uint64_t mm_win[2])
1462 {
1463 uint32_t i;
1464 struct npu2_bar *bar;
1465 struct npu2_bar npu2_bars[] = {
1466 /* NPU_REGS must be first in this list */
1467 { .type = NPU_REGS, .index = 0,
1468 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_PHY_BAR),
1469 .flags = NPU2_BAR_FLAG_ENABLED },
1470 { .type = NPU_PHY, .index = 0,
1471 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_PHY_BAR),
1472 .flags = NPU2_BAR_FLAG_ENABLED },
1473 { .type = NPU_PHY, .index = 1,
1474 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_PHY_BAR),
1475 .flags = NPU2_BAR_FLAG_ENABLED },
1476 { .type = NPU_NTL, .index = 0,
1477 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL0_BAR) },
1478 { .type = NPU_NTL, .index = 1,
1479 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_NTL1_BAR) },
1480 { .type = NPU_NTL, .index = 2,
1481 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL0_BAR) },
1482 { .type = NPU_NTL, .index = 3,
1483 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_NTL1_BAR) },
1484 { .type = NPU_NTL, .index = 4,
1485 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL0_BAR) },
1486 { .type = NPU_NTL, .index = 5,
1487 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_NTL1_BAR) },
1488 { .type = NPU_GENID, .index = 0,
1489 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_0, 0, NPU2_GENID_BAR) },
1490 { .type = NPU_GENID, .index = 1,
1491 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_1, 0, NPU2_GENID_BAR) },
1492 { .type = NPU_GENID, .index = 2,
1493 .reg = NPU2_REG_OFFSET(NPU2_STACK_STCK_2, 0, NPU2_GENID_BAR) },
1494 };
1495
1496 for (i = 0; i < ARRAY_SIZE(npu2_bars); i++) {
1497 bar = &npu2_bars[i];
1498 npu2_get_bar(gcid, bar);
1499 npu2_write_bar(NULL, bar, gcid, scom);
1500 }
1501
1502 /* Global MMIO BAR */
1503 reg[0] = npu2_bars[0].base;
1504 reg[1] = npu2_bars[0].size;
1505
1506 /* NTL and GENID BARs are exposed to kernel via the mm
1507 * window */
1508 mm_win[0] = npu2_bars[3].base;
1509 mm_win[1] = npu2_bars[ARRAY_SIZE(npu2_bars) - 1].base +
1510 npu2_bars[ARRAY_SIZE(npu2_bars) - 1].size -
1511 mm_win[0];
1512 }
1513
1514 /*
1515 * Set up NPU for NVLink and create PCI root device node
1516 * accordingly.
1517 */
npu2_nvlink_init_npu(struct npu2 * npu)1518 int npu2_nvlink_init_npu(struct npu2 *npu)
1519 {
1520 struct dt_node *np;
1521 uint64_t reg[2], mm_win[2], val, mask;
1522
1523 /* TODO: Clean this up with register names, etc. when we get
1524 * time. This just turns NVLink mode on in each brick and should
1525 * get replaced with a patch from ajd once we've worked out how
1526 * things are going to work there.
1527 *
1528 * Obviously if the year is now 2020 that didn't happen and you
1529 * should fix this :-) */
1530
1531 val = PPC_BIT(58);
1532 mask = PPC_BIT(58) | /* CONFIG_NVLINK_MODE */
1533 PPC_BIT(40); /* CONFIG_ENABLE_SNARF_CPM */
1534
1535 /*
1536 * V100 GPUs are known to violate NVLink2 protocol if some GPU memory
1537 * mapped by a CPU was also "linear-block" mapped by a GPU. When this
1538 * happens, it breaks the NPU2 cache coherency state machine and
1539 * it throws machine checkstop. Disabling snarfing fixes this so let's
1540 * disable it by default.
1541 */
1542 if (nvram_query_eq_dangerous("opal-npu2-snarf-cpm", "enable")) {
1543 prlog(PR_WARNING, "NPU2#%d: enabling Probe.I.MO snarfing, a bad GPU driver may crash the system!\n",
1544 npu->index);
1545 val |= PPC_BIT(40); /* CONFIG_ENABLE_SNARF_CPM */
1546 }
1547
1548 xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM0_MISC_CONFIG0,
1549 val, mask);
1550 xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM1_MISC_CONFIG0,
1551 val, mask);
1552 xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM2_MISC_CONFIG0,
1553 val, mask);
1554 xscom_write_mask(npu->chip_id, NPU_STCK0_CS_SM3_MISC_CONFIG0,
1555 val, mask);
1556 xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM0_MISC_CONFIG0,
1557 val, mask);
1558 xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM1_MISC_CONFIG0,
1559 val, mask);
1560 xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM2_MISC_CONFIG0,
1561 val, mask);
1562 xscom_write_mask(npu->chip_id, NPU_STCK1_CS_SM3_MISC_CONFIG0,
1563 val, mask);
1564 xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM0_MISC_CONFIG0,
1565 val, mask);
1566 xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM1_MISC_CONFIG0,
1567 val, mask);
1568 xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM2_MISC_CONFIG0,
1569 val, mask);
1570 xscom_write_mask(npu->chip_id, NPU_STCK2_CS_SM3_MISC_CONFIG0,
1571 val, mask);
1572
1573 xscom_write_mask(npu->chip_id, 0x50110c0, PPC_BIT(53), PPC_BIT(53));
1574 xscom_write_mask(npu->chip_id, 0x50112c0, PPC_BIT(53), PPC_BIT(53));
1575 xscom_write_mask(npu->chip_id, 0x50114c0, PPC_BIT(53), PPC_BIT(53));
1576 xscom_write_mask(npu->chip_id, 0x50110f1, PPC_BIT(41), PPC_BIT(41));
1577 xscom_write_mask(npu->chip_id, 0x50112f1, PPC_BIT(41), PPC_BIT(41));
1578 xscom_write_mask(npu->chip_id, 0x50114f1, PPC_BIT(41), PPC_BIT(41));
1579
1580 val = NPU2_NTL_MISC_CFG2_BRICK_ENABLE |
1581 NPU2_NTL_MISC_CFG2_NDL_TX_PARITY_ENA |
1582 NPU2_NTL_MISC_CFG2_NDL_PRI_PARITY_ENA |
1583 NPU2_NTL_MISC_CFG2_RCV_CREDIT_OVERFLOW_ENA;
1584 xscom_write_mask(npu->chip_id, 0x5011110, val, val);
1585 xscom_write_mask(npu->chip_id, 0x5011130, val, val);
1586 xscom_write_mask(npu->chip_id, 0x5011310, val, val);
1587 xscom_write_mask(npu->chip_id, 0x5011330, val, val);
1588 xscom_write_mask(npu->chip_id, 0x5011510, val, val);
1589 xscom_write_mask(npu->chip_id, 0x5011530, val, val);
1590
1591 val = PPC_BIT(6) | PPC_BIT(7) | PPC_BIT(11);
1592 xscom_write_mask(npu->chip_id, 0x5011009, val, PPC_BITMASK(6,11));
1593 xscom_write_mask(npu->chip_id, 0x5011039, val, PPC_BITMASK(6,11));
1594 xscom_write_mask(npu->chip_id, 0x5011069, val, PPC_BITMASK(6,11));
1595 xscom_write_mask(npu->chip_id, 0x5011099, val, PPC_BITMASK(6,11));
1596 xscom_write_mask(npu->chip_id, 0x5011209, val, PPC_BITMASK(6,11));
1597 xscom_write_mask(npu->chip_id, 0x5011239, val, PPC_BITMASK(6,11));
1598 xscom_write_mask(npu->chip_id, 0x5011269, val, PPC_BITMASK(6,11));
1599 xscom_write_mask(npu->chip_id, 0x5011299, val, PPC_BITMASK(6,11));
1600 xscom_write_mask(npu->chip_id, 0x5011409, val, PPC_BITMASK(6,11));
1601 xscom_write_mask(npu->chip_id, 0x5011439, val, PPC_BITMASK(6,11));
1602 xscom_write_mask(npu->chip_id, 0x5011469, val, PPC_BITMASK(6,11));
1603 xscom_write_mask(npu->chip_id, 0x5011499, val, PPC_BITMASK(6,11));
1604
1605 /* Reassign the BARs */
1606 assign_mmio_bars(npu->chip_id, npu->xscom_base, reg, mm_win);
1607 npu->regs = (void *)reg[0];
1608 npu->mm_base = mm_win[0];
1609 npu->mm_size = mm_win[1];
1610
1611 if (reg[0] && reg[1])
1612 prlog(PR_INFO, " Global MMIO BAR: %016llx (%lldMB)\n",
1613 reg[0], reg[1] >> 20);
1614 else
1615 prlog(PR_ERR, " Global MMIO BAR: Disabled\n");
1616
1617 /* Populate PCI root device node */
1618 np = dt_new_addr(dt_root, "pciex", reg[0]);
1619 assert(np);
1620 dt_add_property_strings(np,
1621 "compatible",
1622 "ibm,power9-npu-pciex",
1623 "ibm,ioda2-npu2-phb");
1624 dt_add_property_strings(np, "device_type", "pciex");
1625 dt_add_property(np, "reg", reg, sizeof(reg));
1626 dt_add_property_cells(np, "ibm,phb-index", npu->phb_index);
1627 dt_add_property_cells(np, "ibm,npu-index", npu->index);
1628 dt_add_property_cells(np, "ibm,chip-id", npu->chip_id);
1629 dt_add_property_cells(np, "ibm,xscom-base", npu->xscom_base);
1630 dt_add_property_cells(np, "ibm,npcq", npu->dt_node->phandle);
1631 dt_add_property_cells(np, "ibm,links", npu->total_devices);
1632 dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
1633 dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
1634
1635 /* Disable fast reboot - not currently supported */
1636 disable_fast_reboot("NVLink device enabled");
1637
1638 npu2_nvlink_create_phb(npu, np);
1639
1640 return 0;
1641 }

static uint32_t npu2_populate_pcie_cap(struct npu2_dev *dev,
				       uint32_t start,
				       uint32_t prev_cap)
{
	struct pci_virt_device *pvd = dev->nvlink.pvd;
	uint32_t val;

	/* Add capability list */
	PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
	PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_EXP);

	/* 0x00 - ID/PCIE capability */
	val = PCI_CFG_CAP_ID_EXP;
	val |= ((0x2 << 16) | (PCIE_TYPE_ENDPOINT << 20));
	PCI_VIRT_CFG_INIT_RO(pvd, start, 4, val);

	/* 0x04 - Device capability
	 *
	 * We should support FLR. Otherwise, there might be problems
	 * passing the device through to userland via the Linux
	 * VFIO infrastructure.
	 */
	val = ((PCIE_MPSS_128) |
	       (PCIE_PHANTOM_NONE << 3) |
	       (PCIE_L0SL_MAX_NO_LIMIT << 6) |
	       (PCIE_L1L_MAX_NO_LIMIT << 9) |
	       (PCICAP_EXP_DEVCAP_FUNC_RESET));
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_DEVCAP, 4, val);

	pci_virt_add_filter(pvd, start + PCICAP_EXP_DEVCTL, 2,
			    PCI_REG_FLAG_WRITE,
			    npu2_dev_cfg_exp_devcap, NULL);

	/* 0x08 - Device control and status */
	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DEVCTL, 4, 0x00002810,
			  0xffff0000, 0x000f0000);

	/* 0x0c - Link capability */
	val = (PCIE_LSPEED_VECBIT_2 | (PCIE_LWIDTH_1X << 4));
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP, 4, val);

	/* 0x10 - Link control and status */
	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL, 4, 0x00130000,
			  0xfffff000, 0xc0000000);

	/* 0x14 - Slot capability */
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCAP, 4, 0x00000000);

	/* 0x18 - Slot control and status */
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SLOTCTL, 4, 0x00000000);

	/* 0x1c - Root control and capability */
	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RC, 4, 0x00000000,
			  0xffffffe0, 0x00000000);

	/* 0x20 - Root status */
	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_RSTAT, 4, 0x00000000,
			  0xffffffff, 0x00010000);

	/* 0x24 - Device capability 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCIECAP_EXP_DCAP2, 4, 0x00000000);

	/* 0x28 - Device Control and status 2 */
	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_DCTL2, 4, 0x00070000,
			  0xffff0000, 0x00000000);

	/* 0x2c - Link capability 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_LCAP2, 4, 0x00000007);

	/* 0x30 - Link control and status 2 */
	PCI_VIRT_CFG_INIT(pvd, start + PCICAP_EXP_LCTL2, 4, 0x00000003,
			  0xffff0000, 0x00200000);

	/* 0x34 - Slot capability 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCAP2, 4, 0x00000000);

	/* 0x38 - Slot control and status 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, start + PCICAP_EXP_SCTL2, 4, 0x00000000);

	return start + PCICAP_EXP_SCTL2 + 8;
}

static uint32_t npu2_populate_vendor_cap(struct npu2_dev *dev,
					 uint32_t start,
					 uint32_t prev_cap)
{
	struct pci_virt_device *pvd = dev->nvlink.pvd;

	/* Capability list */
	PCI_VIRT_CFG_INIT_RO(pvd, prev_cap, 1, start);
	PCI_VIRT_CFG_INIT_RO(pvd, start, 1, PCI_CFG_CAP_ID_VENDOR);

	/* Length and version */
	PCI_VIRT_CFG_INIT_RO(pvd, start + 2, 1, VENDOR_CAP_LEN);
	PCI_VIRT_CFG_INIT_RO(pvd, start + 3, 1, VENDOR_CAP_VERSION);

	/*
	 * Defaults when the trap can't handle the read/write (eg. due
	 * to reading/writing less than 4 bytes).
	 */
	PCI_VIRT_CFG_INIT_RO(pvd, start + 4, 4, 0);
	PCI_VIRT_CFG_INIT_RO(pvd, start + 8, 4, 0);

	/* Add NVLink2 PHY procedures trap */
	pci_virt_add_filter(pvd, start + 4, 8,
			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
			    npu2_dev_procedure,
			    NULL);

	/* Link index */
	PCI_VIRT_CFG_INIT_RO(pvd, start + 0xc, 1, dev->link_index);

	return start + VENDOR_CAP_LEN;
}
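
/*
 * For reference, a sketch of the vendor capability built above
 * (offsets relative to the capability start; describing the trapped
 * words as procedure control/status is an inference from the
 * npu2_dev_procedure trap, not spelled out here):
 *
 *	+0x0  capability ID (PCI_CFG_CAP_ID_VENDOR)
 *	+0x1  next capability pointer
 *	+0x2  length  (VENDOR_CAP_LEN)
 *	+0x3  version (VENDOR_CAP_VERSION)
 *	+0x4  PHY procedure control word (trapped, defaults to 0)
 *	+0x8  PHY procedure status word  (trapped, defaults to 0)
 *	+0xc  link index
 *	+0xd  link flags (VENDOR_CAP_PCI_DEV_OFFSET, maintained by
 *	      npu2_set_link_flag()/npu2_clear_link_flag())
 */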

static void npu2_populate_cfg(struct npu2_dev *dev)
{
	struct pci_virt_device *pvd = dev->nvlink.pvd;
	struct npu2_pcie_bar *bar;
	uint32_t pos;

	/* 0x00 - Vendor/Device ID */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_VENDOR_ID, 4, 0x04ea1014);

	/* 0x04 - Command/Status */
	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_CMD, 4, 0x00100000, 0xffb802b8,
			  0xf9000000);

	pci_virt_add_filter(pvd, PCI_CFG_CMD, 1, PCI_REG_FLAG_WRITE,
			    npu2_cfg_write_cmd, NULL);

	/* 0x08 - Rev/Class/Cache */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_REV_ID, 4, 0x06800101);

	/* 0x0c - CLS/Latency Timer/Header/BIST */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000);

	/* 0x10/14 - BAR#0, NTL BAR */
	bar = &dev->bars[0];
	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4,
			  (bar->npu2_bar.base & 0xfffffff0) | (bar->flags & 0xF),
			  0x0000000f, 0x00000000);
	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR1, 4, (bar->npu2_bar.base >> 32),
			  0x00000000, 0x00000000);
	pci_virt_add_filter(pvd, PCI_CFG_BAR0, 8,
			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
			    npu2_dev_cfg_bar, bar);

	/* 0x18/1c - BAR#1, GENID BAR */
	bar = &dev->bars[1];
	if (NPU2DEV_BRICK(dev) == 0)
		PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR2, 4, (bar->npu2_bar.base & 0xfffffff0) |
				  (bar->flags & 0xF),
				  0x0000000f, 0x00000000);
	else
		/* Brick 1 gets the upper portion of the generation id register */
		PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR2, 4, ((bar->npu2_bar.base + 0x10000) & 0xfffffff0) |
				  (bar->flags & 0xF),
				  0x0000000f, 0x00000000);

	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR3, 4, (bar->npu2_bar.base >> 32), 0x00000000,
			  0x00000000);
	pci_virt_add_filter(pvd, PCI_CFG_BAR2, 8,
			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
			    npu2_dev_cfg_bar, bar);

	/* 0x20/0x24 - BARs, disabled */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR4, 4, 0x00000000);
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR5, 4, 0x00000000);

	/* 0x28 - Cardbus CIS pointer */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CARDBUS_CIS, 4, 0x00000000);

	/* 0x2c - Subsystem ID */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_SUBSYS_VENDOR_ID, 4, 0x00000000);

	/* 0x30 - ROM BAR, zero sized */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_ROMBAR, 4, 0xffffffff);

	/* 0x34 - PCI Capability */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CAP, 4, 0x00000000);

	/* 0x38 - Reserved */
	PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000);

	/* 0x3c - INT line/pin/Minimal grant/Maximal latency */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100); /* INT A */

	/* PCIE and vendor specific capability */
	pos = npu2_populate_pcie_cap(dev, 0x40, PCI_CFG_CAP);
	pos = npu2_populate_vendor_cap(dev, pos, 0x41);
	PCI_VIRT_CFG_INIT_RO(pvd, pos + 1, 1, 0);
}
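
/*
 * The resulting capability chain, for reference: the header's
 * capability pointer at 0x34 points to the PCI Express capability at
 * 0x40, whose next pointer (byte 0x41) is filled in to point at the
 * vendor specific capability that follows it (0x40 + PCICAP_EXP_SCTL2
 * + 8, which works out to 0x80 here). The vendor capability's next
 * pointer is never written, so it reads back as zero and terminates
 * the list.
 */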

static uint32_t npu_allocate_bdfn(struct npu2 *p, uint32_t group)
{
	int i;
	int bdfn = (group << 3);

	for (i = 0; i < p->total_devices; i++) {
		if ((p->devices[i].bdfn & 0xf8) == (bdfn & 0xf8))
			bdfn++;
	}

	return bdfn;
}
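
/*
 * A worked example of the allocation above (device numbers
 * hypothetical): each group maps to one device number on the root
 * bus, with the function number counting the links already placed in
 * that group. With devices 0x08 and 0x09 (group 1, functions 0 and 1)
 * already allocated, a further call walks the table, matches twice,
 * and returns the next free function on that device:
 *
 *	npu_allocate_bdfn(p, 1);	returns 0x0a
 */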

static void npu2_populate_devices(struct npu2 *p,
				  struct dt_node *dn)
{
	struct npu2_dev *dev;
	struct dt_node *npu2_dn, *link;
	uint32_t npu_phandle, index = 0;
	int stack;

	/*
	 * Get the npu node which has the links that we expand here
	 * into PCI-like devices attached to our emulated PHB.
	 */
	npu_phandle = dt_prop_get_u32(dn, "ibm,npcq");
	npu2_dn = dt_find_by_phandle(dt_root, npu_phandle);
	assert(npu2_dn);

	/* Walk the link@x nodes to initialize devices */
	p->total_devices = 0;
	p->phb_nvlink.scan_map = 0;
	dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") {
		uint32_t group_id;
		struct npu2_bar *npu2_bar;

		dev = &p->devices[index];
		dev->type = NPU2_DEV_TYPE_NVLINK;
		dev->npu = p;
		dev->dt_node = link;
		dev->link_index = dt_prop_get_u32(link, "ibm,npu-link-index");
		dev->brick_index = dev->link_index;

		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
		dev->bdfn = npu_allocate_bdfn(p, group_id);

		/* This must be done after calling
		 * npu_allocate_bdfn() */
		p->total_devices++;
		p->phb_nvlink.scan_map |= 0x1 << ((dev->bdfn & 0xf8) >> 3);

		dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");

		/* Populate BARs. BAR0/1 is the NTL bar. */
		stack = NPU2_STACK_STCK_0 + NPU2DEV_STACK(dev);
		npu2_bar = &dev->bars[0].npu2_bar;
		npu2_bar->type = NPU_NTL;
		npu2_bar->index = dev->brick_index;
		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2DEV_BRICK(dev) == 0 ?
						NPU2_NTL0_BAR : NPU2_NTL1_BAR);
		npu2_get_bar(p->chip_id, npu2_bar);

		dev->bars[0].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;

		/* BAR2/3 is the GENID bar. */
		npu2_bar = &dev->bars[1].npu2_bar;
		npu2_bar->type = NPU_GENID;
		npu2_bar->index = NPU2DEV_STACK(dev);
		npu2_bar->reg = NPU2_REG_OFFSET(stack, 0, NPU2_GENID_BAR);
		npu2_get_bar(p->chip_id, npu2_bar);

		/* The GENID is a single physical BAR that we split
		 * for each emulated device */
		npu2_bar->size = 0x10000;
		if (NPU2DEV_BRICK(dev))
			npu2_bar->base += 0x10000;
		dev->bars[1].flags = PCI_CFG_BAR_TYPE_MEM | PCI_CFG_BAR_MEM64;

		/* Initialize PCI virtual device */
		dev->nvlink.pvd = pci_virt_add_device(&p->phb_nvlink, dev->bdfn, 0x100, dev);
		if (dev->nvlink.pvd)
			npu2_populate_cfg(dev);

		index++;
	}
}

static void npu2_add_interrupt_map(struct npu2 *p,
				   struct dt_node *dn)
{
	struct dt_node *npu2_dn, *link, *phb_dn;
	uint32_t npu2_phandle, index = 0, i;
	uint32_t icsp = get_ics_phandle();
	uint32_t *map;
	size_t map_size;
	uint32_t mask[] = {0xff00, 0x0, 0x0, 0x7};

	assert(p->phb_nvlink.dt_node);
	phb_dn = p->phb_nvlink.dt_node;

	npu2_phandle = dt_prop_get_u32(dn, "ibm,npcq");
	npu2_dn = dt_find_by_phandle(dt_root, npu2_phandle);
	assert(npu2_dn);
	map_size = 7 * sizeof(*map) * p->total_devices;
	map = malloc(map_size);
	index = 0;
	dt_for_each_compatible(npu2_dn, link, "ibm,npu-link") {
		i = index * 7;
		map[i + 0] = (p->devices[index].bdfn << 8);
		map[i + 1] = 0;
		map[i + 2] = 0;

		map[i + 3] = 1; /* INT A */
		map[i + 4] = icsp; /* interrupt-parent */
		map[i + 5] = p->base_lsi + (index * 2) + 1; /* NDL No-Stall Event */
		map[i + 6] = 0; /* 0 = EDGE, 1 = LEVEL. */
		index++;
	}
	dt_add_property(phb_dn, "interrupt-map", map, map_size);
	free(map);
	dt_add_property(phb_dn, "interrupt-map-mask", mask, sizeof(mask));
}
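
/*
 * Each interrupt-map entry built above is 7 cells: the unit address
 * (bdfn << 8 plus two zero cells), the interrupt pin (always INT A),
 * and the parent specifier (ICS phandle, source number, trigger
 * mode). As a sketch, the second link (index 1, hypothetical bdfn
 * 0x08 and base_lsi 0x80) would produce the row:
 *
 *	< 0x0800 0x0 0x0  0x1  icsp 0x83 0x0 >
 */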

static void npu2_add_phb_properties(struct npu2 *p)
{
	struct dt_node *np = p->phb_nvlink.dt_node;
	uint32_t icsp = get_ics_phandle();
	uint64_t mm_base, mm_size;

	/*
	 * Add various properties that HB doesn't have to
	 * add, some of them simply because they result from
	 * policy decisions made in skiboot rather than in HB
	 * such as the MMIO windows going to PCI, interrupts,
	 * etc.
	 */
	dt_add_property_cells(np, "#address-cells", 3);
	dt_add_property_cells(np, "#size-cells", 2);
	dt_add_property_cells(np, "#interrupt-cells", 1);
	dt_add_property_cells(np, "bus-range", 0, 0xff);
	dt_add_property_cells(np, "clock-frequency", 0x200, 0);
	dt_add_property_cells(np, "interrupt-parent", icsp);

	/* NPU2 PHB properties */
	dt_add_property_cells(np, "ibm,opal-num-pes",
			      NPU2_MAX_PE_NUM);
	dt_add_property_cells(np, "ibm,opal-reserved-pe",
			      NPU2_RESERVED_PE_NUM);
	dt_add_property_cells(np, "ibm,supported-tce-sizes",
			      12, // 4K
			      16, // 64K
			      24, // 16M
			      28); // 256M

	dt_add_property_u64s(np, "ibm,mmio-atsd",
			     MMIO_ATSD_ADDR(p->regs, 0),
			     MMIO_ATSD_ADDR(p->regs, 1),
			     MMIO_ATSD_ADDR(p->regs, 2),
			     MMIO_ATSD_ADDR(p->regs, 3),
			     MMIO_ATSD_ADDR(p->regs, 4),
			     MMIO_ATSD_ADDR(p->regs, 5),
			     MMIO_ATSD_ADDR(p->regs, 6),
			     MMIO_ATSD_ADDR(p->regs, 7));

	/*
	 * The memory window is exposed as a 64-bit non-prefetchable
	 * one because the kernel treats 64-bit prefetchable windows
	 * specially.
	 */
	mm_base = p->mm_base;
	mm_size = p->mm_size;
	dt_add_property_cells(np, "ranges", 0x02000000,
			      hi32(mm_base), lo32(mm_base),
			      hi32(mm_base), lo32(mm_base),
			      hi32(mm_size), lo32(mm_size));
}

void npu2_nvlink_create_phb(struct npu2 *npu, struct dt_node *dn)
{
	struct pci_slot *slot;

	/* Generic PHB */
	npu->phb_nvlink.dt_node = dn;
	npu->phb_nvlink.ops = &npu_ops;
	npu->phb_nvlink.phb_type = phb_type_npu_v2;
	init_lock(&npu->lock);
	init_lock(&npu->phb_nvlink.lock);
	list_head_init(&npu->phb_nvlink.devices);
	list_head_init(&npu->phb_nvlink.virt_devices);

	npu2_populate_devices(npu, dn);
	npu2_add_interrupt_map(npu, dn);
	npu2_add_phb_properties(npu);

	slot = npu2_slot_create(&npu->phb_nvlink);
	if (!slot) {
		/**
		 * @fwts-label NPUCannotCreatePHBSlot
		 * @fwts-advice Firmware probably ran out of memory creating
		 * NPU2 slot. NVLink functionality could be broken.
		 */
		prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
	}

	pci_register_phb(&npu->phb_nvlink, OPAL_DYNAMIC_PHB_ID);

	npu2_init_ioda_cache(npu);
	npu2_hw_init(npu);
}

/*
 * Search a table for an entry with matching value under mask. Returns
 * the index and the current value in *value.
 */
static int npu_table_search(struct npu2 *p, uint64_t table_addr, int stride,
			    int table_size, uint64_t *value, uint64_t mask)
{
	int i;
	uint64_t val;

	assert(value);

	for (i = 0; i < table_size; i++) {
		val = npu2_read(p, table_addr + i*stride);
		if ((val & mask) == *value) {
			*value = val;
			return i;
		}
	}

	return -1;
}
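
/*
 * Two usage patterns of the search above appear in the callers below:
 * looking up an existing entry by passing the BDF under the
 * NPU2_XTS_BDF_MAP_BDF mask, and finding a free slot by passing
 * *value = 0 with mask = -1UL so that only an all-zero entry matches.
 */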

/*
 * Allocate a context ID and initialise the tables with the relevant
 * information. Returns the ID on success or an error if one couldn't
 * be allocated.
 */
#define NPU2_VALID_ATS_MSR_BITS (MSR_DR | MSR_HV | MSR_PR | MSR_SF)
static int64_t opal_npu_init_context(uint64_t phb_id, int pasid __unused,
				     uint64_t msr, uint64_t bdf)
{
	struct phb *phb = pci_get_phb(phb_id);
	struct npu2 *p;
	uint64_t xts_bdf, old_xts_bdf_pid, xts_bdf_pid;
	int id;

	if (!phb || phb->phb_type != phb_type_npu_v2)
		return OPAL_PARAMETER;

	/*
	 * MSR bits should be masked by the caller to allow for future
	 * expansion if required.
	 */
	if (msr & ~NPU2_VALID_ATS_MSR_BITS)
		return OPAL_UNSUPPORTED;

	/*
	 * Need to get LPARSHORT.
	 */
	p = phb_to_npu2_nvlink(phb);
	lock(&p->lock);
	xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_BDF, 0ul, bdf);
	if (npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
			     &xts_bdf, NPU2_XTS_BDF_MAP_BDF) < 0) {
		NPU2ERR(p, "LPARID not associated with any GPU\n");
		id = OPAL_PARAMETER;
		goto out;
	}

	id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
	NPU2DBG(p, "Found LPARSHORT = 0x%x for BDF = 0x%03llx\n", id, bdf);

	/* Enable this mapping for both real and virtual addresses */
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_VALID_ATRGPA0, 0UL, 1);
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_VALID_ATRGPA1, xts_bdf_pid, 1);

	/* Enables TLBIE/MMIOSD forwarding for this entry */
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_VALID_ATSD, xts_bdf_pid, 1);
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_LPARSHORT, xts_bdf_pid, id);

	/* Set the relevant MSR bits */
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_MSR_DR, xts_bdf_pid,
			       !!(msr & MSR_DR));
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_MSR_HV, xts_bdf_pid,
			       !!(msr & MSR_HV));
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_MSR_PR, xts_bdf_pid,
			       !!(msr & MSR_PR));

	/* We don't support anything other than 64-bit so we can safely hardcode
	 * it here */
	xts_bdf_pid = SETFIELD(NPU2_XTS_PID_MAP_MSR_SF, xts_bdf_pid, 1);

	/*
	 * Throw an error if the wildcard entry for this bdf is already set
	 * with different msr bits.
	 */
	old_xts_bdf_pid = npu2_read(p, NPU2_XTS_PID_MAP + id*0x20);
	if (old_xts_bdf_pid) {
		if (GETFIELD(NPU2_XTS_PID_MAP_MSR, old_xts_bdf_pid) !=
		    GETFIELD(NPU2_XTS_PID_MAP_MSR, xts_bdf_pid)) {
			NPU2ERR(p, "%s: Unexpected MSR value\n", __func__);
			id = OPAL_PARAMETER;
			goto out;
		} else if (!p->ctx_ref[id]) {
			NPU2ERR(p, "%s: Unexpected mapping\n", __func__);
			id = OPAL_INTERNAL_ERROR;
			goto out;
		}
	}

	/* Write the entry */
	if (!p->ctx_ref[id]) {
		NPU2DBG(p, "XTS_PID_MAP[%03d] = 0x%08llx\n", id, xts_bdf_pid);
		npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, xts_bdf_pid);

		if (!GETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf)) {
			xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_VALID, xts_bdf, 1);
			npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf);
		}
	}
	++p->ctx_ref[id];

out:
	unlock(&p->lock);
	return id;
}
opal_call(OPAL_NPU_INIT_CONTEXT, opal_npu_init_context, 4);
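
/*
 * A minimal sketch of an OS-side caller (how phb_id and the GPU bdf
 * are obtained is up to the OS and not defined here): the MSR
 * argument is masked to the valid bits, and the returned LPARSHORT id
 * is kept so the context can later be torn down with
 * OPAL_NPU_DESTROY_CONTEXT:
 *
 *	int64_t id = opal_npu_init_context(phb_id, 0,
 *					   msr & NPU2_VALID_ATS_MSR_BITS,
 *					   gpu_bdf);
 *	if (id < 0)
 *		bail out, id holds OPAL_PARAMETER etc.
 */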

static int opal_npu_destroy_context(uint64_t phb_id, uint64_t pid __unused,
				    uint64_t bdf)
{
	struct phb *phb = pci_get_phb(phb_id);
	struct npu2 *p;
	uint64_t xts_bdf;
	int rc = OPAL_PARAMETER, id;

	if (!phb || phb->phb_type != phb_type_npu_v2)
		return OPAL_PARAMETER;

	p = phb_to_npu2_nvlink(phb);
	lock(&p->lock);

	/* Need to find lparshort for this bdf */
	xts_bdf = SETFIELD(NPU2_XTS_BDF_MAP_BDF, 0ul, bdf);
	if (npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
			     &xts_bdf, NPU2_XTS_BDF_MAP_BDF) < 0) {
		NPU2ERR(p, "LPARID not associated with any GPU\n");
	} else {
		/*
		 * The bdf/pid table contains wildcard entries and MSR bits
		 * which we need to clear between switching a device from
		 * a host to a guest or vice versa.
		 */
		id = GETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf);
		if (p->ctx_ref[id]) {
			--p->ctx_ref[id];
			if (!p->ctx_ref[id]) {
				NPU2DBG(p, "XTS_PID_MAP[%03d] = 0 (destroy)\n",
					id);
				npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0);
			}
			rc = OPAL_SUCCESS;
		}
	}
	unlock(&p->lock);
	return rc;
}
opal_call(OPAL_NPU_DESTROY_CONTEXT, opal_npu_destroy_context, 3);
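
/*
 * Note that init/destroy are reference counted per LPARSHORT: the
 * XTS_PID_MAP entry is only written on the first init for an id and
 * only cleared when the last matching destroy brings ctx_ref back to
 * zero, so callers are expected to pair every successful
 * OPAL_NPU_INIT_CONTEXT with exactly one OPAL_NPU_DESTROY_CONTEXT.
 */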

/*
 * Map the given virtual bdf to lparid with given lpcr.
 */
static int opal_npu_map_lpar(uint64_t phb_id, uint64_t bdf, uint64_t lparid,
			     uint64_t lpcr)
{
	struct phb *phb = pci_get_phb(phb_id);
	struct npu2 *p;
	struct npu2_dev *ndev = NULL;
	uint64_t xts_bdf_lpar, atsd_lpar, rc = OPAL_SUCCESS;
	int i;
	int id;
	static uint64_t atsd_lpar_regs[] = {
		NPU2_XTS_MMIO_ATSD0_LPARID, NPU2_XTS_MMIO_ATSD1_LPARID,
		NPU2_XTS_MMIO_ATSD2_LPARID, NPU2_XTS_MMIO_ATSD3_LPARID,
		NPU2_XTS_MMIO_ATSD4_LPARID, NPU2_XTS_MMIO_ATSD5_LPARID,
		NPU2_XTS_MMIO_ATSD6_LPARID, NPU2_XTS_MMIO_ATSD7_LPARID
	};

	if (!phb || phb->phb_type != phb_type_npu_v2)
		return OPAL_PARAMETER;

	if (lpcr)
		/* The LPCR bits are only required for hash based ATS,
		 * which we don't currently support but may need to in
		 * future. */
		return OPAL_UNSUPPORTED;

	p = phb_to_npu2_nvlink(phb);
	lock(&p->lock);

	/* Find any existing entries and update them */
	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BDF, 0L, bdf);
	id = npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
			      &xts_bdf_lpar, NPU2_XTS_BDF_MAP_BDF);
	if (id < 0) {
		/* No existing mapping found, find space for a new one */
		xts_bdf_lpar = 0;
		id = npu_table_search(p, NPU2_XTS_BDF_MAP, 8, NPU2_XTS_BDF_MAP_SIZE,
				      &xts_bdf_lpar, -1UL);
	}

	if (id < 0) {
		/* Unable to find a free mapping */
		NPU2ERR(p, "No free XTS_BDF[] entry\n");
		rc = OPAL_RESOURCE;
		goto out;
	}

	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_UNFILT, 0UL, 1);
	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BDF, xts_bdf_lpar, bdf);

	/* We only support radix for the moment */
	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_XLAT, xts_bdf_lpar, 0x3);
	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_LPARID, xts_bdf_lpar, lparid);
	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_LPARSHORT, xts_bdf_lpar, id);

	/* Need to find an NVLink to send the ATSDs for this device over */
	for (i = 0; i < p->total_devices; i++) {
		if (p->devices[i].nvlink.gpu_bdfn == bdf) {
			ndev = &p->devices[i];
			break;
		}
	}

	if (!ndev) {
		NPU2ERR(p, "Unable to find nvlink for bdf %llx\n", bdf);
		rc = OPAL_PARAMETER;
		goto out;
	}

	/*
	 * We need to allocate an ATSD per NVLink bridge if possible,
	 * use the ibm,npu-link-index property for that.
	 */
	atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_LPARID, 0, lparid);
	if (!lparid)
		atsd_lpar = SETFIELD(NPU2_XTS_MMIO_ATSD_MSR_HV, atsd_lpar, 1);

	if (ndev->link_index < ARRAY_SIZE(atsd_lpar_regs))
		npu2_write(p, atsd_lpar_regs[ndev->link_index], atsd_lpar);
	else
		NPU2ERR(p, "Unable to assign ATSD for link index %u\n",
			ndev->link_index);

	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_STACK, xts_bdf_lpar,
				0x4 >> (ndev->brick_index / 2));
	xts_bdf_lpar = SETFIELD(NPU2_XTS_BDF_MAP_BRICK, xts_bdf_lpar,
				(ndev->brick_index % 2));

	NPU2DBG(p, "XTS_BDF_MAP[%03d] = 0x%08llx\n", id, xts_bdf_lpar);
	npu2_write(p, NPU2_XTS_BDF_MAP + id*8, xts_bdf_lpar);

	/* Reset wildcard in the PID map and the refcounter */
	if (npu2_read(p, NPU2_XTS_PID_MAP + id*0x20) || p->ctx_ref[id]) {
		prlog(PR_INFO, "Resetting PID MAP for LPID %lld\n", lparid);
		p->ctx_ref[id] = 0;
		npu2_write(p, NPU2_XTS_PID_MAP + id*0x20, 0);
	}

out:
	unlock(&p->lock);
	return rc;
}
opal_call(OPAL_NPU_MAP_LPAR, opal_npu_map_lpar, 4);
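
/*
 * A sketch of the expected calling sequence (OS-side variable names
 * are hypothetical): the LPAR mapping must exist before contexts are
 * initialised, since opal_npu_init_context() looks the bdf up in
 * XTS_BDF_MAP rather than creating it:
 *
 *	opal_npu_map_lpar(phb_id, gpu_bdf, lparid, 0);
 *	...
 *	id = opal_npu_init_context(phb_id, 0, msr, gpu_bdf);
 *	...
 *	opal_npu_destroy_context(phb_id, 0, gpu_bdf);
 */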

static inline uint32_t npu2_relaxed_ordering_source_grpchp(uint32_t gcid)
{
	if (gcid & ~0x1b)
		return OPAL_PARAMETER;

	/* Repack 0bGGGGCCC to 0bGGCC */
	return ((gcid & 0x18) >> 1) | (gcid & 0x3);
}
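
/*
 * For example, chip id 0b01011 (group 1, chip 3) repacks to 0b0111:
 * the two group bits (mask 0x18) shift down one place and the two
 * chip bits (mask 0x3) are kept, giving a 4-bit GRPCHP value of 0x7.
 * Chip ids with any other bits set can't be encoded and return
 * OPAL_PARAMETER instead.
 */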

static uint64_t npu2_relaxed_ordering_cfg_read(struct npu2_dev *ndev, int n)
{
	uint64_t reg = NPU2_SM_REG_OFFSET(ndev, 0, NPU2_RELAXED_ORDERING_CFG(n));

	return npu2_read(ndev->npu, reg);
}

static void npu2_relaxed_ordering_cfg_write(struct npu2_dev *ndev, int n,
					    uint64_t val)
{
	uint64_t reg;
	int sm;

	/* Set every register on our stack */
	for (sm = NPU2_BLOCK_SM_0; sm <= NPU2_BLOCK_SM_3; sm++) {
		reg = NPU2_SM_REG_OFFSET(ndev, sm, NPU2_RELAXED_ORDERING_CFG(n));
		npu2_write(ndev->npu, reg, val);
	}
}

/*
 * Parse the value of a relaxed ordering config register. Returns SOURCE0 or
 * SOURCE1 register mask if relaxed ordering is set for the given chip/pec.
 * Returns 0 if unset.
 */
static uint64_t npu2_relaxed_ordering_cfg_enabled(uint64_t val, uint32_t gcid,
						  int pec)
{
	uint32_t src, grpchp;
	uint64_t mask;
	int i;

	for (i = 0; i < 2; i++) {
		mask = NPU2_RELAXED_ORDERING_SOURCE(i);
		src = GETFIELD(mask, val);

		if (!GETFIELD(NPU2_RELAXED_ORDERING_SOURCE_ENA, src))
			continue;

		if (GETFIELD(NPU2_RELAXED_ORDERING_SOURCE_PECSEL, src) != pec)
			continue;

		grpchp = GETFIELD(NPU2_RELAXED_ORDERING_SOURCE_GRPCHP, src);
		if (grpchp == npu2_relaxed_ordering_source_grpchp(gcid))
			return mask;

		if (grpchp == 0xf) /* match all */
			return mask;
	}

	return 0;
}

static int npu2_enable_relaxed_ordering(struct npu2_dev *ndev, uint32_t gcid,
					int pec)
{
	uint64_t val, mask;
	uint32_t src;
	int rc = OPAL_RESOURCE;
	int i;

	NPU2DEVINF(ndev, "Enabling relaxed ordering for PEC %d on chip %d\n", pec, gcid);
	lock(&ndev->npu->lock);

	for (i = 0; i < 2; i++) {
		val = npu2_relaxed_ordering_cfg_read(ndev, i);
		if (!npu2_relaxed_ordering_cfg_enabled(val, gcid, pec))
			continue;

		/* Already enabled */
		rc = OPAL_SUCCESS;
		goto out;
	}

	src = NPU2_RELAXED_ORDERING_SOURCE_WRENA |
	      NPU2_RELAXED_ORDERING_SOURCE_RDENA;
	src = SETFIELD(NPU2_RELAXED_ORDERING_SOURCE_PECSEL, src, pec);
	src = SETFIELD(NPU2_RELAXED_ORDERING_SOURCE_GRPCHP, src,
		       npu2_relaxed_ordering_source_grpchp(gcid));
	src = SETFIELD(NPU2_RELAXED_ORDERING_SOURCE_WRMIN, src, 0);
	src = SETFIELD(NPU2_RELAXED_ORDERING_SOURCE_WRMAX, src, 23);
	src = SETFIELD(NPU2_RELAXED_ORDERING_SOURCE_RDMIN, src, 0);
	src = SETFIELD(NPU2_RELAXED_ORDERING_SOURCE_RDMAX, src, 47);

	/* Find somewhere to write this config */
	for (i = 0; i < 2; i++) {
		val = npu2_relaxed_ordering_cfg_read(ndev, i);

		if (!GETFIELD(NPU2_RELAXED_ORDERING_SOURCE_ENA << 32, val))
			mask = NPU2_RELAXED_ORDERING_SOURCE(0);
		else if (!GETFIELD(NPU2_RELAXED_ORDERING_SOURCE_ENA, val))
			mask = NPU2_RELAXED_ORDERING_SOURCE(1);
		else
			continue;

		val = SETFIELD(mask, val, src);
		npu2_relaxed_ordering_cfg_write(ndev, i, val);

		rc = OPAL_SUCCESS;
		break;
	}

out:
	unlock(&ndev->npu->lock);
	return rc;
}
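
/*
 * Each brick has two relaxed ordering config registers, and each
 * register carries two SOURCE fields (one in each 32-bit half), so at
 * most four chip/PEC sources can have relaxed ordering enabled per
 * brick. Once all four slots are taken, the search above falls
 * through and OPAL_RESOURCE is returned.
 */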

static void npu2_disable_relaxed_ordering(struct npu2_dev *ndev, uint32_t gcid,
					  int pec)
{
	uint64_t val, mask;
	int i;

	NPU2DEVINF(ndev, "Disabling relaxed ordering for PEC %d on chip %d\n", pec, gcid);
	lock(&ndev->npu->lock);

	for (i = 0; i < 2; i++) {
		val = npu2_relaxed_ordering_cfg_read(ndev, i);

		mask = npu2_relaxed_ordering_cfg_enabled(val, gcid, pec);
		if (!mask)
			continue;

		val = SETFIELD(mask, val, 0);
		npu2_relaxed_ordering_cfg_write(ndev, i, val);
	}

	unlock(&ndev->npu->lock);
}

/*
 * Enable or disable relaxed ordering on all nvlinks for a given PEC. May leave
 * relaxed ordering partially enabled if there are insufficient HW resources to
 * enable it on all links.
 */
static int npu2_set_relaxed_ordering(uint32_t gcid, int pec, bool enable)
{
	int rc = OPAL_SUCCESS;
	struct phb *phb;
	struct npu2 *npu;
	struct npu2_dev *ndev;

	for_each_phb(phb) {
		if (phb->phb_type != phb_type_npu_v2)
			continue;

		npu = phb_to_npu2_nvlink(phb);
		for (int i = 0; i < npu->total_devices; i++) {
			ndev = &npu->devices[i];
			if (enable)
				rc = npu2_enable_relaxed_ordering(ndev, gcid, pec);
			else
				npu2_disable_relaxed_ordering(ndev, gcid, pec);

			if (rc != OPAL_SUCCESS) {
				NPU2DEVINF(ndev, "Insufficient resources to activate relaxed ordering mode\n");
				return OPAL_RESOURCE;
			}
		}
	}

	return OPAL_SUCCESS;
}

static int npu2_check_relaxed_ordering(struct phb *phb __unused,
				       struct pci_device *pd, void *enable)
{
	/*
	 * IBM PCIe bridge devices (ie. the root ports) can always allow relaxed
	 * ordering
	 */
	if (pd->vdid == 0x04c11014)
		pd->allow_relaxed_ordering = true;

	PCIDBG(phb, pd->bdfn, "Checking relaxed ordering config\n");
	if (pd->allow_relaxed_ordering)
		return 0;

	PCIDBG(phb, pd->bdfn, "Relaxed ordering not allowed\n");
	*(bool *) enable = false;

	return 1;
}

static int64_t opal_npu_set_relaxed_order(uint64_t phb_id, uint16_t bdfn,
					  bool request_enabled)
{
	struct phb *phb = pci_get_phb(phb_id);
	struct phb4 *phb4;
	uint32_t chip_id, pec;
	struct pci_device *pd;
	bool enable = true;

	if (!phb || phb->phb_type != phb_type_pcie_v4)
		return OPAL_PARAMETER;

	phb4 = phb_to_phb4(phb);
	pec = phb4->pec;
	chip_id = phb4->chip_id;

	if (npu2_relaxed_ordering_source_grpchp(chip_id) == OPAL_PARAMETER)
		return OPAL_PARAMETER;

	pd = pci_find_dev(phb, bdfn);
	if (!pd)
		return OPAL_PARAMETER;

	/*
	 * Not changing state, so no need to rescan PHB devices to determine if
	 * we need to enable/disable it
	 */
	if (pd->allow_relaxed_ordering == request_enabled)
		return OPAL_SUCCESS;

	pd->allow_relaxed_ordering = request_enabled;

	/*
	 * Walk all devices on this PHB to ensure they all support relaxed
	 * ordering
	 */
	pci_walk_dev(phb, NULL, npu2_check_relaxed_ordering, &enable);

	if (request_enabled && !enable) {
		/*
		 * Not all devices on this PHB support relaxed-ordering
		 * mode so we can't enable it as requested
		 */
		prlog(PR_INFO, "Cannot set relaxed ordering for PEC %d on chip %d\n",
		      pec, chip_id);
		return OPAL_CONSTRAINED;
	}

	if (npu2_set_relaxed_ordering(chip_id, pec, request_enabled) != OPAL_SUCCESS) {
		npu2_set_relaxed_ordering(chip_id, pec, false);
		return OPAL_RESOURCE;
	}

	phb4->ro_state = request_enabled;
	return OPAL_SUCCESS;
}
opal_call(OPAL_NPU_SET_RELAXED_ORDER, opal_npu_set_relaxed_order, 3);
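
/*
 * A minimal OS-side sketch (variable names hypothetical): request
 * relaxed ordering for a device on a PHB4, fall back gracefully if it
 * can't be granted, and read the state back:
 *
 *	rc = opal_npu_set_relaxed_order(phb_id, bdfn, true);
 *	if (rc == OPAL_CONSTRAINED || rc == OPAL_RESOURCE)
 *		continue without relaxed ordering;
 *	ro = opal_npu_get_relaxed_order(phb_id, bdfn);
 */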

static int64_t opal_npu_get_relaxed_order(uint64_t phb_id,
					  uint16_t bdfn __unused)
{
	struct phb *phb = pci_get_phb(phb_id);
	struct phb4 *phb4;

	if (!phb || phb->phb_type != phb_type_pcie_v4)
		return OPAL_PARAMETER;

	phb4 = phb_to_phb4(phb);
	return phb4->ro_state;
}
opal_call(OPAL_NPU_GET_RELAXED_ORDER, opal_npu_get_relaxed_order, 2);