/* Copyright 2013-2015 IBM Corp.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *	http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
 * implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
#include <skiboot.h>
#include <io.h>
#include <timebase.h>
#include <pci.h>
#include <pci-cfg.h>
#include <pci-virt.h>
#include <pci-slot.h>
#include <interrupts.h>
#include <opal.h>
#include <opal-api.h>
#include <cpu.h>
#include <device.h>
#include <ccan/str/str.h>
#include <ccan/array_size/array_size.h>
#include <ccan/build_assert/build_assert.h>
#include <affinity.h>
#include <npu-regs.h>
#include <npu.h>
#include <xscom.h>
#include <string.h>

/*
 * Terminology:
 *
 *  Brick - A group of either 8 TX or 8 RX lanes
 *  Link - A group of 8 TX and 8 RX lanes
 *
 * Each link is represented in system software as an emulated PCI
 * device. Garrison has two chips each with 4 links, therefore there
 * are 8 emulated PCI devices in total.
 *
 * +----------------------------------------------------------------+
 * |              PBCQ3 (SCOM Base Address 0x2012c00)               |
 * |               PHB3 (SCOM Base Address 0x9012c00)               |
 * +----------------------------------------------------------------+
 *                  ||||||||                ||||||||
 *                  ||||||||                ||||||||
 *                  ||||||||                ||||||||
 *                  ||||||||                ||||||||
 * +----------------------------------------------------------------+
 * |                            PCIe x8                             |
 * +----------------------------------------------------------------+
 * |                              GPU0                              |
 * +--------------------------------+-------------------------------+
 * |           NV Link 1            |           NV Link 0           |
 * +---------------+----------------+---------------+---------------+
 * |      RX       |       TX       |      RX       |      TX       |
 * +---------------+----------------+---------------+---------------+
 *     ||||||||         ||||||||        ||||||||        ||||||||
 *     ||||||||         ||||||||        ||||||||        ||||||||
 *     ||||||||         ||||||||        ||||||||        ||||||||
 *     ||||||||         ||||||||        ||||||||        ||||||||
 * +---------------+----------------+---------------+---------------+
 * |      TX       |       RX       |      TX       |      RX       |
 * +---------------+----------------+---------------+---------------+
 * |   Lanes [0:7]                PHY 0                Lanes [8:15] |
 * |              SCOM Base Address 0x8000080008010c3f              |
 * +--------------------------------+-------------------------------+
 * |         Link 0 NDL/NTL         |        Link 1 NTL/NDL         |
 * |  SCOM Base Address 0x8013c00   |  SCOM Base Address 0x8013c40  |
 * +--------------------------------+-------------------------------+
 * |                                                                |
 * |         Address Translation/AT (shared for all links)          |
 * |                  SCOM Base Address 0x8013d80                   |
 * |                                                                |
 * +--------------------------------+-------------------------------+
 * |         Link 3 NDL/NTL         |        Link 4 NTL/NDL         |
 * |  SCOM Base Address 0x8013d00   |  SCOM Base Address 0x8013d40  |
 * +--------------------------------+-------------------------------+
 * |   Lanes [8:15]                PHY 1                Lanes [0:7] |
 * |              SCOM Base Address 0x8000080008010c7f              |
 * +---------------+----------------+---------------+---------------+
 * |      TX       |       RX       |      TX       |      RX       |
 * +---------------+----------------+---------------+---------------+
 *     ||||||||         ||||||||        ||||||||        ||||||||
 *     ||||||||         ||||||||        ||||||||        ||||||||
 *     ||||||||         ||||||||        ||||||||        ||||||||
 *     ||||||||         ||||||||        ||||||||        ||||||||
 * +---------------+----------------+---------------+---------------+
 * |      RX       |       TX       |      RX       |      TX       |
 * +---------------+----------------+---------------+---------------+
 * |           NV Link 2            |           NV Link 3           |
 * +--------------------------------+-------------------------------+
 * |                              GPU1                              |
 * +----------------------------------------------------------------+
 * |                            PCIe x8                             |
 * +----------------------------------------------------------------+
 *                  ||||||||                ||||||||
 *                  ||||||||                ||||||||
 *                  ||||||||                ||||||||
 *                  ||||||||                ||||||||
 * +----------------------------------------------------------------+
 * |               PHB2 (SCOM Base Address 0x9012800)               |
 * |              PBCQ2 (SCOM Base Address 0x2012800)               |
 * +----------------------------------------------------------------+
 *
 */

static struct npu_dev_cap *npu_dev_find_capability(struct npu_dev *dev,
						   uint16_t id);

#define OPAL_NPU_VERSION		0x02

#define PCIE_CAP_START			0x40
#define PCIE_CAP_END			0x80
#define VENDOR_CAP_START		0x80
#define VENDOR_CAP_END			0x90

#define VENDOR_CAP_PCI_DEV_OFFSET	0x0d

/* Returns the scom base for the given link index */
static uint64_t npu_link_scom_base(struct dt_node *dn, uint32_t scom_base,
				   int index)
{
	struct dt_node *link;
	uint32_t link_index;
	char namebuf[32];

	snprintf(namebuf, sizeof(namebuf), "link@%x", index);
	link = dt_find_by_name(dn, namebuf);
	assert(link);
	link_index = dt_prop_get_u32(link, "ibm,npu-link-index");
	return scom_base + (link_index * NPU_LINK_SIZE);
}

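
/*
 * A note on the BAR size encoding used below: the hardware stores a
 * link BAR size as a power-of-two multiple of 64KB, i.e.
 * size = (1 << SIZE_FIELD) * 0x10000. For example, a field value of 5
 * decodes to 2MB, the size of the PL BARs set up in assign_mmio_bars()
 * further down.
 */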
static uint64_t get_bar_size(uint64_t bar)
{
	return (1 << GETFIELD(NX_MMIO_BAR_SIZE, bar)) * 0x10000;
}

/* Propagate changes of the device BAR to the link BARs */
static void npu_dev_bar_update(uint32_t gcid, struct npu_dev_bar *bar,
			       bool enable)
{
	uint64_t val;

	if (!bar->xscom)
		return;

	val = bar->base;
	val = SETFIELD(NX_MMIO_BAR_SIZE, val, ilog2(bar->size / 0x10000));
	if (enable)
		val |= NX_MMIO_BAR_ENABLE;
	xscom_write(gcid, bar->xscom, val);
}

/* Trap for PCI command (0x4) to enable or disable device's BARs */
static int64_t npu_dev_cfg_write_cmd(void *dev,
				     struct pci_cfg_reg_filter *pcrf __unused,
				     uint32_t offset, uint32_t size,
				     uint32_t *data, bool write)
{
	struct pci_virt_device *pvd = dev;
	struct npu_dev *ndev = pvd->data;
	bool enable;

	if (!write)
		return OPAL_PARTIAL;

	if (offset != PCI_CFG_CMD)
		return OPAL_PARAMETER;
	if (size != 1 && size != 2 && size != 4)
		return OPAL_PARAMETER;

	/* Update the device BARs; the link BARs will be synchronized
	 * with hardware automatically.
	 */
	enable = !!(*data & PCI_CFG_CMD_MEM_EN);
	npu_dev_bar_update(ndev->npu->chip_id, &ndev->bar, enable);

	/* Normal path to update PCI config buffer */
	return OPAL_PARTIAL;
}

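
/*
 * A quick sketch of the BAR sizing handshake the two traps below
 * emulate: during discovery the OS writes all-1s (0xffffffff) to a BAR
 * register and reads it back to probe how big the BAR is. Here the
 * write of 0xffffffff only arms the trap (setting bar->trapped), and
 * the following read is satisfied from the cached bar->bar_sz instead
 * of the BAR base.
 */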
/*
 * Trap for memory BARs: 0xFF's should be written to the BAR register
 * prior to getting its size.
 */
static int64_t npu_dev_cfg_bar_read(struct npu_dev *dev __unused,
				    struct pci_cfg_reg_filter *pcrf,
				    uint32_t offset, uint32_t size,
				    uint32_t *data)
{
	struct npu_dev_bar *bar = (struct npu_dev_bar *)(pcrf->data);

	/* Revert to normal path if we weren't trapped for BAR size */
	if (!bar->trapped)
		return OPAL_PARTIAL;

	if (offset != pcrf->start &&
	    offset != pcrf->start + 4)
		return OPAL_PARAMETER;
	if (size != 4)
		return OPAL_PARAMETER;

	bar->trapped = false;
	*data = bar->bar_sz;
	return OPAL_SUCCESS;
}

static int64_t npu_dev_cfg_bar_write(struct npu_dev *dev,
				     struct pci_cfg_reg_filter *pcrf,
				     uint32_t offset, uint32_t size,
				     uint32_t data)
{
	struct pci_virt_device *pvd = dev->pvd;
	struct npu_dev_bar *bar = (struct npu_dev_bar *)(pcrf->data);
	uint32_t pci_cmd;

	if (offset != pcrf->start &&
	    offset != pcrf->start + 4)
		return OPAL_PARAMETER;
	if (size != 4)
		return OPAL_PARAMETER;

	/* Return BAR size on next read */
	if (data == 0xffffffff) {
		bar->trapped = true;
		if (offset == pcrf->start)
			bar->bar_sz = (bar->size & 0xffffffff);
		else
			bar->bar_sz = (bar->size >> 32);

		return OPAL_SUCCESS;
	}

	/* Update BAR base address */
	if (offset == pcrf->start) {
		bar->base &= 0xffffffff00000000UL;
		bar->base |= (data & 0xfffffff0);
	} else {
		bar->base &= 0x00000000ffffffffUL;
		bar->base |= ((uint64_t)data << 32);

		PCI_VIRT_CFG_NORMAL_RD(pvd, PCI_CFG_CMD, 4, &pci_cmd);
		npu_dev_bar_update(dev->npu->chip_id, bar,
				   !!(pci_cmd & PCI_CFG_CMD_MEM_EN));
	}

	/* We still depend on the normal path to update the
	 * cached config buffer.
	 */
	return OPAL_PARTIAL;
}

static int64_t npu_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
			       uint32_t offset, uint32_t len, uint32_t *data,
			       bool write)
{
	struct pci_virt_device *pvd = dev;
	struct npu_dev *ndev = pvd->data;

	if (write)
		return npu_dev_cfg_bar_write(ndev, pcrf, offset, len, *data);

	return npu_dev_cfg_bar_read(ndev, pcrf, offset, len, data);
}

static int64_t npu_dev_cfg_exp_devcap(void *dev,
				      struct pci_cfg_reg_filter *pcrf __unused,
				      uint32_t offset, uint32_t size,
				      uint32_t *data, bool write)
{
	struct pci_virt_device *pvd = dev;
	struct npu_dev *ndev = pvd->data;

	assert(write);

	if ((size != 2) || (offset & 1)) {
		/* Short config writes are not supported */
		prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
		      ndev->phb->opal_id);
		return OPAL_PARAMETER;
	}

	if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
		npu_dev_procedure_reset(ndev);

	return OPAL_PARTIAL;
}

static struct npu_dev *bdfn_to_npu_dev(struct npu *p, uint32_t bdfn)
{
	struct pci_virt_device *pvd;

	/* Sanity check */
	if (bdfn & ~0xff)
		return NULL;

	pvd = pci_virt_find_device(&p->phb, bdfn);
	if (pvd)
		return pvd->data;

	return NULL;
}

#define NPU_CFG_READ(size, type)					\
static int64_t npu_cfg_read##size(struct phb *phb, uint32_t bdfn,	\
				  uint32_t offset, type *data)		\
{									\
	uint32_t val;							\
	int64_t ret;							\
									\
	ret = pci_virt_cfg_read(phb, bdfn, offset, sizeof(*data), &val); \
	*data = (type)val;						\
	return ret;							\
}
#define NPU_CFG_WRITE(size, type)					\
static int64_t npu_cfg_write##size(struct phb *phb, uint32_t bdfn,	\
				   uint32_t offset, type data)		\
{									\
	uint32_t val = data;						\
									\
	return pci_virt_cfg_write(phb, bdfn, offset, sizeof(data), val); \
}

NPU_CFG_READ(8, u8);
NPU_CFG_READ(16, u16);
NPU_CFG_READ(32, u32);
NPU_CFG_WRITE(8, u8);
NPU_CFG_WRITE(16, u16);
NPU_CFG_WRITE(32, u32);

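
/*
 * As a sketch of what the macros above generate, NPU_CFG_READ(8, u8)
 * expands to roughly:
 *
 *	static int64_t npu_cfg_read8(struct phb *phb, uint32_t bdfn,
 *				     uint32_t offset, u8 *data)
 *	{
 *		uint32_t val;
 *		int64_t ret;
 *
 *		ret = pci_virt_cfg_read(phb, bdfn, offset,
 *					sizeof(*data), &val);
 *		*data = (u8)val;
 *		return ret;
 *	}
 *
 * i.e. all six accessors funnel into the pci-virt helpers with the
 * access width passed as sizeof().
 */
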
static int __npu_dev_bind_pci_dev(struct phb *phb __unused,
				  struct pci_device *pd,
				  void *data)
{
	struct npu_dev *dev = data;
	struct dt_node *pci_dt_node;
	char *pcislot;

	/* Ignore non-NVIDIA PCI devices */
	if ((pd->vdid & 0xffff) != 0x10de)
		return 0;

	/* Find the PCI device's slot location */
	for (pci_dt_node = pd->dn;
	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label");
	     pci_dt_node = pci_dt_node->parent);

	if (!pci_dt_node)
		return 0;

	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");

	prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n",
	      pcislot, dev->slot_label);

	if (streq(pcislot, dev->slot_label))
		return 1;

	return 0;
}

static void npu_dev_bind_pci_dev(struct npu_dev *dev)
{
	struct phb *phb;
	uint32_t i;

	if (dev->pd)
		return;

	for (i = 0; i < 64; i++) {
		if (dev->npu->phb.opal_id == i)
			continue;

		phb = pci_get_phb(i);
		if (!phb)
			continue;

		dev->pd = pci_walk_dev(phb, NULL, __npu_dev_bind_pci_dev, dev);
		if (dev->pd) {
			dev->phb = phb;
			/* Found the device, set the bit in config space */
			PCI_VIRT_CFG_INIT_RO(dev->pvd, VENDOR_CAP_START +
					     VENDOR_CAP_PCI_DEV_OFFSET, 1, 0x01);
			return;
		}
	}

	prlog(PR_INFO, "%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
	      __func__, dev->npu->phb.opal_id, dev->index);
}

static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;

/* Appends an NPU phandle to the given PCI device node's ibm,npu
 * property */
static void npu_append_pci_phandle(struct dt_node *dn, u32 phandle)
{
	uint32_t *npu_phandles;
	struct dt_property *pci_npu_phandle_prop;
	size_t prop_len;

	/* Use a lock to make sure no one else has a reference to an
	 * ibm,npu property (this assumes this is the only function
	 * that holds a reference to it). */
	lock(&pci_npu_phandle_lock);

	/* This function shouldn't be called unless ibm,npu exists */
	pci_npu_phandle_prop = (struct dt_property *)
		dt_require_property(dn, "ibm,npu", -1);

	/* Need to append to the properties */
	prop_len = pci_npu_phandle_prop->len;
	prop_len += sizeof(*npu_phandles);
	dt_resize_property(&pci_npu_phandle_prop, prop_len);
	pci_npu_phandle_prop->len = prop_len;

	npu_phandles = (uint32_t *)pci_npu_phandle_prop->prop;
	npu_phandles[prop_len / sizeof(*npu_phandles) - 1] = phandle;
	unlock(&pci_npu_phandle_lock);
}

static int npu_dn_fixup(struct phb *phb,
			struct pci_device *pd,
			void *data __unused)
{
	struct npu *p = phb_to_npu(phb);
	struct npu_dev *dev;

	dev = bdfn_to_npu_dev(p, pd->bdfn);
	assert(dev);

	if (dev->phb || dev->pd)
		return 0;

	/* NPU devices require a slot location to associate with GPUs */
	dev->slot_label = dt_prop_get(pd->dn, "ibm,slot-label");

	/* Bind the emulated PCI device with the real one, which can't
	 * be done until the PCI devices are populated. Once the real
	 * PCI device is identified, we also need to fix up the
	 * device tree for it; the resulting cross-links are sketched
	 * below.
	 */
	npu_dev_bind_pci_dev(dev);
	if (dev->phb && dev->pd && dev->pd->dn) {
		if (dt_find_property(dev->pd->dn, "ibm,npu"))
			npu_append_pci_phandle(dev->pd->dn, pd->dn->phandle);
		else
			dt_add_property_cells(dev->pd->dn, "ibm,npu", pd->dn->phandle);

		dt_add_property_cells(pd->dn, "ibm,gpu", dev->pd->dn->phandle);
	}

	return 0;
}

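
/*
 * A sketch of the device-tree linkage npu_dn_fixup() establishes
 * (node names are illustrative, not taken from a real tree):
 *
 *	gpu@... {			// real GPU node
 *		ibm,npu = <&npu_dev>;	// one phandle per bound link
 *	};
 *	pci@... {			// emulated NPU device node
 *		ibm,gpu = <&gpu>;
 *	};
 *
 * "ibm,npu" accumulates one phandle per emulated device bound to the
 * GPU; "ibm,gpu" points back at the GPU node.
 */
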
static void npu_phb_final_fixup(struct phb *phb)
{
	pci_walk_dev(phb, NULL, npu_dn_fixup, NULL);
}

static void npu_ioda_init(struct npu *p)
{
	uint64_t *data64;
	uint32_t i;

	/* LXIVT - Disable all LSIs */
	for (i = 0; i < ARRAY_SIZE(p->lxive_cache); i++) {
		data64 = &p->lxive_cache[i];
		*data64 = SETFIELD(NPU_IODA_LXIVT_PRIORITY, 0ul, 0xff);
		*data64 = SETFIELD(NPU_IODA_LXIVT_SERVER, *data64, 0);
	}

	/* PCT - Reset to reserved PE# */
	for (i = 0; i < ARRAY_SIZE(p->pce_cache); i++) {
		data64 = &p->pce_cache[i];
		*data64 = SETFIELD(NPU_IODA_PCT_PE, 0ul, 0ul);
		*data64 |= NPU_IODA_PCT_LINK_ENABLED;
	}

	/* Clear TVT */
	memset(p->tve_cache, 0, sizeof(p->tve_cache));
}

static int64_t npu_ioda_reset(struct phb *phb, bool purge)
{
	struct npu *p = phb_to_npu(phb);
	uint32_t i;

	if (purge) {
		NPUDBG(p, "Purging all IODA tables...\n");
		npu_ioda_init(p);
	}

	/* LIST */
	npu_ioda_sel(p, NPU_IODA_TBL_LIST, 0, true);
	for (i = 0; i < 8; i++)
		out_be64(p->at_regs + NPU_IODA_DATA0, 0x1);

	/* LXIVT */
	npu_ioda_sel(p, NPU_IODA_TBL_LXIVT, 0, true);
	for (i = 0; i < ARRAY_SIZE(p->lxive_cache); i++)
		out_be64(p->at_regs + NPU_IODA_DATA0, p->lxive_cache[i]);

	/* PCT */
	npu_ioda_sel(p, NPU_IODA_TBL_PCT, 0, true);
	for (i = 0; i < ARRAY_SIZE(p->pce_cache); i++)
		out_be64(p->at_regs + NPU_IODA_DATA0, p->pce_cache[i]);

	/* TVT */
	npu_ioda_sel(p, NPU_IODA_TBL_TVT, 0, true);
	for (i = 0; i < ARRAY_SIZE(p->tve_cache); i++)
		out_be64(p->at_regs + NPU_IODA_DATA0, p->tve_cache[i]);

	return OPAL_SUCCESS;
}

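
/*
 * A note on the access pattern above: the IODA tables are reached
 * indirectly. npu_ioda_sel() programs a table selector (table ID,
 * starting index and, judging by its use here, an auto-increment
 * flag), after which successive accesses to NPU_IODA_DATA0 touch
 * consecutive entries. That is why the loops above can write a whole
 * cached table back without reselecting each entry.
 */
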
static int npu_isn_valid(struct npu *p, uint32_t isn)
{
	if (p->chip_id != p8_irq_to_chip(isn) || p->index != 0 ||
	    NPU_IRQ_NUM(isn) < NPU_LSI_IRQ_MIN ||
	    NPU_IRQ_NUM(isn) > NPU_LSI_IRQ_MAX) {
		/**
		 * @fwts-label NPUisnInvalid
		 * @fwts-advice NVLink not functional
		 */
		prlog(PR_ERR, "NPU%d: isn 0x%x not valid for this NPU\n",
		      p->phb.opal_id, isn);
		return false;
	}

	return true;
}

static int64_t npu_lsi_get_xive(struct irq_source *is, uint32_t isn,
				uint16_t *server, uint8_t *prio)
{
	struct npu *p = is->data;
	uint32_t irq = NPU_IRQ_NUM(isn);
	uint64_t lxive;

	if (!npu_isn_valid(p, isn))
		return OPAL_PARAMETER;

	/* The entry is fetched from the cache, so the cache must
	 * have been initialized with the default values beforehand.
	 */
	irq -= NPU_LSI_IRQ_MIN;
	lxive = p->lxive_cache[irq];
	*server = GETFIELD(NPU_IODA_LXIVT_SERVER, lxive);
	*prio = GETFIELD(NPU_IODA_LXIVT_PRIORITY, lxive);

	return OPAL_SUCCESS;
}

static int64_t npu_lsi_set_xive(struct irq_source *is, uint32_t isn,
				uint16_t server, uint8_t prio)
{
	struct npu *p = is->data;
	uint32_t irq = NPU_IRQ_NUM(isn);
	uint64_t lxive;

	if (!npu_isn_valid(p, isn))
		return OPAL_PARAMETER;

	/* Figure out LXIVT entry */
	lxive = SETFIELD(NPU_IODA_LXIVT_SERVER, 0ul, server);
	lxive = SETFIELD(NPU_IODA_LXIVT_PRIORITY, lxive, prio);

	/* Cache LXIVT entry */
	irq -= NPU_LSI_IRQ_MIN;
	p->lxive_cache[irq] = lxive;

	/* Update the LXIVT entry */
	npu_ioda_sel(p, NPU_IODA_TBL_LXIVT, irq, false);
	lxive = in_be64(p->at_regs + NPU_IODA_DATA0);
	lxive = SETFIELD(NPU_IODA_LXIVT_SERVER, lxive, server);
	lxive = SETFIELD(NPU_IODA_LXIVT_PRIORITY, lxive, prio);
	out_be64(p->at_regs + NPU_IODA_DATA0, lxive);

	return OPAL_SUCCESS;
}

static void npu_err_interrupt(struct irq_source *is, uint32_t isn)
{
	struct npu *p = is->data;
	uint32_t irq = NPU_IRQ_NUM(isn);

	if (!npu_isn_valid(p, isn))
		return;

	/* There are four LSIs used for error reporting: 4/5 for data
	 * link error reporting and 6/7 for frozen PE detection.
	 */
	irq -= NPU_LSI_IRQ_MIN;
	switch (irq) {
	case 4 ... 5:
		prerror("Invalid NPU error interrupt received\n");
		break;
	case 6 ... 7:
		opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
					OPAL_EVENT_PCI_ERROR);
	}
}

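
/*
 * LSI ownership summary, derived from the handlers above and the
 * interrupt-map built in npu_add_phb_properties(): of the 8 LSIs
 * registered per NPU, 0-3 are the per-link DL interrupts handed to
 * the OS, while 4-7 stay with skiboot for error reporting, which is
 * what npu_lsi_attributes() below encodes.
 */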
static uint64_t npu_lsi_attributes(struct irq_source *is, uint32_t isn)
{
	struct npu *p = is->data;
	uint32_t idx = isn - p->base_lsi;

	if (idx >= 4)
		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_LSI;
	return IRQ_ATTR_TARGET_LINUX;
}

/* Error LSIs (skiboot owned) */
static const struct irq_source_ops npu_lsi_irq_ops = {
	.get_xive	= npu_lsi_get_xive,
	.set_xive	= npu_lsi_set_xive,
	.attributes	= npu_lsi_attributes,
	.interrupt	= npu_err_interrupt,
};

static void npu_register_irq(struct npu *p)
{
	register_irq_source(&npu_lsi_irq_ops, p, p->base_lsi, 8);
}

static void npu_hw_init(struct npu *p)
{
	/* 3 MMIO setup for AT */
	out_be64(p->at_regs + NPU_LSI_SOURCE_ID,
		 SETFIELD(NPU_LSI_SRC_ID_BASE, 0ul, NPU_LSI_IRQ_MIN >> 4));
	BUILD_ASSERT((NPU_LSI_IRQ_MIN & 0x07F0) == NPU_LSI_IRQ_MIN);
	out_be64(p->at_regs + NPU_INTREP_TIMER, 0x0ul);
	npu_ioda_reset(&p->phb, false);
}

static int64_t npu_map_pe_dma_window_real(struct phb *phb,
					  uint64_t pe_number,
					  uint16_t window_id,
					  uint64_t pci_start_addr,
					  uint64_t pci_mem_size)
{
	struct npu *p = phb_to_npu(phb);
	uint64_t end;
	uint64_t tve;

	/* Sanity check. Each PE has one corresponding TVE */
	if (pe_number >= NPU_NUM_OF_PES ||
	    window_id != pe_number)
		return OPAL_PARAMETER;

	if (pci_mem_size) {
		/* Enable */

		end = pci_start_addr + pci_mem_size;

		/* We have to be 16M aligned */
		if ((pci_start_addr & 0x00ffffff) ||
		    (pci_mem_size & 0x00ffffff))
			return OPAL_PARAMETER;

		/*
		 * It *looks* like this is the max we can support (we
		 * need to verify this). Also we are not checking for
		 * rollover, but then we aren't trying too hard to
		 * protect ourselves against a completely broken OS.
		 */
		if (end > 0x0003ffffffffffffull)
			return OPAL_PARAMETER;

		/*
		 * Put start address bits 49:24 into TVE[52:53]||[0:23]
		 * and end address bits 49:24 into TVE[54:55]||[24:47]
		 * and set TVE[51]
		 */
		tve  = (pci_start_addr << 16) & (0xffffffull << 40);
		tve |= (pci_start_addr >> 38) & (3ull << 10);
		tve |= (end >> 8) & (0xffffffull << 16);
		tve |= (end >> 40) & (3ull << 8);
		tve |= PPC_BIT(51);
	} else {
		/* Disable */
		tve = 0;
	}

	npu_ioda_sel(p, NPU_IODA_TBL_TVT, window_id, false);
	out_be64(p->at_regs + NPU_IODA_DATA0, tve);
	p->tve_cache[window_id] = tve;

	return OPAL_SUCCESS;
}

static int64_t npu_map_pe_dma_window(struct phb *phb,
				     uint64_t pe_number,
				     uint16_t window_id,
				     uint16_t tce_levels,
				     uint64_t tce_table_addr,
				     uint64_t tce_table_size,
				     uint64_t tce_page_size)
{
	struct npu *p = phb_to_npu(phb);
	uint64_t tts_encoded;
	uint64_t data64 = 0;

	/* Sanity check. Each PE has one corresponding TVE */
	if (pe_number >= NPU_NUM_OF_PES ||
	    window_id != pe_number)
		return OPAL_PARAMETER;

	/* Special condition, zero TCE table size used to disable
	 * the TVE.
	 */
	if (!tce_table_size) {
		npu_ioda_sel(p, NPU_IODA_TBL_TVT, window_id, false);
		out_be64(p->at_regs + NPU_IODA_DATA0, 0ul);
		p->tve_cache[window_id] = 0ul;
		return OPAL_SUCCESS;
	}

	/* Additional arguments validation */
	if (tce_levels < 1 ||
	    tce_levels > 4 ||
	    !is_pow2(tce_table_size) ||
	    tce_table_size < 0x1000)
		return OPAL_PARAMETER;

	/* TCE table size */
	data64 = SETFIELD(NPU_IODA_TVT_TTA, 0ul, tce_table_addr >> 12);
	tts_encoded = ilog2(tce_table_size) - 11;
	if (tts_encoded > 39)
		return OPAL_PARAMETER;
	data64 = SETFIELD(NPU_IODA_TVT_SIZE, data64, tts_encoded);

	/* TCE page size */
	switch (tce_page_size) {
	case 0x10000:		/* 64K */
		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 5);
		break;
	case 0x1000000:		/* 16M */
		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 13);
		break;
	case 0x10000000:	/* 256M */
		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 17);
		break;
	case 0x1000:		/* 4K */
	default:
		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 1);
	}

	/* Number of levels */
	data64 = SETFIELD(NPU_IODA_TVT_LEVELS, data64, tce_levels - 1);

	/* Update to hardware */
	npu_ioda_sel(p, NPU_IODA_TBL_TVT, window_id, false);
	out_be64(p->at_regs + NPU_IODA_DATA0, data64);
	p->tve_cache[window_id] = data64;

	return OPAL_SUCCESS;
}

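
/*
 * A worked example of the TVT encoding above (values derived from the
 * code, not from a hardware spec): a single-level, 64K-page window
 * backed by a 64KB TCE table at 0x1000000 yields TTA = 0x1000000 >> 12,
 * SIZE = ilog2(0x10000) - 11 = 5, PSIZE = 5 and LEVELS = 0. All of the
 * fields are log2-based, consistent with the IODA2-style tables this
 * PHB advertises via its "ibm,ioda2-npu-phb" compatible string.
 */
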
static int64_t npu_set_pe(struct phb *phb,
			  uint64_t pe_number,
			  uint64_t bdfn,
			  uint8_t bcompare,
			  uint8_t dcompare,
			  uint8_t fcompare,
			  uint8_t action)
{
	struct npu *p = phb_to_npu(phb);
	struct npu_dev *dev;
	uint32_t link_idx;
	uint64_t *data64;

	/* Sanity check */
	if (action != OPAL_MAP_PE &&
	    action != OPAL_UNMAP_PE)
		return OPAL_PARAMETER;
	if (pe_number >= NPU_NUM_OF_PES)
		return OPAL_PARAMETER;

	/* All emulated PCI devices are hooked to the root bus, whose
	 * bus number is zero.
	 */
	dev = bdfn_to_npu_dev(p, bdfn);
	if ((bdfn >> 8) || !dev)
		return OPAL_PARAMETER;

	link_idx = dev->index;
	dev->pe_number = pe_number;

	/* Separate links will be mapped to different PEs */
	if (bcompare != OpalPciBusAll ||
	    dcompare != OPAL_COMPARE_RID_DEVICE_NUMBER ||
	    fcompare != OPAL_COMPARE_RID_FUNCTION_NUMBER)
		return OPAL_UNSUPPORTED;

	/* Map the link to the corresponding PE */
	data64 = &p->pce_cache[link_idx];
	if (action == OPAL_MAP_PE)
		*data64 = SETFIELD(NPU_IODA_PCT_PE, *data64,
				   pe_number);
	else
		*data64 = SETFIELD(NPU_IODA_PCT_PE, *data64,
				   NPU_NUM_OF_PES);

	*data64 |= NPU_IODA_PCT_LINK_ENABLED;

	npu_ioda_sel(p, NPU_IODA_TBL_PCT, link_idx, false);
	out_be64(p->at_regs + NPU_IODA_DATA0, *data64);

	return OPAL_SUCCESS;
}

static int64_t npu_get_link_state(struct pci_slot *slot __unused, uint8_t *val)
{
	/* As we're emulating all the PCI stuff, the link bandwidth
	 * isn't a big deal anyway.
	 */
	*val = OPAL_SHPC_LINK_UP_x1;
	return OPAL_SUCCESS;
}

static int64_t npu_get_power_state(struct pci_slot *slot __unused, uint8_t *val)
{
	*val = PCI_SLOT_POWER_ON;
	return OPAL_SUCCESS;
}

static int64_t npu_hreset(struct pci_slot *slot __unused)
{
	prlog(PR_DEBUG, "NPU: driver should call reset procedure here\n");

	return OPAL_SUCCESS;
}

static int64_t npu_freset(struct pci_slot *slot __unused)
{
	/* FIXME: PHB fundamental reset, which needs to be
	 * figured out later. It's used by EEH recovery
	 * upon fenced AT.
	 */
	return OPAL_SUCCESS;
}

static struct pci_slot *npu_slot_create(struct phb *phb)
{
	struct pci_slot *slot;

	slot = pci_slot_alloc(phb, NULL);
	if (!slot)
		return slot;

	/* Elementary functions */
	slot->ops.get_presence_state	= NULL;
	slot->ops.get_link_state	= npu_get_link_state;
	slot->ops.get_power_state	= npu_get_power_state;
	slot->ops.get_attention_state	= NULL;
	slot->ops.get_latch_state	= NULL;
	slot->ops.set_power_state	= NULL;
	slot->ops.set_attention_state	= NULL;

	slot->ops.prepare_link_change	= NULL;
	slot->ops.poll_link		= NULL;
	slot->ops.hreset		= npu_hreset;
	slot->ops.freset		= npu_freset;
	slot->ops.creset		= NULL;

	return slot;
}

static int64_t npu_freeze_status(struct phb *phb,
				 uint64_t pe_number __unused,
				 uint8_t *freeze_state,
				 uint16_t *pci_error_type __unused,
				 uint16_t *severity __unused)
{
	/* FIXME: When it's called by the skiboot PCI config accessor,
	 * the PE number is fixed to 0, which is incorrect. We need to
	 * introduce another PHB callback to translate it. For now,
	 * this keeps the skiboot PCI enumeration going.
	 */
	struct npu *p = phb_to_npu(phb);

	if (p->fenced)
		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
	else
		*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
	return OPAL_SUCCESS;
}

static int64_t npu_eeh_next_error(struct phb *phb,
				  uint64_t *first_frozen_pe,
				  uint16_t *pci_error_type,
				  uint16_t *severity)
{
	struct npu *p = phb_to_npu(phb);
	int i;
	uint64_t result = 0;

	*first_frozen_pe = -1;
	*pci_error_type = OPAL_EEH_NO_ERROR;
	*severity = OPAL_EEH_SEV_NO_ERROR;

	if (p->fenced) {
		*pci_error_type = OPAL_EEH_PHB_ERROR;
		*severity = OPAL_EEH_SEV_PHB_FENCED;
		return OPAL_SUCCESS;
	}

	npu_ioda_sel(p, NPU_IODA_TBL_PESTB, 0, true);
	for (i = 0; i < NPU_NUM_OF_PES; i++) {
		result = in_be64(p->at_regs + NPU_IODA_DATA0);
		if (result > 0) {
			*first_frozen_pe = i;
			*pci_error_type = OPAL_EEH_PE_ERROR;
			*severity = OPAL_EEH_SEV_PE_ER;
			break;
		}
	}

	return OPAL_SUCCESS;
}

/* For use in error injection and handling. */
void npu_set_fence_state(struct npu *p, bool fence)
{
	p->fenced = fence;

	if (fence)
		prlog(PR_ERR, "NPU: Chip %x is fenced, reboot required.\n",
		      p->chip_id);
	else
		prlog(PR_WARNING, "NPU: un-fencing is dangerous and should "
		      "only be used for development purposes.\n");
}

/* Sets the NPU to trigger an error when a DMA occurs */
static int64_t npu_err_inject(struct phb *phb, uint64_t pe_number,
			      uint32_t type, uint32_t func __unused,
			      uint64_t addr __unused, uint64_t mask __unused)
{
	struct npu *p = phb_to_npu(phb);
	struct npu_dev *dev = NULL;
	int i;

	if (pe_number >= NPU_NUM_OF_PES) {
		prlog(PR_ERR, "NPU: error injection failed, bad PE given\n");
		return OPAL_PARAMETER;
	}

	for (i = 0; i < p->total_devices; i++) {
		if (p->devices[i].pe_number == pe_number) {
			dev = &p->devices[i];
			break;
		}
	}

	if (!dev) {
		prlog(PR_ERR, "NPU: couldn't find device with PE%llx\n", pe_number);
		return OPAL_PARAMETER;
	}

	/* TODO: extend this to conform to OPAL injection standards */
	if (type > 1) {
		prlog(PR_ERR, "NPU: invalid error injection type\n");
		return OPAL_PARAMETER;
	} else if (type == 1) {
		/* Emulate fence mode. */
		npu_set_fence_state(p, true);
	} else {
		/* Cause a freeze with an invalid MMIO read. If the BAR is not
		 * enabled, this will checkstop the machine.
		 */
		npu_dev_bar_update(p->chip_id, &dev->bar, true);
		in_be64((void *)dev->bar.base);
	}

	return OPAL_SUCCESS;
}

static const struct phb_ops npu_ops = {
	.cfg_read8		= npu_cfg_read8,
	.cfg_read16		= npu_cfg_read16,
	.cfg_read32		= npu_cfg_read32,
	.cfg_write8		= npu_cfg_write8,
	.cfg_write16		= npu_cfg_write16,
	.cfg_write32		= npu_cfg_write32,
	.choose_bus		= NULL,
	.get_reserved_pe_number	= NULL,
	.device_init		= NULL,
	.phb_final_fixup	= npu_phb_final_fixup,
	.ioda_reset		= npu_ioda_reset,
	.papr_errinjct_reset	= NULL,
	.pci_reinit		= NULL,
	.set_phb_mem_window	= NULL,
	.phb_mmio_enable	= NULL,
	.map_pe_mmio_window	= NULL,
	.map_pe_dma_window	= npu_map_pe_dma_window,
	.map_pe_dma_window_real	= npu_map_pe_dma_window_real,
	.pci_msi_eoi		= NULL,
	.set_xive_pe		= NULL,
	.get_msi_32		= NULL,
	.get_msi_64		= NULL,
	.set_pe			= npu_set_pe,
	.set_peltv		= NULL,
	.eeh_freeze_status	= npu_freeze_status,
	.eeh_freeze_clear	= NULL,
	.eeh_freeze_set		= NULL,
	.next_error		= npu_eeh_next_error,
	.err_inject		= npu_err_inject,
	.get_diag_data2		= NULL,
	.set_capi_mode		= NULL,
	.set_capp_recovery	= NULL,
};

static void assign_mmio_bars(uint32_t gcid, uint32_t xscom,
			     struct dt_node *npu_dn, uint64_t mm_win[2],
			     uint64_t at_bar[2])
{
	uint64_t mem_start, mem_end;
	struct npu_dev_bar bar;
	struct dt_node *link;

	/* Configure BAR selection.
	 *
	 * Currently, each PHY contains 2 links and each link has 2
	 * BARs. The first BAR is assigned to the DLTL region, which is
	 * what the kernel uses. The second BAR is assigned to either
	 * the PL or AT region, or is left unassigned. The PL0/PL1/AT
	 * MMIO regions are not exposed to the kernel, so we assign
	 * them at the start of the available memory area followed by
	 * the DLTL regions. So we end up with the following memory
	 * map (assuming we're given a memory region starting at
	 * 0x3fff000000000):
	 *
	 * Link#0-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000420000
	 * Link#0-BAR#1: PL0 BAR     (  2MB) - 0x3fff000000000
	 * Link#1-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000440000
	 * Link#1-BAR#1: AT BAR      ( 64KB) - 0x3fff000400000
	 * Link#2-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000460000
	 * Link#2-BAR#1: PL1 BAR     (  2MB) - 0x3fff000200000
	 * Link#3-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000480000
	 * Link#3-BAR#1: UNASSIGNED
	 */
	xscom_write(gcid, xscom + NPU_AT_SCOM_OFFSET + NX_BAR,
		    0x0211000043500000UL);

	xscom_read(gcid, npu_link_scom_base(npu_dn, xscom, 0) + NX_MMIO_BAR_0,
		   &mem_start);
	mem_start = GETFIELD(NX_MMIO_BAR_BASE, mem_start) << 12;

	xscom_read(gcid, npu_link_scom_base(npu_dn, xscom, 5) + NX_MMIO_BAR_0,
		   &mem_end);
	mem_end = (GETFIELD(NX_MMIO_BAR_BASE, mem_end) << 12) +
		  get_bar_size(mem_end);

	/* PL0 BAR comes first at 0x3fff000000000 */
	bar.xscom = npu_link_scom_base(npu_dn, xscom, 0) + NX_MMIO_BAR_1;
	bar.base = mem_start;
	bar.size = NX_MMIO_PL_SIZE;
	npu_dev_bar_update(gcid, &bar, true);

	/* PL1 BAR */
	bar.xscom = npu_link_scom_base(npu_dn, xscom, 4) + NX_MMIO_BAR_1;
	bar.base += bar.size;
	bar.size = NX_MMIO_PL_SIZE;
	npu_dev_bar_update(gcid, &bar, true);

	/* Then the AT BAR */
	bar.xscom = npu_link_scom_base(npu_dn, xscom, 1) + NX_MMIO_BAR_1;
	bar.base += bar.size;
	bar.size = NX_MMIO_AT_SIZE;
	at_bar[0] = bar.base;
	at_bar[1] = NX_MMIO_AT_SIZE;
	npu_dev_bar_update(gcid, &bar, true);

	/* Now we configure all the DLTL BARs. These are the ones
	 * actually exposed to the kernel. */
	mm_win[0] = bar.base + bar.size;
	dt_for_each_node(npu_dn, link) {
		uint32_t index;

		index = dt_prop_get_u32(link, "ibm,npu-link-index");
		bar.xscom = npu_link_scom_base(npu_dn, xscom, index) +
			    NX_MMIO_BAR_0;
		bar.base += bar.size;
		bar.size = NX_MMIO_DL_SIZE;
		bar.base = ALIGN_UP(bar.base, bar.size);
		npu_dev_bar_update(gcid, &bar, false);
	}
	mm_win[1] = (bar.base + bar.size) - mm_win[0];

	/* If we weren't given enough room to set up all the BARs we
	 * require, it's better to crash here than to risk creating
	 * overlapping BARs which will xstop the machine randomly in
	 * the future. */
	assert(bar.base + bar.size <= mem_end);
}

/* Probe the NPU device node and create the PCI root device node
 * accordingly. The NPU device node should specify the number
 * of links and the xscom base address to access the links.
 */
static void npu_probe_phb(struct dt_node *dn)
{
	struct dt_node *np;
	uint32_t gcid, index, phb_index, xscom;
	uint64_t at_bar[2], mm_win[2];
	uint32_t links;
	char *path;

	/* Retrieve chip id */
	path = dt_get_path(dn);
	gcid = dt_get_chip_id(dn);
	index = dt_prop_get_u32(dn, "ibm,npu-index");
	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
	links = dt_prop_get_u32(dn, "ibm,npu-links");
	prlog(PR_INFO, "Chip %d Found NPU%d (%d links) at %s\n",
	      gcid, index, links, path);
	free(path);

	/* Retrieve xscom base addr */
	xscom = dt_get_address(dn, 0, NULL);
	prlog(PR_INFO, "  XSCOM Base: %08x\n", xscom);

	assign_mmio_bars(gcid, xscom, dn, mm_win, at_bar);
	prlog(PR_INFO, "  AT BAR:     %016llx (%lldKB)\n",
	      at_bar[0], at_bar[1] / 0x400);

	/* Create PCI root device node */
	np = dt_new_addr(dt_root, "pciex", at_bar[0]);
	assert(np);

	dt_add_property_strings(np, "compatible",
				"ibm,power8-npu-pciex", "ibm,ioda2-npu-phb");
	dt_add_property_strings(np, "device_type", "pciex");
	dt_add_property(np, "reg", at_bar, sizeof(at_bar));

	dt_add_property_cells(np, "ibm,phb-index", phb_index);
	dt_add_property_cells(np, "ibm,npu-index", index);
	dt_add_property_cells(np, "ibm,chip-id", gcid);
	dt_add_property_cells(np, "ibm,xscom-base", xscom);
	dt_add_property_cells(np, "ibm,npcq", dn->phandle);
	dt_add_property_cells(np, "ibm,links", links);
	dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
	dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);

	/* Disable fast reboot - not currently supported */
	disable_fast_reboot("NVLink device enabled");
}

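
/*
 * Layout of the vendor-specific capability built below, as derived
 * from this file (offsets relative to VENDOR_CAP_START):
 *
 *	+0	capability ID (PCI_CFG_CAP_ID_VENDOR)
 *	+1	next capability pointer
 *	+2	capability length
 *	+3	version (OPAL_NPU_VERSION)
 *	+4..+11	link procedure control/status, trapped by
 *		npu_dev_procedure()
 *	+12	link index
 *	+13	(VENDOR_CAP_PCI_DEV_OFFSET) set to 1 once a real GPU
 *		has been bound in npu_dev_bind_pci_dev()
 */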
static void npu_dev_populate_vendor_cap(struct npu_dev_cap *cap)
{
	struct npu_dev *dev = cap->dev;
	struct pci_virt_device *pvd = dev->pvd;
	uint32_t offset = cap->start;
	uint8_t val;

	/* Add length and version information */
	val = cap->end - cap->start;
	PCI_VIRT_CFG_INIT_RO(pvd, offset + 2, 1, val);
	PCI_VIRT_CFG_INIT_RO(pvd, offset + 3, 1, OPAL_NPU_VERSION);
	offset += 4;

	/* Defaults when the trap can't handle the read/write (eg. due
	 * to reading/writing less than 4 bytes). */
	val = 0x0;
	PCI_VIRT_CFG_INIT_RO(pvd, offset, 4, val);
	PCI_VIRT_CFG_INIT_RO(pvd, offset + 4, 4, val);

	/* Create a trap for AT/PL procedures */
	pci_virt_add_filter(pvd, offset, 8,
			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
			    npu_dev_procedure, NULL);
	offset += 8;

	PCI_VIRT_CFG_INIT_RO(pvd, offset, 1, dev->index);
}

static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
{
	struct npu_dev *dev = cap->dev;
	struct pci_virt_device *pvd = dev->pvd;
	uint32_t base = cap->start;
	uint32_t val;

	/* Sanity check on capability ID */
	if (cap->id != PCI_CFG_CAP_ID_EXP) {
		prlog(PR_NOTICE, "%s: Invalid capability ID %d (%d)\n",
		      __func__, cap->id, PCI_CFG_CAP_ID_EXP);
		return;
	}

	/* Sanity check on spanned registers */
	if ((cap->end - cap->start) < PCIE_CAP_START) {
		prlog(PR_NOTICE, "%s: Invalid reg region [%x, %x] for cap %d\n",
		      __func__, cap->start, cap->end, cap->id);
		return;
	}

	/* 0x00 - ID/PCIE capability */
	val = cap->id;
	val |= ((0x2 << 16) | (PCIE_TYPE_ENDPOINT << 20));
	PCI_VIRT_CFG_INIT_RO(pvd, base, 4, val);

	/* 0x04 - Device capability
	 *
	 * We should support FLR. Otherwise, we might have problems
	 * passing the device through to userland via the Linux
	 * VFIO infrastructure.
	 */
	val = ((PCIE_MPSS_128) |
	       (PCIE_PHANTOM_NONE << 3) |
	       (PCIE_L0SL_MAX_NO_LIMIT << 6) |
	       (PCIE_L1L_MAX_NO_LIMIT << 9) |
	       (PCICAP_EXP_DEVCAP_FUNC_RESET));
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_DEVCAP, 4, val);

	pci_virt_add_filter(pvd, base + PCICAP_EXP_DEVCTL, 2,
			    PCI_REG_FLAG_WRITE,
			    npu_dev_cfg_exp_devcap, NULL);

	/* 0x08 - Device control and status */
	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DEVCTL, 4, 0x00002810,
			  0xffff0000, 0x000f0000);

	/* 0x0c - Link capability */
	val = (PCIE_LSPEED_VECBIT_2 | (PCIE_LWIDTH_1X << 4));
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_LCAP, 4, val);

	/* 0x10 - Link control and status */
	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_LCTL, 4, 0x00130000,
			  0xfffff000, 0xc0000000);

	/* 0x14 - Slot capability */
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SLOTCAP, 4, 0x00000000);

	/* 0x18 - Slot control and status */
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SLOTCTL, 4, 0x00000000);

	/* 0x1c - Root control and capability */
	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_RC, 4, 0x00000000,
			  0xffffffe0, 0x00000000);

	/* 0x20 - Root status */
	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_RSTAT, 4, 0x00000000,
			  0xffffffff, 0x00010000);

	/* 0x24 - Device capability 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCIECAP_EXP_DCAP2, 4, 0x00000000);

	/* 0x28 - Device Control and status 2 */
	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DCTL2, 4, 0x00070000,
			  0xffff0000, 0x00000000);

	/* 0x2c - Link capability 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_LCAP2, 4, 0x00000007);

	/* 0x30 - Link control and status 2 */
	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_LCTL2, 4, 0x00000003,
			  0xffff0000, 0x00200000);

	/* 0x34 - Slot capability 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SCAP2, 4, 0x00000000);

	/* 0x38 - Slot control and status 2 */
	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SCTL2, 4, 0x00000000);
}

static struct npu_dev_cap *npu_dev_create_capability(struct npu_dev *dev,
						     void (*populate)(struct npu_dev_cap *),
						     uint16_t id,
						     uint16_t start,
						     uint16_t end)
{
	struct npu_dev_cap *cap;

	/* Check if the capability already exists */
	cap = npu_dev_find_capability(dev, id);
	if (cap)
		return cap;

	/* Allocate a new one */
	cap = zalloc(sizeof(struct npu_dev_cap));
	assert(cap);

	/* Put it into the pool */
	cap->id		= id;
	cap->start	= start;
	cap->end	= end;
	cap->dev	= dev;
	cap->populate	= populate;
	list_add_tail(&dev->capabilities, &cap->link);

	return cap;
}

static struct npu_dev_cap *npu_dev_find_capability(struct npu_dev *dev,
						   uint16_t id)
{
	struct npu_dev_cap *cap;

	list_for_each(&dev->capabilities, cap, link) {
		if (cap->id == id)
			return cap;
	}

	return NULL;
}

/*
 * All capabilities should be put into the device capability
 * list according to register offset in ascending order for
 * easy access at later point.
 */
static void npu_dev_create_capabilities(struct npu_dev *dev)
{
	list_head_init(&dev->capabilities);

	/* PCI express capability */
	npu_dev_create_capability(dev, npu_dev_populate_pcie_cap,
				  PCI_CFG_CAP_ID_EXP, PCIE_CAP_START,
				  PCIE_CAP_END);

	/* Vendor specific capability */
	npu_dev_create_capability(dev, npu_dev_populate_vendor_cap,
				  PCI_CFG_CAP_ID_VENDOR, VENDOR_CAP_START,
				  VENDOR_CAP_END);
}

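
/*
 * With the two capabilities above, the chain that npu_dev_create_cfg()
 * links together ends up as (a sketch derived from the loop below):
 *
 *	0x34 (PCI_CFG_CAP) -> 0x40 (PCIe cap) -> 0x80 (vendor cap) -> 0
 *
 * because each iteration writes the current capability's offset into
 * the previous capability's next pointer (last_cap_offset + 1).
 */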
static void npu_dev_create_cfg(struct npu_dev *dev)
{
	struct pci_virt_device *pvd = dev->pvd;
	struct npu_dev_cap *cap;
	uint32_t offset;
	uint32_t last_cap_offset;

	/* 0x00 - Vendor/Device ID */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_VENDOR_ID, 4, 0x04ea1014);

	/* 0x04 - Command/Status
	 *
	 * Create one trap to trace toggling of the memory BAR
	 * enable bit.
	 */
	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_CMD, 4, 0x00100000, 0xffb802b8,
			  0xf9000000);

	pci_virt_add_filter(pvd, PCI_CFG_CMD, 1, PCI_REG_FLAG_WRITE,
			    npu_dev_cfg_write_cmd, NULL);

	/* 0x08 - Rev/Class/Cache */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_REV_ID, 4, 0x06800100);

	/* 0x0c - CLS/Latency Timer/Header/BIST */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000);

	/* 0x10 - BARs, always 64-bits non-prefetchable
	 *
	 * Each emulated device represents one link and therefore
	 * there is one BAR for the associated DLTL region.
	 */

	/* Low 32-bits */
	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4,
			  (dev->bar.base & 0xfffffff0) | dev->bar.flags,
			  0x0000000f, 0x00000000);

	/* High 32-bits */
	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR1, 4, (dev->bar.base >> 32),
			  0x00000000, 0x00000000);

	/*
	 * Create trap. Writing 0xFF's to the BAR registers should be
	 * trapped and the size returned on the next read.
	 */
	pci_virt_add_filter(pvd, PCI_CFG_BAR0, 8,
			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
			    npu_dev_cfg_bar, &dev->bar);

	/* 0x18/1c/20/24 - Disabled BAR#2/3/4/5
	 *
	 * Mark those BARs readonly so that 0x0 will be returned when
	 * probing the length and the BARs will be skipped.
	 */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR2, 4, 0x00000000);
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR3, 4, 0x00000000);
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR4, 4, 0x00000000);
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR5, 4, 0x00000000);

	/* 0x28 - Cardbus CIS pointer */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CARDBUS_CIS, 4, 0x00000000);

	/* 0x2c - Subsystem ID */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_SUBSYS_VENDOR_ID, 4, 0x00000000);

	/* 0x30 - ROM BAR
	 *
	 * Force its size to be zero so that the kernel will skip
	 * probing the ROM BAR. We needn't emulate the ROM BAR.
	 */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_ROMBAR, 4, 0xffffffff);

	/* 0x34 - PCI Capability
	 *
	 * By default, we don't have any capabilities
	 */
	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CAP, 4, 0x00000000);

	last_cap_offset = PCI_CFG_CAP - 1;
	list_for_each(&dev->capabilities, cap, link) {
		offset = cap->start;

		/* Initialize config space for the capability */
		if (cap->populate)
			cap->populate(cap);

		/* Add capability header */
		PCI_VIRT_CFG_INIT_RO(pvd, offset, 2, cap->id);

		/* Update the next capability pointer */
		PCI_VIRT_CFG_NORMAL_WR(pvd, last_cap_offset + 1, 1, offset);

		last_cap_offset = offset;
	}

	/* 0x38 - Reserved */
	PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000);

	/* 0x3c - INT line/pin/Minimal grant/Maximal latency */
	if (!(dev->index % 2))
		PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100);
	else
		PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000200);
}

static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t group)
{
	int i;
	int bdfn = (group << 3);

	for (i = 0; i < p->total_devices; i++) {
		if ((p->devices[i].pvd->bdfn & 0xf8) == (bdfn & 0xf8))
			bdfn++;
	}

	return bdfn;
}

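
/*
 * BDF packing sketch for the allocator above: a BDFN on the emulated
 * root bus is (device << 3) | function, so the group ID becomes the
 * PCI device number and each additional link in the same group is
 * handed the next function number, e.g. group 1 with two links yields
 * 00:01.0 and 00:01.1.
 */
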
static void npu_create_devices(struct dt_node *dn, struct npu *p)
{
	struct npu_dev *dev;
	struct dt_node *npu_dn, *link;
	uint32_t bdfn, npu_phandle, index = 0;
	uint64_t buid_reg;
	uint64_t lsisrcid;
	uint64_t buid;

	/* The bits in the LSI ID Base register are always compared and
	 * can be set to 0 in the buid base and mask fields. The
	 * buid (bus unit id) is the full irq minus the last 4 bits. */
	lsisrcid = GETFIELD(NPU_LSI_SRC_ID_BASE, NPU_LSI_SRC_ID_BASE);
	buid = p8_chip_irq_block_base(p->chip_id, P8_IRQ_BLOCK_MISC) >> 4;

	buid_reg = SETFIELD(NP_IRQ_LEVELS, NP_BUID_ENABLE, ~0);
	buid_reg = SETFIELD(NP_BUID_MASK, buid_reg, ~lsisrcid);
	buid_reg = SETFIELD(NP_BUID_BASE, buid_reg, (buid & ~lsisrcid));

	/* Get the npu node which has the links which we expand here
	 * into PCI-like devices attached to our emulated phb. */
	npu_phandle = dt_prop_get_u32(dn, "ibm,npcq");
	npu_dn = dt_find_by_phandle(dt_root, npu_phandle);
	assert(npu_dn);

	/* Walk the link@x nodes to initialize devices */
	p->total_devices = 0;
	p->phb.scan_map = 0;
	list_head_init(&p->phb.virt_devices);
	dt_for_each_compatible(npu_dn, link, "ibm,npu-link") {
		struct npu_dev_bar *bar;
		uint32_t group_id;
		uint64_t val;

		dev = &p->devices[index];
		dev->index = dt_prop_get_u32(link, "ibm,npu-link-index");
		dev->xscom = npu_link_scom_base(npu_dn, p->xscom_base,
						dev->index);

		dev->npu = p;
		dev->dt_node = link;

		/* We don't support MMIO PHY access yet */
		dev->pl_base = NULL;

		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
		bdfn = npu_allocate_bdfn(p, group_id);

		/* This must be done after calling
		 * npu_allocate_bdfn() */
		p->total_devices++;
		p->phb.scan_map |= 0x1 << ((bdfn & 0xf8) >> 3);

		dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");

		/* Setup BUID/ISRN */
		xscom_write(p->chip_id, dev->xscom + NX_NP_BUID, buid_reg);

		/* Create PCI virtual device */
		dev->pvd = pci_virt_add_device(&p->phb, bdfn, NPU_DEV_CFG_SIZE, dev);
		assert(dev->pvd);
		bar = &dev->bar;
		bar->flags = (PCI_CFG_BAR_TYPE_MEM |
			      PCI_CFG_BAR_MEM64);

		/* Update BAR info */
		bar->xscom = dev->xscom + NX_MMIO_BAR_0;
		xscom_read(p->chip_id, bar->xscom, &val);
		bar->base = GETFIELD(NX_MMIO_BAR_BASE, val) << 12;
		bar->size = get_bar_size(val);

		/*
		 * The config space is initialised with the BARs
		 * disabled, so make sure it is actually disabled in
		 * hardware.
		 */
		npu_dev_bar_update(p->chip_id, bar, false);

		/* Initialize capabilities */
		npu_dev_create_capabilities(dev);

		/* Initialize config space */
		npu_dev_create_cfg(dev);

		index++;
	}
}

static void npu_add_phb_properties(struct npu *p)
{
	struct dt_node *np = p->phb.dt_node;
	uint32_t icsp = get_ics_phandle();
	uint64_t tkill, mm_base, mm_size;
	uint32_t base_lsi = p->base_lsi;
	uint32_t map[] = {
		/* Dev 0 INT#A (used by fn0) */
		0x0000, 0x0, 0x0, 0x1, icsp, base_lsi + NPU_LSI_INT_DL0, 1,
		/* Dev 0 INT#B (used by fn1) */
		0x0000, 0x0, 0x0, 0x2, icsp, base_lsi + NPU_LSI_INT_DL1, 1,
		/* Dev 1 INT#A (used by fn0) */
		0x0800, 0x0, 0x0, 0x1, icsp, base_lsi + NPU_LSI_INT_DL2, 1,
		/* Dev 1 INT#B (used by fn1) */
		0x0800, 0x0, 0x0, 0x2, icsp, base_lsi + NPU_LSI_INT_DL3, 1,
	};
	/* Mask is bus, device and INT# */
	uint32_t mask[] = {0xf800, 0x0, 0x0, 0x7};
	char slotbuf[32];

	/* Add various properties that HB doesn't have to
	 * add, some of them simply because they result from
	 * policy decisions made in skiboot rather than in HB
	 * such as the MMIO windows going to PCI, interrupts,
	 * etc.
	 */
	dt_add_property_cells(np, "#address-cells", 3);
	dt_add_property_cells(np, "#size-cells", 2);
	dt_add_property_cells(np, "#interrupt-cells", 1);
	dt_add_property_cells(np, "bus-range", 0, 0xff);
	dt_add_property_cells(np, "clock-frequency", 0x200, 0);
	dt_add_property_cells(np, "interrupt-parent", icsp);

	/* DLPL Interrupts, we don't use the standard swizzle */
	p->phb.lstate.int_size = 0;
	dt_add_property(np, "interrupt-map", map, sizeof(map));
	dt_add_property(np, "interrupt-map-mask", mask, sizeof(mask));

	/* NPU PHB properties */
	/* TODO: Due to an errata TCE KILL only works when DMA traffic
	 * has been stopped. We need to implement the workaround,
	 * which is to do a TCE kill all instead. */
	tkill = cleanup_addr((uint64_t)p->at_regs) + NPU_TCE_KILL;
	dt_add_property_cells(np, "ibm,opal-num-pes",
			      NPU_NUM_OF_PES);
	dt_add_property_cells(np, "ibm,opal-reserved-pe",
			      0);
	dt_add_property_u64(np, "ibm,opal-tce-kill", tkill);

	/* The memory window is exposed as a 32-bit non-prefetchable
	 * one because the 64-bit prefetchable one is kind of special
	 * to the kernel.
	 */
	mm_base = p->mm_base;
	mm_size = p->mm_size;
	dt_add_property_cells(np, "ranges", 0x02000000,
			      hi32(mm_base), lo32(mm_base),
			      hi32(mm_base), lo32(mm_base),
			      hi32(mm_size), lo32(mm_size));

	/* Set the slot location on the NPU PHB. This PHB can contain
	 * devices that correlate with multiple physical slots, so
	 * present the chip ID instead.
	 */
	snprintf(slotbuf, sizeof(slotbuf), "NPU Chip %d", p->chip_id);
	dt_add_property_string(np, "ibm,io-base-loc-code", slotbuf);
}

static void npu_create_phb(struct dt_node *dn)
{
	const struct dt_property *prop;
	struct npu *p;
	struct pci_slot *slot;
	uint32_t links;
	void *pmem;

	/* Retrieve number of devices */
	links = dt_prop_get_u32(dn, "ibm,links");
	pmem = zalloc(sizeof(struct npu) + links * sizeof(struct npu_dev));
	assert(pmem);

	/* Populate PHB */
	p = pmem;
	p->index = dt_prop_get_u32(dn, "ibm,npu-index");
	p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id");
	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
	p->total_devices = links;

	/* TODO: When hardware fences are implemented, detect them here */
	p->fenced = false;

	/* This is the AT base */
	p->at_xscom = p->xscom_base + NPU_AT_SCOM_OFFSET;
	p->at_regs = (void *)dt_get_address(dn, 0, NULL);

	prop = dt_require_property(dn, "ibm,mmio-window", -1);
	assert(prop->len >= (2 * sizeof(uint64_t)));
	p->mm_base = ((const uint64_t *)prop->prop)[0];
	p->mm_size = ((const uint64_t *)prop->prop)[1];

	p->devices = pmem + sizeof(struct npu);

	/* Interrupt */
	p->base_lsi = p8_chip_irq_block_base(p->chip_id, P8_IRQ_BLOCK_MISC) +
		      NPU_LSI_IRQ_MIN;

	/* Generic PHB */
	p->phb.dt_node = dn;
	p->phb.ops = &npu_ops;
	p->phb.phb_type = phb_type_pcie_v3;

	/* Populate devices */
	npu_create_devices(dn, p);

	/* Populate extra properties */
	npu_add_phb_properties(p);

	/* Create PHB slot */
	slot = npu_slot_create(&p->phb);
	if (!slot) {
		/**
		 * @fwts-label NPUCannotCreatePHBSlot
		 * @fwts-advice Firmware probably ran out of memory creating
		 * NPU slot. NVLink functionality could be broken.
		 */
		prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
	}

	/* Register PHB */
	pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID);

	/* Initialize IODA cache */
	npu_ioda_init(p);

	/* Register interrupt source */
	npu_register_irq(p);

	/* Initialize hardware */
	npu_hw_init(p);
}

void probe_npu(void)
{
	struct dt_node *np;

	/* Scan NPU XSCOM nodes */
	dt_for_each_compatible(dt_root, np, "ibm,power8-npu")
		npu_probe_phb(np);

	/* Scan newly created PHB nodes */
	dt_for_each_compatible(dt_root, np, "ibm,power8-npu-pciex")
		npu_create_phb(np);
}