1 /* Copyright 2013-2015 IBM Corp.
2  *
3  * Licensed under the Apache License, Version 2.0 (the "License");
4  * you may not use this file except in compliance with the License.
5  * You may obtain a copy of the License at
6  *
7  *      http://www.apache.org/licenses/LICENSE-2.0
8  *
9  * Unless required by applicable law or agreed to in writing, software
10  * distributed under the License is distributed on an "AS IS" BASIS,
11  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
12  * implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 #include <skiboot.h>
17 #include <io.h>
18 #include <timebase.h>
19 #include <pci.h>
20 #include <pci-cfg.h>
21 #include <pci-virt.h>
22 #include <pci-slot.h>
23 #include <interrupts.h>
24 #include <opal.h>
25 #include <opal-api.h>
26 #include <cpu.h>
27 #include <device.h>
28 #include <ccan/str/str.h>
29 #include <ccan/array_size/array_size.h>
30 #include <ccan/build_assert/build_assert.h>
31 #include <affinity.h>
32 #include <npu-regs.h>
33 #include <npu.h>
34 #include <xscom.h>
35 #include <string.h>
36 
37 /*
38  * Terminology:
39  *
40  *  Brick - A group of either 8 TX or 8 RX lanes
41  *  Link - A group of 8 TX and 8 RX lanes
42  *
43  * Each link is represented in system software as an emulated PCI
44  * device. Garrison has two chips each with 4 links, therefore there
45  * are 8 emulated PCI devices in total.
46  *
47  *  +----------------------------------------------------------------+
48  *  |              PBCQ3 (SCOM Base Address 0x2012c00)               |
49  *  |               PHB3 (SCOM Base Address 0x9012c00)               |
50  *  +----------------------------------------------------------------+
51  *                          ||||||||  ||||||||
52  *                          ||||||||  ||||||||
53  *                          ||||||||  ||||||||
54  *                          ||||||||  ||||||||
55  *  +----------------------------------------------------------------+
56  *  |                             PCIe x8                            |
57  *  +----------------------------------------------------------------+
58  *  |                               GPU0                             |
59  *  +--------------------------------+-------------------------------+
60  *  |           NV Link 1            |           NV Link 0           |
61  *  +---------------+----------------+---------------+---------------+
62  *  |      RX       |      TX        |      RX       |      TX       |
63  *  +---------------+----------------+---------------+---------------+
64  *      ||||||||        ||||||||         ||||||||        ||||||||
65  *      ||||||||        ||||||||         ||||||||        ||||||||
66  *      ||||||||        ||||||||         ||||||||        ||||||||
67  *      ||||||||        ||||||||         ||||||||        ||||||||
68  *  +---------------+----------------+---------------+---------------+
69  *  |      TX       |      RX        |      TX       |      RX       |
70  *  +---------------+----------------+---------------+---------------+
71  *  |           Lanes [0:7]         PHY 0       Lanes [8:15]         |
72  *  |               SCOM Base Address 0x8000080008010c3f             |
73  *  +--------------------------------+-------------------------------+
74  *  |          Link 0 NDL/NTL        |         Link 1 NTL/NDL        |
75  *  |   SCOM Base Address 0x8013c00  |  SCOM Base Address 0x8013c40  |
76  *  +--------------------------------+-------------------------------+
77  *  |                                                                |
78  *  |          Address Translation/AT (shared for all links)         |
79  *  |                 SCOM Base Address 0x8013d80                    |
80  *  |                                                                |
81  *  +--------------------------------+-------------------------------+
82  *  |          Link 3 NDL/NTL        |         Link 4 NTL/NDL        |
83  *  |   SCOM Base Address 0x8013d00  |  SCOM Base Address 0x8013d40  |
84  *  +--------------------------------+-------------------------------+
85  *  |           Lanes [8:15]        PHY 1       Lanes [0:7]          |
86  *  |               SCOM Base Address 0x8000080008010c7f             |
87  *  +---------------+----------------+---------------+---------------+
88  *  |      TX       |      RX        |      TX       |      RX       |
89  *  +---------------+----------------+---------------+---------------+
90  *      ||||||||        ||||||||         ||||||||        ||||||||
91  *      ||||||||        ||||||||         ||||||||        ||||||||
92  *      ||||||||        ||||||||         ||||||||        ||||||||
93  *      ||||||||        ||||||||         ||||||||        ||||||||
94  *  +---------------+----------------+---------------+---------------+
95  *  |      RX       |      TX        |      RX       |      TX       |
96  *  +---------------+----------------+---------------+---------------+
97  *  |           NV Link 2            |           NV Link 3           |
98  *  +--------------------------------+-------------------------------+
99  *  |                               GPU1                             |
100  *  +----------------------------------------------------------------+
101  *  |                             PCIe x8                            |
102  *  +----------------------------------------------------------------+
103  *                          ||||||||  ||||||||
104  *                          ||||||||  ||||||||
105  *                          ||||||||  ||||||||
106  *                          ||||||||  ||||||||
107  *  +----------------------------------------------------------------+
108  *  |               PHB2 (SCOM Base Address 0x9012800)               |
109  *  |              PBCQ2 (SCOM Base Address 0x2012800)               |
110  *  +----------------------------------------------------------------+
111  *
112  */
113 
114 static struct npu_dev_cap *npu_dev_find_capability(struct npu_dev *dev,
115 						   uint16_t id);
116 
117 #define OPAL_NPU_VERSION          0x02
118 
119 #define PCIE_CAP_START	          0x40
120 #define PCIE_CAP_END	          0x80
121 #define VENDOR_CAP_START          0x80
122 #define VENDOR_CAP_END	          0x90
123 
124 #define VENDOR_CAP_PCI_DEV_OFFSET 0x0d
125 
126 /* Returns the scom base for the given link index */
127 static uint64_t npu_link_scom_base(struct dt_node *dn, uint32_t scom_base,
128 				   int index)
129 {
130 	struct dt_node *link;
131 	uint32_t link_index;
132 	char namebuf[32];
133 
134 	snprintf(namebuf, sizeof(namebuf), "link@%x", index);
135 	link = dt_find_by_name(dn, namebuf);
136 	assert(link);
137 	link_index = dt_prop_get_u32(link, "ibm,npu-link-index");
138 	return scom_base + (link_index * NPU_LINK_SIZE);
139 }
140 
141 static uint64_t get_bar_size(uint64_t bar)
142 {
143 	return (1 << GETFIELD(NX_MMIO_BAR_SIZE, bar)) * 0x10000;
144 }
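
/*
 * For illustration: the hardware encodes a BAR size as a power-of-two
 * multiple of 64KB, so an NX_MMIO_BAR_SIZE field of 5 decodes to
 * (1 << 5) * 0x10000 = 2MB here, and npu_dev_bar_update() below performs
 * the inverse encoding with ilog2(bar->size / 0x10000).
 */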
145 
146 /* Update the changes of the device BAR to link BARs */
147 static void npu_dev_bar_update(uint32_t gcid, struct npu_dev_bar *bar,
148 			       bool enable)
149 {
150 	uint64_t val;
151 
152 	if (!bar->xscom)
153 		return;
154 
155 	val = bar->base;
156 	val = SETFIELD(NX_MMIO_BAR_SIZE, val, ilog2(bar->size / 0x10000));
157 	if (enable)
158 		val |= NX_MMIO_BAR_ENABLE;
159 	xscom_write(gcid, bar->xscom, val);
160 }
161 
162 /* Trap for PCI command (0x4) to enable or disable device's BARs */
163 static int64_t npu_dev_cfg_write_cmd(void *dev,
164 				     struct pci_cfg_reg_filter *pcrf __unused,
165 				     uint32_t offset, uint32_t size,
166 				     uint32_t *data, bool write)
167 {
168 	struct pci_virt_device *pvd = dev;
169 	struct npu_dev *ndev = pvd->data;
170 	bool enable;
171 
172 	if (!write)
173 		return OPAL_PARTIAL;
174 
175 	if (offset != PCI_CFG_CMD)
176 		return OPAL_PARAMETER;
177 	if (size != 1 && size != 2 && size != 4)
178 		return OPAL_PARAMETER;
179 
180 	/* Update the device BARs; the link BARs will be synchronized
181 	 * with hardware automatically.
182 	 */
183 	enable = !!(*data & PCI_CFG_CMD_MEM_EN);
184 	npu_dev_bar_update(ndev->npu->chip_id, &ndev->bar, enable);
185 
186 	/* Normal path to update PCI config buffer */
187 	return OPAL_PARTIAL;
188 }
189 
190 /*
191  * Trap for memory BARs: 0xFF's should be written to the BAR register
192  * prior to getting its size.
193  */
194 static int64_t npu_dev_cfg_bar_read(struct npu_dev *dev __unused,
195 				    struct pci_cfg_reg_filter *pcrf,
196 				    uint32_t offset, uint32_t size,
197 				    uint32_t *data)
198 {
199 	struct npu_dev_bar *bar = (struct npu_dev_bar *)(pcrf->data);
200 
201 	/* Revert to normal path if we weren't trapped for BAR size */
202 	if (!bar->trapped)
203 		return OPAL_PARTIAL;
204 
205 	if (offset != pcrf->start &&
206 	    offset != pcrf->start + 4)
207 		return OPAL_PARAMETER;
208 	if (size != 4)
209 		return OPAL_PARAMETER;
210 
211 	bar->trapped = false;
212 	*data = bar->bar_sz;
213 	return OPAL_SUCCESS;
214 }
215 
216 static int64_t npu_dev_cfg_bar_write(struct npu_dev *dev,
217 				     struct pci_cfg_reg_filter *pcrf,
218 				     uint32_t offset, uint32_t size,
219 				     uint32_t data)
220 {
221 	struct pci_virt_device *pvd = dev->pvd;
222 	struct npu_dev_bar *bar = (struct npu_dev_bar *)(pcrf->data);
223 	uint32_t pci_cmd;
224 
225 	if (offset != pcrf->start &&
226 	    offset != pcrf->start + 4)
227 		return OPAL_PARAMETER;
228 	if (size != 4)
229 		return OPAL_PARAMETER;
230 
231 	/* Return BAR size on next read */
232 	if (data == 0xffffffff) {
233 		bar->trapped = true;
234 		if (offset == pcrf->start)
235 			bar->bar_sz = (bar->size & 0xffffffff);
236 		else
237 			bar->bar_sz = (bar->size >> 32);
238 
239 		return OPAL_SUCCESS;
240 	}
241 
242 	/* Update BAR base address */
243 	if (offset == pcrf->start) {
244 		bar->base &= 0xffffffff00000000UL;
245 		bar->base |= (data & 0xfffffff0);
246 	} else {
247 		bar->base &= 0x00000000ffffffffUL;
248 		bar->base |= ((uint64_t)data << 32);
249 
250 		PCI_VIRT_CFG_NORMAL_RD(pvd, PCI_CFG_CMD, 4, &pci_cmd);
251 		npu_dev_bar_update(dev->npu->chip_id, bar,
252 				   !!(pci_cmd & PCI_CFG_CMD_MEM_EN));
253 	}
254 
255 	/* We still depend on the normal path to update the
256 	 * cached config buffer.
257 	 */
258 	return OPAL_PARAMETER;
259 }
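
/*
 * The read/write BAR filters above intercept the standard all-ones BAR
 * sizing probe. A rough sketch of the sequence as seen from the OS side
 * (illustrative only, the names below are not from this file):
 *
 *   cfg_write32(BAR0, 0xffffffff);     // sets bar->trapped, latches size
 *   lo = cfg_read32(BAR0);             // returns bar->size bits 31:0
 *   cfg_write32(BAR0 + 4, 0xffffffff); // latches the upper half
 *   hi = cfg_read32(BAR0 + 4);         // returns bar->size bits 63:32
 *   cfg_write32(BAR0, original_base);  // normal write, updates bar->base
 */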
260 
261 static int64_t npu_dev_cfg_bar(void *dev, struct pci_cfg_reg_filter *pcrf,
262 			       uint32_t offset, uint32_t len, uint32_t *data,
263 			       bool write)
264 {
265 	struct pci_virt_device *pvd = dev;
266 	struct npu_dev *ndev = pvd->data;
267 
268 	if (write)
269 		return npu_dev_cfg_bar_write(ndev, pcrf, offset, len, *data);
270 
271 	return npu_dev_cfg_bar_read(ndev, pcrf, offset, len, data);
272 }
273 
274 static int64_t npu_dev_cfg_exp_devcap(void *dev,
275 		struct pci_cfg_reg_filter *pcrf __unused,
276 		uint32_t offset, uint32_t size,
277 		uint32_t *data, bool write)
278 {
279 	struct pci_virt_device *pvd = dev;
280 	struct npu_dev *ndev = pvd->data;
281 
282 	assert(write);
283 
284 	if ((size != 2) || (offset & 1)) {
285 		/* Short config writes are not supported */
286 		prlog(PR_ERR, "NPU%d: Unsupported write to pcie control register\n",
287 		      ndev->phb->opal_id);
288 		return OPAL_PARAMETER;
289 	}
290 
291 	if (*data & PCICAP_EXP_DEVCTL_FUNC_RESET)
292 		npu_dev_procedure_reset(ndev);
293 
294 	return OPAL_PARTIAL;
295 }
296 
297 static struct npu_dev *bdfn_to_npu_dev(struct npu *p, uint32_t bdfn)
298 {
299 	struct pci_virt_device *pvd;
300 
301 	/* Sanity check */
302 	if (bdfn & ~0xff)
303 		return NULL;
304 
305 	pvd = pci_virt_find_device(&p->phb, bdfn);
306 	if (pvd)
307 		return pvd->data;
308 
309 	return NULL;
310 }
311 
312 #define NPU_CFG_READ(size, type)						\
313 static int64_t npu_cfg_read##size(struct phb *phb, uint32_t bdfn,		\
314 				  uint32_t offset, type *data)			\
315 {										\
316 	uint32_t val;								\
317 	int64_t ret;								\
318 										\
319 	ret = pci_virt_cfg_read(phb, bdfn, offset, sizeof(*data), &val);	\
320 	*data = (type)val;							\
321 	return ret;								\
322 }
323 #define NPU_CFG_WRITE(size, type)						\
324 static int64_t npu_cfg_write##size(struct phb *phb, uint32_t bdfn,		\
325 				   uint32_t offset, type data)			\
326 {										\
327 	uint32_t val = data;                                            	\
328 										\
329 	return pci_virt_cfg_write(phb, bdfn, offset, sizeof(data), val);	\
330 }
331 
332 NPU_CFG_READ(8,   u8);
333 NPU_CFG_READ(16,  u16);
334 NPU_CFG_READ(32,  u32);
335 NPU_CFG_WRITE(8,  u8);
336 NPU_CFG_WRITE(16, u16);
337 NPU_CFG_WRITE(32, u32);
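
/*
 * For reference, NPU_CFG_READ(8, u8) above expands to roughly:
 *
 *   static int64_t npu_cfg_read8(struct phb *phb, uint32_t bdfn,
 *                                uint32_t offset, u8 *data)
 *   {
 *           uint32_t val;
 *           int64_t ret;
 *
 *           ret = pci_virt_cfg_read(phb, bdfn, offset, sizeof(*data), &val);
 *           *data = (u8)val;
 *           return ret;
 *   }
 */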
338 
339 static int __npu_dev_bind_pci_dev(struct phb *phb __unused,
340 				  struct pci_device *pd,
341 				  void *data)
342 {
343 	struct npu_dev *dev = data;
344 	struct dt_node *pci_dt_node;
345 	char *pcislot;
346 
347 	/* Ignore non-nvidia PCI devices */
348 	if ((pd->vdid & 0xffff) != 0x10de)
349 		return 0;
350 
351 	/* Find the PCI device's slot location */
352 	for (pci_dt_node = pd->dn;
353 	     pci_dt_node && !dt_find_property(pci_dt_node, "ibm,slot-label");
354 	     pci_dt_node = pci_dt_node->parent);
355 
356 	if (!pci_dt_node)
357 		return 0;
358 
359 	pcislot = (char *)dt_prop_get(pci_dt_node, "ibm,slot-label");
360 
361 	prlog(PR_DEBUG, "NPU: comparing GPU %s and NPU %s\n",
362 	      pcislot, dev->slot_label);
363 
364 	if (streq(pcislot, dev->slot_label))
365 		return 1;
366 
367 	return 0;
368 }
369 
370 static void npu_dev_bind_pci_dev(struct npu_dev *dev)
371 {
372 	struct phb *phb;
373 	uint32_t i;
374 
375 	if (dev->pd)
376 		return;
377 
378 	for (i = 0; i < 64; i++) {
379 		if (dev->npu->phb.opal_id == i)
380 			continue;
381 
382 		phb = pci_get_phb(i);
383 		if (!phb)
384 			continue;
385 
386 		dev->pd = pci_walk_dev(phb, NULL, __npu_dev_bind_pci_dev, dev);
387 		if (dev->pd) {
388 			dev->phb = phb;
389 			/* Found the device, set the bit in config space */
390 			PCI_VIRT_CFG_INIT_RO(dev->pvd, VENDOR_CAP_START +
391 				VENDOR_CAP_PCI_DEV_OFFSET, 1, 0x01);
392 			return;
393 		}
394 	}
395 
396 	prlog(PR_INFO, "%s: No PCI device for NPU device %04x:00:%02x.0 to bind to. If you expect a GPU to be there, this is a problem.\n",
397 	      __func__, dev->npu->phb.opal_id, dev->index);
398 }
399 
400 static struct lock pci_npu_phandle_lock = LOCK_UNLOCKED;
401 
402 /* Appends an NPU phandle to the given PCI device node's ibm,npu
403  * property */
404 static void npu_append_pci_phandle(struct dt_node *dn, u32 phandle)
405 {
406 	uint32_t *npu_phandles;
407 	struct dt_property *pci_npu_phandle_prop;
408 	size_t prop_len;
409 
410 	/* Use a lock to make sure no one else has a reference to an
411 	 * ibm,npu property (this assumes this is the only function
412 	 * that holds a reference to it). */
413 	lock(&pci_npu_phandle_lock);
414 
415 	/* This function shouldn't be called unless ibm,npu exists */
416 	pci_npu_phandle_prop = (struct dt_property *)
417 		dt_require_property(dn, "ibm,npu", -1);
418 
419 	/* Need to append to the properties */
420 	prop_len = pci_npu_phandle_prop->len;
421 	prop_len += sizeof(*npu_phandles);
422 	dt_resize_property(&pci_npu_phandle_prop, prop_len);
423 	pci_npu_phandle_prop->len = prop_len;
424 
425 	npu_phandles = (uint32_t *) pci_npu_phandle_prop->prop;
426 	npu_phandles[prop_len/sizeof(*npu_phandles) - 1] = phandle;
427 	unlock(&pci_npu_phandle_lock);
428 }
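
/*
 * For example, a GPU node that ends up bound to two NPU links carries an
 * "ibm,npu" property with one phandle per emulated device:
 *
 *   ibm,npu = <emulated_dev0_phandle emulated_dev1_phandle>;
 *
 * and each emulated device node gets a matching "ibm,gpu" phandle back,
 * see npu_dn_fixup() below.
 */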
429 
430 static int npu_dn_fixup(struct phb *phb,
431 			struct pci_device *pd,
432 			void *data __unused)
433 {
434 	struct npu *p = phb_to_npu(phb);
435 	struct npu_dev *dev;
436 
437 	dev = bdfn_to_npu_dev(p, pd->bdfn);
438 	assert(dev);
439 
440 	if (dev->phb || dev->pd)
441 		return 0;
442 
443 	/* NPU devices require a slot location to associate with GPUs */
444 	dev->slot_label = dt_prop_get(pd->dn, "ibm,slot-label");
445 
446 	/* Bind the emulated PCI device with the real one, which can't
447 	 * be done until the PCI devices are populated. Once the real
448 	 * PCI device is identified, we also need to fix up the
449 	 * device-tree for it.
450 	 */
451 	npu_dev_bind_pci_dev(dev);
452 	if (dev->phb && dev->pd && dev->pd->dn) {
453 		if (dt_find_property(dev->pd->dn, "ibm,npu"))
454 			npu_append_pci_phandle(dev->pd->dn, pd->dn->phandle);
455 		else
456 			dt_add_property_cells(dev->pd->dn, "ibm,npu", pd->dn->phandle);
457 
458 		dt_add_property_cells(pd->dn, "ibm,gpu", dev->pd->dn->phandle);
459 	}
460 
461 	return 0;
462 }
463 
464 static void npu_phb_final_fixup(struct phb *phb)
465 {
466 	pci_walk_dev(phb, NULL, npu_dn_fixup, NULL);
467 }
468 
469 static void npu_ioda_init(struct npu *p)
470 {
471 	uint64_t *data64;
472 	uint32_t i;
473 
474 	/* LXIVT - Disable all LSIs */
475 	for (i = 0; i < ARRAY_SIZE(p->lxive_cache); i++) {
476 		data64 = &p->lxive_cache[i];
477 		*data64 = SETFIELD(NPU_IODA_LXIVT_PRIORITY, 0ul, 0xff);
478 		*data64 = SETFIELD(NPU_IODA_LXIVT_SERVER, *data64, 0);
479 	}
480 
481 	/* PCT - Reset to reserved PE# */
482 	for (i = 0; i < ARRAY_SIZE(p->pce_cache); i++) {
483 		data64 = &p->pce_cache[i];
484 		*data64 = SETFIELD(NPU_IODA_PCT_PE, 0ul, 0ul);
485 		*data64 |= NPU_IODA_PCT_LINK_ENABLED;
486 	}
487 
488 	/* Clear TVT */
489 	memset(p->tve_cache, 0, sizeof(p->tve_cache));
490 }
491 
492 static int64_t npu_ioda_reset(struct phb *phb, bool purge)
493 {
494 	struct npu *p = phb_to_npu(phb);
495 	uint32_t i;
496 
497 	if (purge) {
498 		NPUDBG(p, "Purging all IODA tables...\n");
499 		npu_ioda_init(p);
500 	}
501 
502 	/* LIST */
503 	npu_ioda_sel(p, NPU_IODA_TBL_LIST, 0, true);
504 	for (i = 0; i < 8; i++)
505 		out_be64(p->at_regs + NPU_IODA_DATA0, 0x1);
506 
507 	/* LIXVT */
508 	npu_ioda_sel(p, NPU_IODA_TBL_LXIVT, 0, true);
509 	for (i = 0; i < ARRAY_SIZE(p->lxive_cache); i++)
510 		out_be64(p->at_regs + NPU_IODA_DATA0, p->lxive_cache[i]);
511 
512 	/* PCT */
513 	npu_ioda_sel(p, NPU_IODA_TBL_PCT, 0, true);
514 	for (i = 0; i < ARRAY_SIZE(p->pce_cache); i++)
515 		out_be64(p->at_regs + NPU_IODA_DATA0, p->pce_cache[i]);
516 
517 	/* TVT */
518 	npu_ioda_sel(p, NPU_IODA_TBL_TVT, 0, true);
519 	for (i = 0; i < ARRAY_SIZE(p->tve_cache); i++)
520 		out_be64(p->at_regs + NPU_IODA_DATA0, p->tve_cache[i]);
521 
522 	return OPAL_SUCCESS;
523 }
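
/*
 * The IODA tables are accessed indirectly: npu_ioda_sel() selects a table
 * and a starting index, and subsequent accesses to NPU_IODA_DATA0 then hit
 * that table. The last argument requests auto-increment of the index,
 * which is why the loops above can stream whole tables with back-to-back
 * out_be64() calls, while single-entry updates elsewhere pass false and an
 * explicit index.
 */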
524 
525 static int npu_isn_valid(struct npu *p, uint32_t isn)
526 {
527 	if (p->chip_id != p8_irq_to_chip(isn) || p->index != 0 ||
528 	    NPU_IRQ_NUM(isn) < NPU_LSI_IRQ_MIN ||
529 	    NPU_IRQ_NUM(isn) > NPU_LSI_IRQ_MAX) {
530 		/**
531 		 * @fwts-label NPUisnInvalid
532 		 * @fwts-advice NVLink not functional
533 		 */
534 		prlog(PR_ERR, "NPU%d: isn 0x%x not valid for this NPU\n",
535 		      p->phb.opal_id, isn);
536 		return false;
537 	}
538 
539 	return true;
540 }
541 
542 static int64_t npu_lsi_get_xive(struct irq_source *is, uint32_t isn,
543 				uint16_t *server, uint8_t *prio)
544 {
545 	struct npu *p = is->data;
546 	uint32_t irq = NPU_IRQ_NUM(isn);
547 	uint64_t lxive;
548 
549 	if (!npu_isn_valid(p, isn))
550 		return OPAL_PARAMETER;
551 
552 	/* The content is fetched from the cache, which requires
553 	 * that the cache has been initialized with the default
554 	 * values beforehand.
555 	 */
556 	irq -= NPU_LSI_IRQ_MIN;
557 	lxive = p->lxive_cache[irq];
558 	*server = GETFIELD(NPU_IODA_LXIVT_SERVER, lxive);
559 	*prio = GETFIELD(NPU_IODA_LXIVT_PRIORITY, lxive);
560 
561 	return OPAL_SUCCESS;
562 }
563 
564 static int64_t npu_lsi_set_xive(struct irq_source *is, uint32_t isn,
565 				uint16_t server, uint8_t prio)
566 {
567 	struct npu *p = is->data;
568 	uint32_t irq = NPU_IRQ_NUM(isn);
569 	uint64_t lxive;
570 
571 	if (!npu_isn_valid(p, isn))
572 		return OPAL_PARAMETER;
573 
574 	/* Figure out LXIVT entry */
575 	lxive = SETFIELD(NPU_IODA_LXIVT_SERVER, 0ul, server);
576 	lxive = SETFIELD(NPU_IODA_LXIVT_PRIORITY, lxive, prio);
577 
578 	/* Cache LXIVT entry */
579 	irq -= NPU_LSI_IRQ_MIN;
580 	p->lxive_cache[irq] = lxive;
581 
582 	/* Update to LXIVT entry */
583 	npu_ioda_sel(p, NPU_IODA_TBL_LXIVT, irq, false);
584 	lxive = in_be64(p->at_regs + NPU_IODA_DATA0);
585 	lxive = SETFIELD(NPU_IODA_LXIVT_SERVER, lxive, server);
586 	lxive = SETFIELD(NPU_IODA_LXIVT_PRIORITY, lxive, prio);
587 	out_be64(p->at_regs + NPU_IODA_DATA0, lxive);
588 
589 	return OPAL_SUCCESS;
590 }
591 
592 static void npu_err_interrupt(struct irq_source *is, uint32_t isn)
593 {
594 	struct npu *p = is->data;
595 	uint32_t irq = NPU_IRQ_NUM(isn);
596 
597 	if (!npu_isn_valid(p, isn))
598 		return;
599 
600 	/* There are 4 LSIs used for error reporting: 4/5 for data
601 	 * link error reporting, while 6/7 are for frozen PE detection.
602 	 */
603 	irq -= NPU_LSI_IRQ_MIN;
604 	switch (irq) {
605 	case 4 ... 5:
606 		prerror("Invalid NPU error interrupt received\n");
607 		break;
608 	case 6 ... 7:
609 		opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
610 					OPAL_EVENT_PCI_ERROR);
611 	}
612 }
613 
614 static uint64_t npu_lsi_attributes(struct irq_source *is, uint32_t isn)
615 {
616 	struct npu *p = is->data;
617 	uint32_t idx = isn - p->base_lsi;
618 
619 	if (idx >= 4)
620 		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_LSI;
621 	return IRQ_ATTR_TARGET_LINUX;
622 }
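
/*
 * LSI ownership summary: of the 8 LSIs registered in npu_register_irq()
 * below, the first four (the DL0-DL3 entries used in the interrupt-map
 * built by npu_add_phb_properties()) are routed to Linux, while the last
 * four are kept by OPAL for the error reporting done in
 * npu_err_interrupt() above.
 */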
623 
624 /* Error LSIs (skiboot owned) */
625 static const struct irq_source_ops npu_lsi_irq_ops = {
626 	.get_xive	= npu_lsi_get_xive,
627 	.set_xive	= npu_lsi_set_xive,
628 	.attributes	= npu_lsi_attributes,
629 	.interrupt	= npu_err_interrupt,
630 };
631 
632 static void npu_register_irq(struct npu *p)
633 {
634 	register_irq_source(&npu_lsi_irq_ops, p, p->base_lsi, 8);
635 }
636 
637 static void npu_hw_init(struct npu *p)
638 {
639 	/* 3 MMIO setup for AT */
640 	out_be64(p->at_regs + NPU_LSI_SOURCE_ID,
641 		 SETFIELD(NPU_LSI_SRC_ID_BASE, 0ul, NPU_LSI_IRQ_MIN >> 4));
642 	BUILD_ASSERT((NPU_LSI_IRQ_MIN & 0x07F0) == NPU_LSI_IRQ_MIN);
643 	out_be64(p->at_regs + NPU_INTREP_TIMER, 0x0ul);
644 	npu_ioda_reset(&p->phb, false);
645 }
646 
647 static int64_t npu_map_pe_dma_window_real(struct phb *phb,
648 					   uint64_t pe_number,
649 					   uint16_t window_id,
650 					   uint64_t pci_start_addr,
651 					   uint64_t pci_mem_size)
652 {
653 	struct npu *p = phb_to_npu(phb);
654 	uint64_t end;
655 	uint64_t tve;
656 
657 	/* Sanity check. Each PE has one corresponding TVE */
658 	if (pe_number >= NPU_NUM_OF_PES ||
659 	    window_id != pe_number)
660 		return OPAL_PARAMETER;
661 
662 	if (pci_mem_size) {
663 		/* Enable */
664 
665 		end = pci_start_addr + pci_mem_size;
666 
667 		/* We have to be 16M aligned */
668 		if ((pci_start_addr & 0x00ffffff) ||
669 		    (pci_mem_size & 0x00ffffff))
670 			return OPAL_PARAMETER;
671 
672 		/*
673 		 * It *looks* like this is the max we can support (we need
674 		 * to verify this). Also we are not checking for rollover,
675 		 * but then we aren't trying too hard to protect ourselves
676 		 * against a completely broken OS.
677 		 */
678 		if (end > 0x0003ffffffffffffull)
679 			return OPAL_PARAMETER;
680 
681 		/*
682 		 * Put start address bits 49:24 into TVE[52:53]||[0:23]
683 		 * and end address bits 49:24 into TVE[54:55]||[24:47]
684 		 * and set TVE[51]
685 		 */
686 		tve  = (pci_start_addr << 16) & (0xffffffull << 48);
687 		tve |= (pci_start_addr >> 38) & (3ull << 10);
688 		tve |= (end >>  8) & (0xfffffful << 16);
689 		tve |= (end >> 40) & (3ull << 8);
690 		tve |= PPC_BIT(51);
691 	} else {
692 		/* Disable */
693 		tve = 0;
694 	}
695 
696 	npu_ioda_sel(p, NPU_IODA_TBL_TVT, window_id, false);
697 	out_be64(p->at_regs + NPU_IODA_DATA0, tve);
698 	p->tve_cache[window_id] = tve;
699 
700 	return OPAL_SUCCESS;
701 }
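
/*
 * Worked example (illustrative values): mapping a 32GB bypass window for
 * PE#0 starting at PCI address 0x800000000 passes the checks above (start
 * and size are 16MB aligned, and the end address stays below 2^50), so
 * TVE#0 is programmed to cover [0x800000000, 0x800000000 + 32GB).
 */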
702 
703 static int64_t npu_map_pe_dma_window(struct phb *phb,
704 					 uint64_t pe_number,
705 					 uint16_t window_id,
706 					 uint16_t tce_levels,
707 					 uint64_t tce_table_addr,
708 					 uint64_t tce_table_size,
709 					 uint64_t tce_page_size)
710 {
711 	struct npu *p = phb_to_npu(phb);
712 	uint64_t tts_encoded;
713 	uint64_t data64 = 0;
714 
715 	/* Sanity check. Each PE has one corresponding TVE */
716 	if (pe_number >= NPU_NUM_OF_PES ||
717 	    window_id != pe_number)
718 		return OPAL_PARAMETER;
719 
720 	/* Special case: a zero TCE table size is used to disable
721 	 * the TVE.
722 	 */
723 	if (!tce_table_size) {
724 		npu_ioda_sel(p, NPU_IODA_TBL_TVT, window_id, false);
725 		out_be64(p->at_regs + NPU_IODA_DATA0, 0ul);
726 		p->tve_cache[window_id] = 0ul;
727 		return OPAL_SUCCESS;
728 	}
729 
730 	/* Additional arguments validation */
731 	if (tce_levels < 1 ||
732 	    tce_levels > 4 ||
733 	    !is_pow2(tce_table_size) ||
734 	    tce_table_size < 0x1000)
735 		return OPAL_PARAMETER;
736 
737 	/* TCE table size */
738 	data64 = SETFIELD(NPU_IODA_TVT_TTA, 0ul, tce_table_addr >> 12);
739 	tts_encoded = ilog2(tce_table_size) - 11;
740 	if (tts_encoded > 39)
741 		return OPAL_PARAMETER;
742 	data64 = SETFIELD(NPU_IODA_TVT_SIZE, data64, tts_encoded);
743 
744 	/* TCE page size */
745 	switch (tce_page_size) {
746 	case 0x10000:		/* 64K */
747 		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 5);
748 		break;
749 	case 0x1000000:		/* 16M */
750 		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 13);
751 		break;
752 	case 0x10000000:	/* 256M */
753 		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 17);
754 		break;
755 	case 0x1000:		/* 4K */
756 	default:
757 		data64 = SETFIELD(NPU_IODA_TVT_PSIZE, data64, 1);
758 	}
759 
760 	/* Number of levels */
761 	data64 = SETFIELD(NPU_IODA_TVT_LEVELS, data64, tce_levels - 1);
762 
763 	/* Update to hardware */
764 	npu_ioda_sel(p, NPU_IODA_TBL_TVT, window_id, false);
765 	out_be64(p->at_regs + NPU_IODA_DATA0, data64);
766 	p->tve_cache[window_id] = data64;
767 
768 	return OPAL_SUCCESS;
769 }
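
/*
 * Example TVT encoding: a single-level 256KB TCE table with 64KB pages
 * gives tts_encoded = ilog2(0x40000) - 11 = 7, PSIZE = 5 and LEVELS = 0,
 * packed into the entry together with the table address >> 12.
 */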
770 
771 static int64_t npu_set_pe(struct phb *phb,
772 			      uint64_t pe_number,
773 			      uint64_t bdfn,
774 			      uint8_t bcompare,
775 			      uint8_t dcompare,
776 			      uint8_t fcompare,
777 			      uint8_t action)
778 {
779 	struct npu *p = phb_to_npu(phb);
780 	struct npu_dev *dev;
781 	uint32_t link_idx;
782 	uint64_t *data64;
783 
784 	/* Sanity check */
785 	if (action != OPAL_MAP_PE &&
786 	    action != OPAL_UNMAP_PE)
787 		return OPAL_PARAMETER;
788 	if (pe_number >= NPU_NUM_OF_PES)
789 		return OPAL_PARAMETER;
790 
791 	/* All emulated PCI devices are hooked to the root bus, whose
792 	 * bus number is zero.
793 	 */
794 	dev = bdfn_to_npu_dev(p, bdfn);
795 	if ((bdfn >> 8) || !dev)
796 		return OPAL_PARAMETER;
797 
798 	link_idx = dev->index;
799 	dev->pe_number = pe_number;
800 
801 	/* Separate links will be mapped to different PEs */
802 	if (bcompare != OpalPciBusAll ||
803 	    dcompare != OPAL_COMPARE_RID_DEVICE_NUMBER ||
804 	    fcompare != OPAL_COMPARE_RID_FUNCTION_NUMBER)
805 		return OPAL_UNSUPPORTED;
806 
807 	/* Map the link to the corresponding PE */
808 	data64 = &p->pce_cache[link_idx];
809 	if (action == OPAL_MAP_PE)
810 		*data64 = SETFIELD(NPU_IODA_PCT_PE, *data64,
811 				   pe_number);
812 	else
813 		*data64 = SETFIELD(NPU_IODA_PCT_PE, *data64,
814 				   NPU_NUM_OF_PES);
815 
816 	*data64 |= NPU_IODA_PCT_LINK_ENABLED;
817 
818 	npu_ioda_sel(p, NPU_IODA_TBL_PCT, link_idx, false);
819 	out_be64(p->at_regs + NPU_IODA_DATA0, *data64);
820 
821 	return OPAL_SUCCESS;
822 }
823 
824 static int64_t npu_get_link_state(struct pci_slot *slot __unused, uint8_t *val)
825 {
826 	/* As we're emulating all PCI stuff, the link bandwidth
827 	 * isn't a big deal anyway.
828 	 */
829 	*val = OPAL_SHPC_LINK_UP_x1;
830 	return OPAL_SUCCESS;
831 }
832 
833 static int64_t npu_get_power_state(struct pci_slot *slot __unused, uint8_t *val)
834 {
835 	*val = PCI_SLOT_POWER_ON;
836 	return OPAL_SUCCESS;
837 }
838 
839 static int64_t npu_hreset(struct pci_slot *slot __unused)
840 {
841 	prlog(PR_DEBUG, "NPU: driver should call reset procedure here\n");
842 
843 	return OPAL_SUCCESS;
844 }
845 
846 static int64_t npu_freset(struct pci_slot *slot __unused)
847 {
848 	/* FIXME: PHB fundamental reset, which needs to be
849 	 * figured out later. It's used by EEH recovery
850 	 * upon fenced AT.
851 	 */
852 	return OPAL_SUCCESS;
853 }
854 
855 static struct pci_slot *npu_slot_create(struct phb *phb)
856 {
857 	struct pci_slot *slot;
858 
859 	slot = pci_slot_alloc(phb, NULL);
860 	if (!slot)
861 		return slot;
862 
863 	/* Elementary functions */
864 	slot->ops.get_presence_state  = NULL;
865 	slot->ops.get_link_state      = npu_get_link_state;
866 	slot->ops.get_power_state     = npu_get_power_state;
867 	slot->ops.get_attention_state = NULL;
868 	slot->ops.get_latch_state     = NULL;
869 	slot->ops.set_power_state     = NULL;
870 	slot->ops.set_attention_state = NULL;
871 
872 	slot->ops.prepare_link_change = NULL;
873 	slot->ops.poll_link           = NULL;
874 	slot->ops.hreset              = npu_hreset;
875 	slot->ops.freset              = npu_freset;
876 	slot->ops.creset              = NULL;
877 
878 	return slot;
879 }
880 
881 static int64_t npu_freeze_status(struct phb *phb,
882 				     uint64_t pe_number __unused,
883 				     uint8_t *freeze_state,
884 				     uint16_t *pci_error_type __unused,
885 				     uint16_t *severity __unused)
886 {
887 	/* FIXME: When it's called by the skiboot PCI config accessor,
888 	 * the PE number is fixed to 0, which is incorrect. We need to
889 	 * introduce another PHB callback to translate it. For now,
890 	 * it keeps the skiboot PCI enumeration going.
891 	 */
892 	struct npu *p = phb_to_npu(phb);
893 	if (p->fenced)
894 		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
895 	else
896 		*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
897 	return OPAL_SUCCESS;
898 }
899 
900 static int64_t npu_eeh_next_error(struct phb *phb,
901 				  uint64_t *first_frozen_pe,
902 				  uint16_t *pci_error_type,
903 				  uint16_t *severity)
904 {
905 	struct npu *p = phb_to_npu(phb);
906 	int i;
907 	uint64_t result = 0;
908 	*first_frozen_pe = -1;
909 	*pci_error_type = OPAL_EEH_NO_ERROR;
910 	*severity = OPAL_EEH_SEV_NO_ERROR;
911 
912 	if (p->fenced) {
913 		*pci_error_type = OPAL_EEH_PHB_ERROR;
914 		*severity = OPAL_EEH_SEV_PHB_FENCED;
915 		return OPAL_SUCCESS;
916 	}
917 
918 	npu_ioda_sel(p, NPU_IODA_TBL_PESTB, 0, true);
919 	for (i = 0; i < NPU_NUM_OF_PES; i++) {
920 		result = in_be64(p->at_regs + NPU_IODA_DATA0);
921 		if (result > 0) {
922 			*first_frozen_pe = i;
923 			*pci_error_type = OPAL_EEH_PE_ERROR;
924 			*severity = OPAL_EEH_SEV_PE_ER;
925 			break;
926 		}
927 	}
928 
929 	return OPAL_SUCCESS;
930 }
931 
932 /* For use in error injection and handling. */
933 void npu_set_fence_state(struct npu *p, bool fence) {
934 	p->fenced = fence;
935 
936 	if (fence)
937 		prlog(PR_ERR, "NPU: Chip %x is fenced, reboot required.\n",
938 		      p->chip_id);
939 	else
940 		prlog(PR_WARNING, "NPU: un-fencing is dangerous and should \
941 		      only be used for development purposes.");
942 }
943 
944 /* Sets the NPU to trigger an error when a DMA occurs */
945 static int64_t npu_err_inject(struct phb *phb, uint64_t pe_number,
946 			      uint32_t type, uint32_t func __unused,
947 			      uint64_t addr __unused, uint64_t mask __unused)
948 {
949 	struct npu *p = phb_to_npu(phb);
950 	struct npu_dev *dev = NULL;
951 	int i;
952 
953 	if (pe_number >= NPU_NUM_OF_PES) {
954 		prlog(PR_ERR, "NPU: error injection failed, bad PE given\n");
955 		return OPAL_PARAMETER;
956 	}
957 
958 	for (i = 0; i < p->total_devices; i++) {
959 		if (p->devices[i].pe_number == pe_number) {
960 			dev = &p->devices[i];
961 			break;
962 		}
963 	}
964 
965 	if (!dev) {
966 		prlog(PR_ERR, "NPU: couldn't find device with PE%llx\n", pe_number);
967 		return OPAL_PARAMETER;
968 	}
969 
970 	/* TODO: extend this to conform to OPAL injection standards */
971 	if (type > 1) {
972 		prlog(PR_ERR, "NPU: invalid error injection type\n");
973 		return OPAL_PARAMETER;
974 	} else if (type == 1) {
975 		/* Emulate fence mode. */
976 		npu_set_fence_state(p, true);
977 	} else {
978 		/* Cause a freeze with an invalid MMIO read.  If the BAR is not
979 		 * enabled, this will checkstop the machine.
980 		 */
981 		npu_dev_bar_update(p->chip_id, &dev->bar, true);
982 		in_be64((void *)dev->bar.base);
983 	}
984 
985 	return OPAL_SUCCESS;
986 }
987 
988 static const struct phb_ops npu_ops = {
989 	.cfg_read8		= npu_cfg_read8,
990 	.cfg_read16		= npu_cfg_read16,
991 	.cfg_read32		= npu_cfg_read32,
992 	.cfg_write8		= npu_cfg_write8,
993 	.cfg_write16		= npu_cfg_write16,
994 	.cfg_write32		= npu_cfg_write32,
995 	.choose_bus		= NULL,
996 	.get_reserved_pe_number	= NULL,
997 	.device_init		= NULL,
998 	.phb_final_fixup	= npu_phb_final_fixup,
999 	.ioda_reset		= npu_ioda_reset,
1000 	.papr_errinjct_reset	= NULL,
1001 	.pci_reinit		= NULL,
1002 	.set_phb_mem_window	= NULL,
1003 	.phb_mmio_enable	= NULL,
1004 	.map_pe_mmio_window	= NULL,
1005 	.map_pe_dma_window	= npu_map_pe_dma_window,
1006 	.map_pe_dma_window_real	= npu_map_pe_dma_window_real,
1007 	.pci_msi_eoi		= NULL,
1008 	.set_xive_pe		= NULL,
1009 	.get_msi_32		= NULL,
1010 	.get_msi_64		= NULL,
1011 	.set_pe			= npu_set_pe,
1012 	.set_peltv		= NULL,
1013 	.eeh_freeze_status	= npu_freeze_status,
1014 	.eeh_freeze_clear	= NULL,
1015 	.eeh_freeze_set		= NULL,
1016 	.next_error		= npu_eeh_next_error,
1017 	.err_inject		= npu_err_inject,
1018 	.get_diag_data2		= NULL,
1019 	.set_capi_mode		= NULL,
1020 	.set_capp_recovery	= NULL,
1021 };
1022 
1023 static void assign_mmio_bars(uint32_t gcid, uint32_t xscom,
1024 			     struct dt_node *npu_dn, uint64_t mm_win[2],
1025 			     uint64_t at_bar[2])
1026 {
1027 	uint64_t mem_start, mem_end;
1028 	struct npu_dev_bar bar;
1029 	struct dt_node *link;
1030 
1031 	/* Configure BAR selection.
1032 	 *
1033 	 * Currently, each PHY contains 2 links and each link has 2
1034 	 * BARs. The first BAR is assigned to the DLTL region which is
1035 	 * what the kernel uses. The second BAR is assigned to either
1036 	 * the PL or AT region, or left unassigned. The PL0/PL1/AT
1037 	 * MMIO regions are not exposed to the kernel so we assign
1038 	 * them at the start of the available memory area followed by
1039 	 * the DLTL regions. So we end up with the following memory
1040 	 * map (assuming we're given a memory region starting at
1041 	 * 0x3fff000000000):
1042 	 *
1043 	 * Link#0-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000420000
1044 	 * Link#0-BAR#1:     PL0 BAR (  2MB) - 0x3fff000000000
1045 	 * Link#1-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000440000
1046 	 * Link#1-BAR#1:      AT BAR ( 64KB) - 0x3fff000400000
1047 	 * Link#2-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000460000
1048 	 * Link#2-BAR#1:     PL1 BAR (  2MB) - 0x3fff000200000
1049 	 * Link#3-BAR#0: NTL/NDL BAR (128KB) - 0x3fff000480000
1050 	 * Link#3-BAR#1:  UNASSIGNED
1051 	 */
1052 	xscom_write(gcid, xscom + NPU_AT_SCOM_OFFSET + NX_BAR,
1053 		    0x0211000043500000UL);
1054 
1055 	xscom_read(gcid, npu_link_scom_base(npu_dn, xscom, 0) + NX_MMIO_BAR_0,
1056 		   &mem_start);
1057 	mem_start = GETFIELD(NX_MMIO_BAR_BASE, mem_start) << 12;
1058 
1059 	xscom_read(gcid, npu_link_scom_base(npu_dn, xscom, 5) + NX_MMIO_BAR_0,
1060 		   &mem_end);
1061 	mem_end = (GETFIELD(NX_MMIO_BAR_BASE, mem_end) << 12) +
1062 		get_bar_size(mem_end);
1063 
1064 	/* PL0 BAR comes first at 0x3fff000000000 */
1065 	bar.xscom = npu_link_scom_base(npu_dn, xscom, 0) + NX_MMIO_BAR_1;
1066 	bar.base = mem_start;
1067 	bar.size = NX_MMIO_PL_SIZE;
1068 	npu_dev_bar_update(gcid, &bar, true);
1069 
1070 	/* PL1 BAR */
1071 	bar.xscom = npu_link_scom_base(npu_dn, xscom, 4) + NX_MMIO_BAR_1;
1072 	bar.base += bar.size;
1073 	bar.size = NX_MMIO_PL_SIZE;
1074 	npu_dev_bar_update(gcid, &bar, true);
1075 
1076 	/* Then the AT BAR */
1077 	bar.xscom = npu_link_scom_base(npu_dn, xscom, 1) + NX_MMIO_BAR_1;
1078 	bar.base += bar.size;
1079 	bar.size = NX_MMIO_AT_SIZE;
1080 	at_bar[0] = bar.base;
1081 	at_bar[1] = NX_MMIO_AT_SIZE;
1082 	npu_dev_bar_update(gcid, &bar, true);
1083 
1084 	/* Now we configure all the DLTL BARs. These are the ones
1085 	 * actually exposed to the kernel. */
1086 	mm_win[0] = bar.base + bar.size;
1087 	dt_for_each_node(npu_dn, link) {
1088 		uint32_t index;
1089 
1090 		index = dt_prop_get_u32(link, "ibm,npu-link-index");
1091 		bar.xscom = npu_link_scom_base(npu_dn, xscom, index) +
1092 			NX_MMIO_BAR_0;
1093 		bar.base += bar.size;
1094 		bar.size = NX_MMIO_DL_SIZE;
1095 		bar.base = ALIGN_UP(bar.base, bar.size);
1096 		npu_dev_bar_update(gcid, &bar, false);
1097 	}
1098 	mm_win[1] = (bar.base + bar.size) - mm_win[0];
1099 
1100 	/* If we weren't given enough room to set up all the BARs we
1101 	 * require, it's better to crash here than risk creating
1102 	 * overlapping BARs which will xstop the machine randomly in
1103 	 * the future. */
1104 	assert(bar.base + bar.size <= mem_end);
1105 }
1106 
1107 /* Probe the NPU device node and create a PCI root device node
1108  * accordingly. The NPU device node should specify the number
1109  * of links and the XSCOM base address used to access the links.
1110  */
1111 static void npu_probe_phb(struct dt_node *dn)
1112 {
1113 	struct dt_node *np;
1114 	uint32_t gcid, index, phb_index, xscom;
1115 	uint64_t at_bar[2], mm_win[2];
1116 	uint32_t links;
1117 	char *path;
1118 
1119 	/* Retrieve chip id */
1120 	path = dt_get_path(dn);
1121 	gcid = dt_get_chip_id(dn);
1122 	index = dt_prop_get_u32(dn, "ibm,npu-index");
1123 	phb_index = dt_prop_get_u32(dn, "ibm,phb-index");
1124 	links = dt_prop_get_u32(dn, "ibm,npu-links");
1125 	prlog(PR_INFO, "Chip %d Found NPU%d (%d links) at %s\n",
1126 	      gcid, index, links, path);
1127 	free(path);
1128 
1129 	/* Retrieve xscom base addr */
1130 	xscom = dt_get_address(dn, 0, NULL);
1131 	prlog(PR_INFO, "   XSCOM Base:  %08x\n", xscom);
1132 
1133 	assign_mmio_bars(gcid, xscom, dn, mm_win, at_bar);
1134 	prlog(PR_INFO, "   AT BAR:      %016llx (%lldKB)\n",
1135 	      at_bar[0], at_bar[1] / 0x400);
1136 
1137 	/* Create PCI root device node */
1138 	np = dt_new_addr(dt_root, "pciex", at_bar[0]);
1139 	assert(np);
1140 
1141 	dt_add_property_strings(np, "compatible",
1142 				"ibm,power8-npu-pciex", "ibm,ioda2-npu-phb");
1143 	dt_add_property_strings(np, "device_type", "pciex");
1144 	dt_add_property(np, "reg", at_bar, sizeof(at_bar));
1145 
1146 	dt_add_property_cells(np, "ibm,phb-index", phb_index);
1147 	dt_add_property_cells(np, "ibm,npu-index", index);
1148 	dt_add_property_cells(np, "ibm,chip-id", gcid);
1149 	dt_add_property_cells(np, "ibm,xscom-base", xscom);
1150 	dt_add_property_cells(np, "ibm,npcq", dn->phandle);
1151 	dt_add_property_cells(np, "ibm,links", links);
1152 	dt_add_property(np, "ibm,mmio-window", mm_win, sizeof(mm_win));
1153 	dt_add_property_cells(np, "ibm,phb-diag-data-size", 0);
1154 
1155 	/* Disable fast reboot - not currently supported */
1156 	disable_fast_reboot("NVLink device enabled");
1157 }
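
/*
 * With the example memory map shown in assign_mmio_bars(), the node
 * created above would look roughly like:
 *
 *   pciex@3fff000400000 {
 *           compatible = "ibm,power8-npu-pciex", "ibm,ioda2-npu-phb";
 *           device_type = "pciex";
 *           reg = <AT BAR base and size>;
 *           ibm,npcq = <phandle of the NPU xscom node>;
 *           ...
 *   };
 *
 * which npu_create_phb() consumes later in boot.
 */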
1158 
1159 static void npu_dev_populate_vendor_cap(struct npu_dev_cap *cap)
1160 {
1161 	struct npu_dev *dev = cap->dev;
1162 	struct pci_virt_device *pvd = dev->pvd;
1163 	uint32_t offset = cap->start;
1164 	uint8_t val;
1165 
1166 	/* Add length and version information */
1167 	val = cap->end - cap->start;
1168 	PCI_VIRT_CFG_INIT_RO(pvd, offset + 2, 1, val);
1169 	PCI_VIRT_CFG_INIT_RO(pvd, offset + 3, 1, OPAL_NPU_VERSION);
1170 	offset += 4;
1171 
1172 	/* Defaults when the trap can't handle the read/write (eg. due
1173 	 * to reading/writing less than 4 bytes). */
1174 	val = 0x0;
1175 	PCI_VIRT_CFG_INIT_RO(pvd, offset, 4, val);
1176 	PCI_VIRT_CFG_INIT_RO(pvd, offset + 4, 4, val);
1177 
1178 	/* Create a trap for AT/PL procedures */
1179 	pci_virt_add_filter(pvd, offset, 8,
1180 			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
1181 			    npu_dev_procedure, NULL);
1182 	offset += 8;
1183 
1184 	PCI_VIRT_CFG_INIT_RO(pvd, offset, 1, dev->index);
1185 }
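
/*
 * Resulting vendor capability layout (offsets relative to
 * VENDOR_CAP_START):
 *   +0x00  capability ID      (filled in by npu_dev_create_cfg())
 *   +0x01  next capability    (filled in by npu_dev_create_cfg())
 *   +0x02  capability length
 *   +0x03  OPAL_NPU_VERSION
 *   +0x04  AT/PL procedure control/status (8 bytes, trapped above)
 *   +0x0c  link index
 *   +0x0d  "real PCI device found" flag, set by npu_dev_bind_pci_dev()
 */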
1186 
1187 static void npu_dev_populate_pcie_cap(struct npu_dev_cap *cap)
1188 {
1189 	struct npu_dev *dev = cap->dev;
1190 	struct pci_virt_device *pvd = dev->pvd;
1191 	uint32_t base = cap->start;
1192 	uint32_t val;
1193 
1194 	/* Sanity check on capability ID */
1195 	if (cap->id != PCI_CFG_CAP_ID_EXP) {
1196 		prlog(PR_NOTICE, "%s: Invalid capability ID %d (%d)\n",
1197 		      __func__, cap->id, PCI_CFG_CAP_ID_EXP);
1198 		return;
1199 	}
1200 
1201 	/* Sanity check on spanned registers */
1202 	if ((cap->end - cap->start) < PCIE_CAP_START) {
1203 		prlog(PR_NOTICE, "%s: Invalid reg region [%x, %x] for cap %d\n",
1204 		      __func__, cap->start, cap->end, cap->id);
1205 		return;
1206 	}
1207 
1208 	/* 0x00 - ID/PCIE capability */
1209 	val = cap->id;
1210 	val |= ((0x2 << 16) | (PCIE_TYPE_ENDPOINT << 20));
1211 	PCI_VIRT_CFG_INIT_RO(pvd, base, 4, val);
1212 
1213 	/* 0x04 - Device capability
1214 	 *
1215 	 * We should support FLR. Otherwise, there might be
1216 	 * problems passing the device through to userland via the
1217 	 * Linux VFIO infrastructure.
1218 	 */
1219 	val = ((PCIE_MPSS_128) |
1220 	       (PCIE_PHANTOM_NONE << 3) |
1221 	       (PCIE_L0SL_MAX_NO_LIMIT << 6) |
1222 	       (PCIE_L1L_MAX_NO_LIMIT << 9) |
1223 	       (PCICAP_EXP_DEVCAP_FUNC_RESET));
1224 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_DEVCAP, 4, val);
1225 
1226 	pci_virt_add_filter(pvd, base + PCICAP_EXP_DEVCTL, 2,
1227 			    PCI_REG_FLAG_WRITE,
1228 			    npu_dev_cfg_exp_devcap, NULL);
1229 
1230 	/* 0x08 - Device control and status */
1231 	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DEVCTL, 4, 0x00002810,
1232 			 0xffff0000, 0x000f0000);
1233 
1234 	/* 0x0c - Link capability */
1235 	val = (PCIE_LSPEED_VECBIT_2 | (PCIE_LWIDTH_1X << 4));
1236 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_LCAP, 4, val);
1237 
1238 	/* 0x10 - Link control and status */
1239 	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_LCTL, 4, 0x00130000,
1240 			 0xfffff000, 0xc0000000);
1241 
1242 	/* 0x14 - Slot capability */
1243 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SLOTCAP, 4, 0x00000000);
1244 
1245 	/* 0x18 - Slot control and status */
1246 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SLOTCTL, 4, 0x00000000);
1247 
1248 	/* 0x1c - Root control and capability */
1249 	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_RC, 4, 0x00000000,
1250 			 0xffffffe0, 0x00000000);
1251 
1252 	/* 0x20 - Root status */
1253 	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_RSTAT, 4, 0x00000000,
1254 			 0xffffffff, 0x00010000);
1255 
1256 	/* 0x24 - Device capability 2 */
1257 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCIECAP_EXP_DCAP2, 4, 0x00000000);
1258 
1259 	/* 0x28 - Device Control and status 2 */
1260 	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_DCTL2, 4, 0x00070000,
1261 			 0xffff0000, 0x00000000);
1262 
1263 	/* 0x2c - Link capability 2 */
1264 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_LCAP2, 4, 0x00000007);
1265 
1266 	/* 0x30 - Link control and status 2 */
1267 	PCI_VIRT_CFG_INIT(pvd, base + PCICAP_EXP_LCTL2, 4, 0x00000003,
1268 			 0xffff0000, 0x00200000);
1269 
1270 	/* 0x34 - Slot capability 2 */
1271 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SCAP2, 4, 0x00000000);
1272 
1273 	/* 0x38 - Slot control and status 2 */
1274 	PCI_VIRT_CFG_INIT_RO(pvd, base + PCICAP_EXP_SCTL2, 4, 0x00000000);
1275 }
1276 
1277 static struct npu_dev_cap *npu_dev_create_capability(struct npu_dev *dev,
1278 				  void (*populate)(struct npu_dev_cap *),
1279 				  uint16_t id,
1280 				  uint16_t start,
1281 				  uint16_t end)
1282 {
1283 	struct npu_dev_cap *cap;
1284 
1285 	/* Check if the capability already exists */
1286 	cap = npu_dev_find_capability(dev, id);
1287 	if (cap)
1288 		return cap;
1289 
1290 	/* Allocate new one */
1291 	cap = zalloc(sizeof(struct npu_dev_cap));
1292 	assert(cap);
1293 
1294 	/* Put it into the pool */
1295 	cap->id         = id;
1296 	cap->start      = start;
1297 	cap->end        = end;
1298 	cap->dev        = dev;
1299 	cap->populate	= populate;
1300 	list_add_tail(&dev->capabilities, &cap->link);
1301 
1302 	return cap;
1303 }
1304 
1305 static struct npu_dev_cap *npu_dev_find_capability(struct npu_dev *dev,
1306 						   uint16_t id)
1307 {
1308 	struct npu_dev_cap *cap;
1309 
1310 	list_for_each(&dev->capabilities, cap, link) {
1311 		if (cap->id == id)
1312 			return cap;
1313 	}
1314 
1315 	return NULL;
1316 }
1317 
1318 /*
1319  * All capabilities should be put into the device capability
1320  * list in ascending order of register offset, for easy
1321  * access at a later point.
1322  */
1323 static void npu_dev_create_capabilities(struct npu_dev *dev)
1324 {
1325 	list_head_init(&dev->capabilities);
1326 
1327 	/* PCI express capability */
1328 	npu_dev_create_capability(dev, npu_dev_populate_pcie_cap,
1329 				  PCI_CFG_CAP_ID_EXP, PCIE_CAP_START,
1330 				  PCIE_CAP_END);
1331 
1332 	/* Vendor specific capability */
1333 	npu_dev_create_capability(dev, npu_dev_populate_vendor_cap,
1334 				  PCI_CFG_CAP_ID_VENDOR, VENDOR_CAP_START,
1335 				  VENDOR_CAP_END);
1336 }
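
/*
 * With the ranges above this produces a simple two-entry capability chain
 * in config space: the PCI Express capability at 0x40 followed by the
 * vendor specific capability at 0x80, linked together through the
 * next-capability pointers in npu_dev_create_cfg() below.
 */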
1337 
1338 static void npu_dev_create_cfg(struct npu_dev *dev)
1339 {
1340 	struct pci_virt_device *pvd = dev->pvd;
1341 	struct npu_dev_cap *cap;
1342 	uint32_t offset;
1343 	uint32_t last_cap_offset;
1344 
1345 	/* 0x00 - Vendor/Device ID */
1346 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_VENDOR_ID, 4, 0x04ea1014);
1347 
1348 	/* 0x04 - Command/Status
1349 	 *
1350 	 * Create one trap to trace toggling of the memory BAR enable bit
1351 	 */
1352 	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_CMD, 4, 0x00100000, 0xffb802b8,
1353 			 0xf9000000);
1354 
1355 	pci_virt_add_filter(pvd, PCI_CFG_CMD, 1, PCI_REG_FLAG_WRITE,
1356 			    npu_dev_cfg_write_cmd, NULL);
1357 
1358 	/* 0x08 - Rev/Class/Cache */
1359 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_REV_ID, 4, 0x06800100);
1360 
1361 	/* 0x0c - CLS/Latency Timer/Header/BIST */
1362 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CACHE_LINE_SIZE, 4, 0x00800000);
1363 
1364 	/* 0x10 - BARs, always 64-bit non-prefetchable
1365 	 *
1366 	 * Each emulated device represents one link and therefore
1367 	 * there is one BAR for the associated DLTL region.
1368 	 */
1369 
1370 	/* Low 32-bits */
1371 	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR0, 4,
1372 			 (dev->bar.base & 0xfffffff0) | dev->bar.flags,
1373 			 0x0000000f, 0x00000000);
1374 
1375 	/* High 32-bits */
1376 	PCI_VIRT_CFG_INIT(pvd, PCI_CFG_BAR1, 4, (dev->bar.base >> 32),
1377 			 0x00000000, 0x00000000);
1378 
1379 	/*
1380 	 * Create trap. Writing 0xFF's to the BAR registers is
1381 	 * trapped and returns the size on the next read.
1382 	 */
1383 	pci_virt_add_filter(pvd, PCI_CFG_BAR0, 8,
1384 			    PCI_REG_FLAG_READ | PCI_REG_FLAG_WRITE,
1385 			    npu_dev_cfg_bar, &dev->bar);
1386 
1387 	/* 0x18/1c/20/24 - Disabled BAR#2/3/4/5
1388 	 *
1389 	 * Mark those BARs readonly so that 0x0 will be returned when
1390 	 * probing the length and the BARs will be skipped.
1391 	 */
1392 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR2, 4, 0x00000000);
1393 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR3, 4, 0x00000000);
1394 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR4, 4, 0x00000000);
1395 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_BAR5, 4, 0x00000000);
1396 
1397 	/* 0x28 - Cardbus CIS pointer */
1398 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CARDBUS_CIS, 4, 0x00000000);
1399 
1400 	/* 0x2c - Subsystem ID */
1401 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_SUBSYS_VENDOR_ID, 4, 0x00000000);
1402 
1403 	/* 0x30 - ROM BAR
1404 	 *
1405 	 * Force its size to be zero so that the kernel will skip
1406 	 * probing the ROM BAR. We don't need to emulate the ROM BAR.
1407 	 */
1408 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_ROMBAR, 4, 0xffffffff);
1409 
1410 	/* 0x34 - PCI Capability
1411 	 *
1412 	 * By default, we don't have any capabilities
1413 	 */
1414 	PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_CAP, 4, 0x00000000);
1415 
1416 	last_cap_offset = PCI_CFG_CAP - 1;
1417 	list_for_each(&dev->capabilities, cap, link) {
1418 		offset = cap->start;
1419 
1420 		/* Initialize config space for the capability */
1421 		if (cap->populate)
1422 			cap->populate(cap);
1423 
1424 		/* Add capability header */
1425 		PCI_VIRT_CFG_INIT_RO(pvd, offset, 2, cap->id);
1426 
1427 		/* Update the next capability pointer */
1428 		PCI_VIRT_CFG_NORMAL_WR(pvd, last_cap_offset + 1, 1, offset);
1429 
1430 		last_cap_offset = offset;
1431 	}
1432 
1433 	/* 0x38 - Reserved */
1434 	PCI_VIRT_CFG_INIT_RO(pvd, 0x38, 4, 0x00000000);
1435 
1436 	/* 0x3c - INT line/pin/Minimal grant/Maximal latency */
1437 	if (!(dev->index % 2))
1438 		PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000100);
1439 	else
1440 		PCI_VIRT_CFG_INIT_RO(pvd, PCI_CFG_INT_LINE, 4, 0x00000200);
1441 }
1442 
1443 static uint32_t npu_allocate_bdfn(struct npu *p, uint32_t group)
1444 {
1445 	int i;
1446 	int bdfn = (group << 3);
1447 
1448 	for (i = 0; i < p->total_devices; i++) {
1449 		if ((p->devices[i].pvd->bdfn & 0xf8) == (bdfn & 0xf8))
1450 			bdfn++;
1451 	}
1452 
1453 	return bdfn;
1454 }
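
/*
 * Example: with two links in group 0 and two in group 1, the emulated
 * devices end up at BDFNs 0x00, 0x01 (device 0, functions 0 and 1) and
 * 0x08, 0x09 (device 1, functions 0 and 1), i.e. links sharing a group
 * become functions of the same PCI device.
 */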
1455 
1456 static void npu_create_devices(struct dt_node *dn, struct npu *p)
1457 {
1458 	struct npu_dev *dev;
1459 	struct dt_node *npu_dn, *link;
1460 	uint32_t bdfn, npu_phandle, index = 0;
1461 	uint64_t buid_reg;
1462 	uint64_t lsisrcid;
1463 	uint64_t buid;
1464 
1465 
1466 	/* The bits in the LSI ID Base register are always compared and
1467 	 * can be set to 0 in the buid base and mask fields.  The
1468 	 * buid (bus unit id) is the full irq minus the last 4 bits. */
1469 	lsisrcid = GETFIELD(NPU_LSI_SRC_ID_BASE, NPU_LSI_SRC_ID_BASE);
1470 	buid = p8_chip_irq_block_base(p->chip_id, P8_IRQ_BLOCK_MISC) >> 4;
1471 
1472 	buid_reg = SETFIELD(NP_IRQ_LEVELS, NP_BUID_ENABLE, ~0);
1473 	buid_reg = SETFIELD(NP_BUID_MASK, buid_reg, ~lsisrcid);
1474 	buid_reg = SETFIELD(NP_BUID_BASE, buid_reg, (buid & ~lsisrcid));
1475 
1476 	/* Get the npu node which has the links, which we expand here
1477 	 * into PCI-like devices attached to our emulated PHB. */
1478 	npu_phandle = dt_prop_get_u32(dn, "ibm,npcq");
1479 	npu_dn = dt_find_by_phandle(dt_root, npu_phandle);
1480 	assert(npu_dn);
1481 
1482 	/* Walk the link@x nodes to initialize devices */
1483 	p->total_devices = 0;
1484 	p->phb.scan_map = 0;
1485 	list_head_init(&p->phb.virt_devices);
1486 	dt_for_each_compatible(npu_dn, link, "ibm,npu-link") {
1487 		struct npu_dev_bar *bar;
1488 		uint32_t group_id;
1489 		uint64_t val;
1490 
1491 		dev = &p->devices[index];
1492 		dev->index = dt_prop_get_u32(link, "ibm,npu-link-index");
1493 		dev->xscom = npu_link_scom_base(npu_dn, p->xscom_base,
1494 						dev->index);
1495 
1496 		dev->npu = p;
1497 		dev->dt_node = link;
1498 
1499 		/* We don't support MMIO PHY access yet */
1500 		dev->pl_base = NULL;
1501 
1502 		group_id = dt_prop_get_u32(link, "ibm,npu-group-id");
1503 		bdfn = npu_allocate_bdfn(p, group_id);
1504 
1505 		/* This must be done after calling
1506 		 * npu_allocate_bdfn() */
1507 		p->total_devices++;
1508 		p->phb.scan_map |= 0x1 << ((bdfn & 0xf8) >> 3);
1509 
1510 		dev->pl_xscom_base = dt_prop_get_u64(link, "ibm,npu-phy");
1511 		dev->lane_mask = dt_prop_get_u32(link, "ibm,npu-lane-mask");
1512 
1513 		/* Setup BUID/ISRN */
1514 		xscom_write(p->chip_id, dev->xscom + NX_NP_BUID, buid_reg);
1515 
1516 		/* Create PCI virtual device */
1517 		dev->pvd = pci_virt_add_device(&p->phb, bdfn, NPU_DEV_CFG_SIZE, dev);
1518 		assert(dev->pvd);
1519 		bar = &dev->bar;
1520 		bar->flags = (PCI_CFG_BAR_TYPE_MEM |
1521 			      PCI_CFG_BAR_MEM64);
1522 
1523 		/* Update BAR info */
1524 		bar->xscom = dev->xscom + NX_MMIO_BAR_0;
1525 		xscom_read(p->chip_id, bar->xscom, &val);
1526 		bar->base  = GETFIELD(NX_MMIO_BAR_BASE, val) << 12;
1527 		bar->size = get_bar_size(val);
1528 
1529 		/*
1530 		 * The config space is initialised with the BARs
1531 		 * disabled, so make sure they are actually disabled in
1532 		 * hardware.
1533 		 */
1534 		npu_dev_bar_update(p->chip_id, bar, false);
1535 
1536 		/* Initialize capabilities */
1537 		npu_dev_create_capabilities(dev);
1538 
1539 		/* Initialize config space */
1540 		npu_dev_create_cfg(dev);
1541 
1542 		index++;
1543 	}
1544 }
1545 
1546 static void npu_add_phb_properties(struct npu *p)
1547 {
1548 	struct dt_node *np = p->phb.dt_node;
1549 	uint32_t icsp = get_ics_phandle();
1550 	uint64_t tkill, mm_base, mm_size;
1551 	uint32_t base_lsi = p->base_lsi;
1552 	uint32_t map[] = {
1553 		/* Dev 0 INT#A (used by fn0) */
1554 		0x0000, 0x0, 0x0, 0x1, icsp, base_lsi + NPU_LSI_INT_DL0, 1,
1555 		/* Dev 0 INT#B (used by fn1) */
1556 		0x0000, 0x0, 0x0, 0x2, icsp, base_lsi + NPU_LSI_INT_DL1, 1,
1557 		/* Dev 1 INT#A (used by fn0) */
1558 		0x0800, 0x0, 0x0, 0x1, icsp, base_lsi + NPU_LSI_INT_DL2, 1,
1559 		/* Dev 1 INT#B (used by fn1) */
1560 		0x0800, 0x0, 0x0, 0x2, icsp, base_lsi + NPU_LSI_INT_DL3, 1,
1561 	};
1562 	/* Mask is bus, device and INT# */
1563 	uint32_t mask[] = {0xf800, 0x0, 0x0, 0x7};
1564 	char slotbuf[32];
1565 
1566 	/* Add various properties that HB doesn't have to
1567 	 * add, some of them simply because they result from
1568 	 * policy decisions made in skiboot rather than in HB
1569 	 * such as the MMIO windows going to PCI, interrupts,
1570 	 * etc.
1571 	 */
1572 	dt_add_property_cells(np, "#address-cells", 3);
1573 	dt_add_property_cells(np, "#size-cells", 2);
1574 	dt_add_property_cells(np, "#interrupt-cells", 1);
1575 	dt_add_property_cells(np, "bus-range", 0, 0xff);
1576 	dt_add_property_cells(np, "clock-frequency", 0x200, 0);
1577         dt_add_property_cells(np, "interrupt-parent", icsp);
1578 
1579         /* DLPL Interrupts, we don't use the standard swizzle */
1580 	p->phb.lstate.int_size = 0;
1581 	dt_add_property(np, "interrupt-map", map, sizeof(map));
1582 	dt_add_property(np, "interrupt-map-mask", mask, sizeof(mask));
1583 
1584 	/* NPU PHB properties */
1585 	/* TODO: Due to an errata TCE KILL only works when DMA traffic
1586 	 * has been stopped. We need to implement the work around
1587 	 * which is to do a TCE kill all instead. */
1588 	tkill = cleanup_addr((uint64_t)p->at_regs) + NPU_TCE_KILL;
1589 	dt_add_property_cells(np, "ibm,opal-num-pes",
1590 			      NPU_NUM_OF_PES);
1591 	dt_add_property_cells(np, "ibm,opal-reserved-pe",
1592 			      0);
1593         dt_add_property_u64(np, "ibm,opal-tce-kill", tkill);
1594 
1595 	/* The memory window is exposed as a 32-bit non-prefetchable
1596 	 * one because the 64-bit prefetchable one is kind of special
1597 	 * to the kernel.
1598 	 */
1599 	mm_base = p->mm_base;
1600 	mm_size = p->mm_size;
1601 	dt_add_property_cells(np, "ranges", 0x02000000,
1602 			      hi32(mm_base), lo32(mm_base),
1603 			      hi32(mm_base), lo32(mm_base),
1604 			      hi32(mm_size), lo32(mm_size));
1605 
1606 	/* Set the slot location on the NPU PHB.  This PHB can contain
1607 	 * devices that correlate with multiple physical slots, so
1608 	 * present the chip ID instead.
1609 	 */
1610 	snprintf(slotbuf, sizeof(slotbuf), "NPU Chip %d", p->chip_id);
1611 	dt_add_property_string(np, "ibm,io-base-loc-code", slotbuf);
1612 }
1613 
1614 static void npu_create_phb(struct dt_node *dn)
1615 {
1616 	const struct dt_property *prop;
1617 	struct npu *p;
1618 	struct pci_slot *slot;
1619 	uint32_t links;
1620 	void *pmem;
1621 
1622 	/* Retrieve number of devices */
1623 	links = dt_prop_get_u32(dn, "ibm,links");
1624 	pmem = zalloc(sizeof(struct npu) + links * sizeof(struct npu_dev));
1625 	assert(pmem);
1626 
1627 	/* Populate PHB */
1628 	p = pmem;
1629 	p->index = dt_prop_get_u32(dn, "ibm,npu-index");
1630 	p->chip_id = dt_prop_get_u32(dn, "ibm,chip-id");
1631 	p->xscom_base = dt_prop_get_u32(dn, "ibm,xscom-base");
1632 	p->total_devices = links;
1633 
1634 	/* TODO: When hardware fences are implemented, detect them here */
1635 	p->fenced = false;
1636 
1637 	/* This is the AT base */
1638 	p->at_xscom = p->xscom_base + NPU_AT_SCOM_OFFSET;
1639 	p->at_regs = (void *)dt_get_address(dn, 0, NULL);
1640 
1641 	prop = dt_require_property(dn, "ibm,mmio-window", -1);
1642 	assert(prop->len >= (2 * sizeof(uint64_t)));
1643 	p->mm_base = ((const uint64_t *)prop->prop)[0];
1644 	p->mm_size = ((const uint64_t *)prop->prop)[1];
1645 
1646 	p->devices = pmem + sizeof(struct npu);
1647 
1648 	/* Interrupt */
1649         p->base_lsi = p8_chip_irq_block_base(p->chip_id, P8_IRQ_BLOCK_MISC) +
1650 		NPU_LSI_IRQ_MIN;
1651 
1652 	/* Generic PHB */
1653 	p->phb.dt_node = dn;
1654 	p->phb.ops = &npu_ops;
1655 	p->phb.phb_type = phb_type_pcie_v3;
1656 
1657 	/* Populate devices */
1658 	npu_create_devices(dn, p);
1659 
1660 	/* Populate extra properties */
1661 	npu_add_phb_properties(p);
1662 
1663 	/* Create PHB slot */
1664 	slot = npu_slot_create(&p->phb);
1665 	if (!slot)
1666 	{
1667 		/**
1668 		 * @fwts-label NPUCannotCreatePHBSlot
1669 		 * @fwts-advice Firmware probably ran out of memory creating
1670 		 * NPU slot. NVLink functionality could be broken.
1671 		 */
1672 		prlog(PR_ERR, "NPU: Cannot create PHB slot\n");
1673 	}
1674 
1675 	/* Register PHB */
1676 	pci_register_phb(&p->phb, OPAL_DYNAMIC_PHB_ID);
1677 
1678 	/* Initialize IODA cache */
1679 	npu_ioda_init(p);
1680 
1681 	/* Register interrupt source */
1682 	npu_register_irq(p);
1683 
1684 	/* Initialize hardware */
1685 	npu_hw_init(p);
1686 }
1687 
1688 void probe_npu(void)
1689 {
1690 	struct dt_node *np;
1691 
1692 	/* Scan NPU XSCOM nodes */
1693 	dt_for_each_compatible(dt_root, np, "ibm,power8-npu")
1694 		npu_probe_phb(np);
1695 
1696 	/* Scan newly created PHB nodes */
1697 	dt_for_each_compatible(dt_root, np, "ibm,power8-npu-pciex")
1698 		npu_create_phb(np);
1699 }
1700