1 // SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
2 /*
3  * PHB4: PCI Host Bridge 4, in POWER9
4  *
5  * Copyright 2013-2019 IBM Corp.
6  * Copyright 2018 Raptor Engineering, LLC
7  */
8 
9 /*
10  *
11  * FIXME:
12  *   More stuff for EEH support:
13  *      - PBCQ error reporting interrupt
14  *	- I2C-based power management (replacing SHPC)
15  *	- Directly detect fenced PHB through one dedicated HW reg
16  */
17 
18 /*
19  * This is a simplified view of the PHB4 reset and link training steps
20  *
21  * Step 1:
22  * - Check for hotplug status:
23  *  o PHB_PCIE_HOTPLUG_STATUS bit PHB_PCIE_HPSTAT_PRESENCE
24  *  o If not set -> Bail out (Slot is empty)
25  *
26  * Step 2:
27  * - Do complete PHB reset:
28  *   o PHB/ETU reset procedure
29  *
30  * Step 3:
31  * - Drive PERST active (skip if already asserted, i.e. after a cold reboot)
32  * - Wait 250ms (for cards to reset)
33  *   o PowerVM has used 250ms for a long time without any problems
34  *
35  * Step 4:
36  * - Drive PERST inactive
37  *
38  * Step 5:
39  * - Look for inband presence:
40  *   o From PERST we have two stages to get inband presence detected
41  *     1) Devices must enter Detect state within 20 ms of the end of
42  *          Fundamental Reset
43  *     2) Receiver detect pulses occur every 12ms
44  *      - Hence minimum wait time 20 + 12 = 32ms
45  *   o Unfortunately, we've seen cards take 440ms
46  *   o Hence we are conservative and poll here for 1000ms (> 440ms)
47  * - If no inband presence after 1000ms -> Bail out (Slot is broken)
48  *   o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_INBAND_PRESENCE
49  *
50  * Step 6:
51  * - Look for link training done:
52  *   o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_TL_LINKACT
53  * - If not set after 2000ms, Retry (3 times) -> Goto Step 2
54  *   o a phy lockup could cause link training failure, hence we go back
55  *     to a complete PHB reset on retry
56  *   o not expected to happen very often
57  *
58  * Step 7:
59  * - Wait for 1 sec (before touching device config space):
60  * -  From PCIe spec:
61  *     Root Complex and/or system software must allow at least 1.0 s after
62  *     a Conventional Reset of a device, before it may determine that a
63  *     device which fails to return a Successful Completion status for a
64  *     valid Configuration Request is a broken device.
65  *
66  * Step 8:
67  * - Sanity check for fence and link still up:
68  *   o If fenced or link down, Retry (3 times) -> Goto Step 2
69  *   o This is not necessary but takes no time and can be useful
70  *   o Once we leave here, much harder to recover from errors
71  *
72  * Step 9:
73  * - Check for optimised link for directly attached devices:
74  *   o Wait for CRS (so we can read device config space)
75  *   o Check chip and device are in the allowlist. If not, Goto Step 10
76  *   o If trained link speed is degraded, retry ->  Goto Step 2
77  *   o If trained link width is degraded, retry -> Goto Step 2
78  *   o If still degraded after 3 retries, give up, Goto Step 10.
79  *
80  * Step 10:
81  *  - PHB good, start probing config space.
82  *    o core/pci.c: pci_reset_phb() -> pci_scan_phb()
83  */
84 
85 
86 #undef NO_ASB
87 #undef LOG_CFG
88 
89 #include <skiboot.h>
90 #include <io.h>
91 #include <timebase.h>
92 #include <pci.h>
93 #include <pci-cfg.h>
94 #include <pci-slot.h>
95 #include <vpd.h>
96 #include <interrupts.h>
97 #include <opal.h>
98 #include <cpu.h>
99 #include <device.h>
100 #include <ccan/str/str.h>
101 #include <ccan/array_size/array_size.h>
102 #include <xscom.h>
103 #include <affinity.h>
104 #include <phb4.h>
105 #include <phb4-regs.h>
106 #include <phb4-capp.h>
107 #include <capp.h>
108 #include <fsp.h>
109 #include <chip.h>
110 #include <chiptod.h>
111 #include <xive.h>
112 #include <xscom-p9-regs.h>
113 #include <phys-map.h>
114 #include <nvram.h>
115 
116 /* Enable this to disable error interrupts for debug purposes */
117 #undef DISABLE_ERR_INTS
118 
119 static void phb4_init_hw(struct phb4 *p);
120 
121 #define PHBDBG(p, fmt, a...)	prlog(PR_DEBUG, "PHB#%04x[%d:%d]: " fmt, \
122 				      (p)->phb.opal_id, (p)->chip_id, \
123 				      (p)->index,  ## a)
124 #define PHBINF(p, fmt, a...)	prlog(PR_INFO, "PHB#%04x[%d:%d]: " fmt, \
125 				      (p)->phb.opal_id, (p)->chip_id, \
126 				      (p)->index,  ## a)
127 #define PHBNOTICE(p, fmt, a...)	prlog(PR_NOTICE, "PHB#%04x[%d:%d]: " fmt, \
128 				      (p)->phb.opal_id, (p)->chip_id, \
129 				      (p)->index,  ## a)
130 #define PHBERR(p, fmt, a...)	prlog(PR_ERR, "PHB#%04x[%d:%d]: " fmt, \
131 				      (p)->phb.opal_id, (p)->chip_id, \
132 				      (p)->index,  ## a)
133 #ifdef LOG_CFG
134 #define PHBLOGCFG(p, fmt, a...)	PHBDBG(p, fmt, ## a)
135 #else
136 #define PHBLOGCFG(p, fmt, a...) do {} while (0)
137 #endif
138 
139 static bool pci_eeh_mmio;
140 static bool pci_retry_all;
141 static int rx_err_max = PHB4_RX_ERR_MAX;
142 
143 static inline bool is_phb4(void)
144 {
145 	return (proc_gen == proc_gen_p9);
146 }
147 
148 static inline bool is_phb5(void)
149 {
150 	return (proc_gen == proc_gen_p10);
151 }
152 
153 /* PQ offloading on the XIVE IC. */
154 static inline bool phb_pq_disable(struct phb4 *p __unused)
155 {
156 	if (is_phb5())
157 		return xive2_cap_phb_pq_disable();
158 
159 	return false;
160 }
161 
162 /*
163  * Use the ESB page of the XIVE IC for event notification. Latency
164  * improvement.
165  */
166 static inline bool phb_abt_mode(struct phb4 *p __unused)
167 {
168 	if (is_phb5())
169 		return xive2_cap_phb_abt();
170 
171 	return false;
172 }
173 
174 static inline bool phb_can_store_eoi(struct phb4 *p)
175 {
176 	if (is_phb5())
177 		/* PQ offloading is required for StoreEOI */
178 		return XIVE2_STORE_EOI_ENABLED && phb_pq_disable(p);
179 
180 	return XIVE_STORE_EOI_ENABLED;
181 }
182 
183 /* Note: The "ASB" name is historical, practically this means access via
184  * the XSCOM backdoor
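 * (accesses go through the XETU_HV_IND_ADDRESS / XETU_HV_IND_DATA XSCOM
 * register pair, as implemented by the two helpers below)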
185  */
186 static inline uint64_t phb4_read_reg_asb(struct phb4 *p, uint32_t offset)
187 {
188 #ifdef NO_ASB
189 	return in_be64(p->regs + offset);
190 #else
191 	int64_t rc;
192 	uint64_t addr, val;
193 
194 	/* Address register: must use 4 bytes for built-in config space.
195 	 *
196 	 * This path isn't usable for outbound configuration space
197 	 */
198 	if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
199 		PHBERR(p, "XSCOM unaligned access to CONFIG_DATA unsupported\n");
200 		return -1ull;
201 	}
202 	addr = XETU_HV_IND_ADDR_VALID | offset;
203 	if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
204 		addr |= XETU_HV_IND_ADDR_4B;
205  	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
206 	if (rc != 0) {
207 		PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
208 		return -1ull;
209 	}
210  	rc = xscom_read(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, &val);
211 	if (rc != 0) {
212 		PHBERR(p, "XSCOM error reading register 0x%x\n", offset);
213 		return -1ull;
214 	}
215 	return val;
216 #endif
217 }
218 
219 static inline void phb4_write_reg_asb(struct phb4 *p,
220 				      uint32_t offset, uint64_t val)
221 {
222 #ifdef NO_ASB
223 	out_be64(p->regs + offset, val);
224 #else
225 	int64_t rc;
226 	uint64_t addr;
227 
228 	/* Address register: must use 4 bytes for built-in config space.
229 	 *
230 	 * This path isn't usable for outbound configuration space
231 	 */
232 	if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
233 		PHBERR(p, "XSCOM access to CONFIG_DATA unsupported\n");
234 		return;
235 	}
236 	addr = XETU_HV_IND_ADDR_VALID | offset;
237 	if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
238 		addr |= XETU_HV_IND_ADDR_4B;
239  	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
240 	if (rc != 0) {
241 		PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
242 		return;
243 	}
244  	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, val);
245 	if (rc != 0) {
246 		PHBERR(p, "XSCOM error writing register 0x%x\n", offset);
247 		return;
248 	}
249 #endif
250 }
251 
252 static uint64_t phb4_read_reg(struct phb4 *p, uint32_t offset)
253 {
254 	/* No register accesses are permitted while in reset */
255 	if (p->flags & PHB4_ETU_IN_RESET)
256 		return -1ull;
257 
258 	if (p->flags & PHB4_CFG_USE_ASB)
259 		return phb4_read_reg_asb(p, offset);
260 	else
261 		return in_be64(p->regs + offset);
262 }
263 
264 static void phb4_write_reg(struct phb4 *p, uint32_t offset, uint64_t val)
265 {
266 	/* No register accesses are permitted while in reset */
267 	if (p->flags & PHB4_ETU_IN_RESET)
268 		return;
269 
270 	if (p->flags & PHB4_CFG_USE_ASB)
271 		phb4_write_reg_asb(p, offset, val);
272 	else
273 		return out_be64(p->regs + offset, val);
274 }
275 
276 /* Helper to select an IODA table entry */
277 static inline void phb4_ioda_sel(struct phb4 *p, uint32_t table,
278 				 uint32_t addr, bool autoinc)
279 {
280 	phb4_write_reg(p, PHB_IODA_ADDR,
281 		       (autoinc ? PHB_IODA_AD_AUTOINC : 0)	|
282 		       SETFIELD(PHB_IODA_AD_TSEL, 0ul, table)	|
283 		       SETFIELD(PHB_IODA_AD_TADR, 0ul, addr));
284 }
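/*
 * Illustrative usage, as done by the IODA accessors later in this file:
 * select a table entry, then read/write it through PHB_IODA_DATA0, e.g.
 *
 *	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
 *	out_be64(p->regs + PHB_IODA_DATA0, tve);
 *
 * With autoinc set, successive accesses to PHB_IODA_DATA0 step through
 * consecutive entries of the selected table.
 */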
285 
286 /*
287  * Configuration space access
288  *
289  * The PHB lock is assumed to be already held
290  */
291 static int64_t phb4_pcicfg_check(struct phb4 *p, uint32_t bdfn,
292 				 uint32_t offset, uint32_t size,
293 				 uint16_t *pe)
294 {
295 	uint32_t sm = size - 1;
296 
297 	if (offset > 0xfff || bdfn > 0xffff)
298 		return OPAL_PARAMETER;
299 	if (offset & sm)
300 		return OPAL_PARAMETER;
301 
302 	/* The root bus only has a device at 0 and we get into an
303 	 * error state if we try to probe beyond that, so let's
304 	 * avoid that and just return an error to Linux
305 	 */
306 	if (PCI_BUS_NUM(bdfn) == 0 && (bdfn & 0xff))
307 		return OPAL_HARDWARE;
308 
309 	/* Check PHB state */
310 	if (p->broken)
311 		return OPAL_HARDWARE;
312 
313 	/* Fetch the PE# from cache */
314 	*pe = be16_to_cpu(p->tbl_rtt[bdfn]);
315 
316 	return OPAL_SUCCESS;
317 }
318 
319 static int64_t phb4_rc_read(struct phb4 *p, uint32_t offset, uint8_t sz,
320 			    void *data, bool use_asb)
321 {
322 	uint32_t reg = offset & ~3;
323 	uint32_t oval;
324 
325 	/* Some registers are handled locally */
326 	switch (reg) {
327 		/* Bridge base/limit registers are cached here as HW
328 		 * doesn't implement them (it hard codes values that
329 		 * will confuse a proper PCI implementation).
330 		 */
331 	case PCI_CFG_MEM_BASE:		/* Includes PCI_CFG_MEM_LIMIT */
332 		oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
333 		break;
334 	case PCI_CFG_PREF_MEM_BASE:	/* Includes PCI_CFG_PREF_MEM_LIMIT */
335 		oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
336 		oval |= 0x00010001;
337 		break;
338 	case PCI_CFG_IO_BASE_U16:	/* Includes PCI_CFG_IO_LIMIT_U16 */
339 		oval = 0;
340 		break;
341 	case PCI_CFG_PREF_MEM_BASE_U32:
342 	case PCI_CFG_PREF_MEM_LIMIT_U32:
343 		oval = p->rc_cache[(reg - 0x20) >> 2];
344 		break;
345 	default:
346 		oval = 0xffffffff; /* default if offset too big */
347 		if (reg < PHB_RC_CONFIG_SIZE) {
348 			if (use_asb)
349 				oval = bswap_32(phb4_read_reg_asb(p, PHB_RC_CONFIG_BASE
350 								  + reg));
351 			else
352 				oval = in_le32(p->regs + PHB_RC_CONFIG_BASE + reg);
353 		}
354 	}
355 
356 	/* Apply any post-read fixups */
357 	switch (reg) {
358 	case PCI_CFG_IO_BASE:
359 		oval |= 0x01f1; /* Set IO base < limit to disable the window */
360 		break;
361 	}
362 
363 	switch (sz) {
364 	case 1:
365 		offset &= 3;
366 		*((uint8_t *)data) = (oval >> (offset << 3)) & 0xff;
367 		PHBLOGCFG(p, "000 CFG08 Rd %02x=%02x\n",
368 			  offset, *((uint8_t *)data));
369 		break;
370 	case 2:
371 		offset &= 2;
372 		*((uint16_t *)data) = (oval >> (offset << 3)) & 0xffff;
373 		PHBLOGCFG(p, "000 CFG16 Rd %02x=%04x\n",
374 			  offset, *((uint16_t *)data));
375 		break;
376 	case 4:
377 		*((uint32_t *)data) = oval;
378 		PHBLOGCFG(p, "000 CFG32 Rd %02x=%08x\n",
379 			  offset, *((uint32_t *)data));
380 		break;
381 	default:
382 		assert(false);
383 	}
384 	return OPAL_SUCCESS;
385 }
386 
387 static int64_t phb4_rc_write(struct phb4 *p, uint32_t offset, uint8_t sz,
388 			     uint32_t val, bool use_asb)
389 {
390 	uint32_t reg = offset & ~3;
391 	uint32_t old, mask, shift, oldold;
392 	int64_t rc;
393 
394 	if (reg > PHB_RC_CONFIG_SIZE)
395 		return OPAL_SUCCESS;
396 
397 	/* If size isn't 4-bytes, do a RMW cycle */
398 	if (sz < 4) {
399 		rc = phb4_rc_read(p, reg, 4, &old, use_asb);
400 		if (rc != OPAL_SUCCESS)
401 			return rc;
402 
403 		/*
404 		 * Since we have to Read-Modify-Write here, we need to filter
405 		 * out registers that have write-1-to-clear bits to prevent
406 		 * clearing stuff we shouldn't be.  So for any register this
407 		 * applies to, mask out those bits.
408 		 */
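		/*
		 * For example: a 1-byte write to offset 0x1D (the I/O limit
		 * register) is done as a 32-bit RMW of dword 0x1C. Without
		 * the masking below, any W1C secondary status bits currently
		 * set (bits 24-31) would be written back as ones and cleared
		 * by hardware.
		 */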
409 		oldold = old;
410 		switch(reg) {
411 		case 0x1C: /* Secondary status */
412 			old &= 0x00ffffff; /* mask out 24-31 */
413 			break;
414 		case 0x50: /* EC - Device status */
415 			old &= 0xfff0ffff; /* mask out 16-19 */
416 			break;
417 		case 0x58: /* EC - Link status */
418 			old &= 0x3fffffff; /* mask out 30-31 */
419 			break;
420 		case 0x78: /* EC - Link status 2 */
421 			old &= 0xf000ffff; /* mask out 16-27 */
422 			break;
423 		/* These registers *only* have write-1-to-clear bits */
424 		case 0x104: /* AER - Uncorr. error status */
425 		case 0x110: /* AER - Corr. error status */
426 		case 0x130: /* AER - Root error status */
427 		case 0x180: /* P16 - status */
428 		case 0x184: /* P16 - LDPM status */
429 		case 0x188: /* P16 - FRDPM status */
430 		case 0x18C: /* P16 - SRDPM status */
431 			old &= 0x00000000;
432 			break;
433 		}
434 
435 		if (old != oldold) {
436 			PHBLOGCFG(p, "Rewrote %x to %x for reg %x for W1C\n",
437 				  oldold, old, reg);
438 		}
439 
440 		if (sz == 1) {
441 			shift = (offset & 3) << 3;
442 			mask = 0xff << shift;
443 			val = (old & ~mask) | ((val & 0xff) << shift);
444 		} else {
445 			shift = (offset & 2) << 3;
446 			mask = 0xffff << shift;
447 			val = (old & ~mask) | ((val & 0xffff) << shift);
448 		}
449 	}
450 
451 	/* Some registers are handled locally */
452 	switch (reg) {
453 		/* See comment in phb4_rc_read() */
454 	case PCI_CFG_MEM_BASE:		/* Includes PCI_CFG_MEM_LIMIT */
455 	case PCI_CFG_PREF_MEM_BASE:	/* Includes PCI_CFG_PREF_MEM_LIMIT */
456 	case PCI_CFG_PREF_MEM_BASE_U32:
457 	case PCI_CFG_PREF_MEM_LIMIT_U32:
458 		p->rc_cache[(reg - 0x20) >> 2] = val;
459 		break;
460 	case PCI_CFG_IO_BASE_U16:	/* Includes PCI_CFG_IO_LIMIT_U16 */
461 		break;
462 	default:
463 		/* Workaround PHB config space enable */
464 		PHBLOGCFG(p, "000 CFG%02d Wr %02x=%08x\n", 8 * sz, reg, val);
465 		if (use_asb)
466 			phb4_write_reg_asb(p, PHB_RC_CONFIG_BASE + reg, val);
467 		else
468 			out_le32(p->regs + PHB_RC_CONFIG_BASE + reg, val);
469 	}
470 	return OPAL_SUCCESS;
471 }
472 
473 static int64_t phb4_pcicfg_read(struct phb4 *p, uint32_t bdfn,
474 				uint32_t offset, uint32_t size,
475 				void *data)
476 {
477 	uint64_t addr, val64;
478 	int64_t rc;
479 	uint16_t pe;
480 	bool use_asb = false;
481 
482 	rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
483 	if (rc)
484 		return rc;
485 
486 	if (p->flags & PHB4_AIB_FENCED) {
487 		if (!(p->flags & PHB4_CFG_USE_ASB))
488 			return OPAL_HARDWARE;
489 		if (bdfn != 0)
490 			return OPAL_HARDWARE;
491 		use_asb = true;
492 	} else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
493 		return OPAL_HARDWARE;
494 	}
495 
496 	/* Handle per-device filters */
497 	rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
498 				    (uint32_t *)data, false);
499 	if (rc != OPAL_PARTIAL)
500 		return rc;
501 
502 	/* Handle root complex MMIO based config space */
503 	if (bdfn == 0)
504 		return phb4_rc_read(p, offset, size, data, use_asb);
505 
506 	addr = PHB_CA_ENABLE;
507 	addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
508 	addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
509 	addr = SETFIELD(PHB_CA_PE, addr, pe);
510 	if (use_asb) {
511 		phb4_write_reg_asb(p, PHB_CONFIG_ADDRESS, addr);
512 		sync();
513 		val64 = bswap_64(phb4_read_reg_asb(p, PHB_CONFIG_DATA));
514 		switch(size) {
515 		case 1:
516 			*((uint8_t *)data) = val64 >> (8 * (offset & 3));
517 			break;
518 		case 2:
519 			*((uint16_t *)data) = val64 >> (8 * (offset & 2));
520 			break;
521 		case 4:
522 			*((uint32_t *)data) = val64;
523 			break;
524 		default:
525 			return OPAL_PARAMETER;
526 		}
527 	} else {
528 		out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
529 		switch(size) {
530 		case 1:
531 			*((uint8_t *)data) =
532 				in_8(p->regs + PHB_CONFIG_DATA + (offset & 3));
533 			PHBLOGCFG(p, "%03x CFG08 Rd %02x=%02x\n",
534 				  bdfn, offset, *((uint8_t *)data));
535 			break;
536 		case 2:
537 			*((uint16_t *)data) =
538 				in_le16(p->regs + PHB_CONFIG_DATA + (offset & 2));
539 			PHBLOGCFG(p, "%03x CFG16 Rd %02x=%04x\n",
540 				  bdfn, offset, *((uint16_t *)data));
541 			break;
542 		case 4:
543 			*((uint32_t *)data) = in_le32(p->regs + PHB_CONFIG_DATA);
544 			PHBLOGCFG(p, "%03x CFG32 Rd %02x=%08x\n",
545 				  bdfn, offset, *((uint32_t *)data));
546 			break;
547 		default:
548 			return OPAL_PARAMETER;
549 		}
550 	}
551 	return OPAL_SUCCESS;
552 }
553 
554 
555 #define PHB4_PCI_CFG_READ(size, type)					\
556 static int64_t phb4_pcicfg_read##size(struct phb *phb, uint32_t bdfn,	\
557 				      uint32_t offset, type *data)	\
558 {									\
559 	struct phb4 *p = phb_to_phb4(phb);				\
560 									\
561 	/* Initialize data in case of error */				\
562 	*data = (type)0xffffffff;					\
563 	return phb4_pcicfg_read(p, bdfn, offset, sizeof(type), data);	\
564 }
565 
566 static int64_t phb4_pcicfg_write(struct phb4 *p, uint32_t bdfn,
567 				 uint32_t offset, uint32_t size,
568 				 uint32_t data)
569 {
570 	uint64_t addr;
571 	int64_t rc;
572 	uint16_t pe;
573 	bool use_asb = false;
574 
575 	rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
576 	if (rc)
577 		return rc;
578 
579 	if (p->flags & PHB4_AIB_FENCED) {
580 		if (!(p->flags & PHB4_CFG_USE_ASB))
581 			return OPAL_HARDWARE;
582 		if (bdfn != 0)
583 			return OPAL_HARDWARE;
584 		use_asb = true;
585 	} else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
586 		return OPAL_HARDWARE;
587 	}
588 
589 	/* Handle per-device filters */
590 	rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
591 				    (uint32_t *)&data, true);
592 	if (rc != OPAL_PARTIAL)
593 		return rc;
594 
595 	/* Handle root complex MMIO based config space */
596 	if (bdfn == 0)
597 		return phb4_rc_write(p, offset, size, data, use_asb);
598 
599 	addr = PHB_CA_ENABLE;
600 	addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
601 	addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
602 	addr = SETFIELD(PHB_CA_PE, addr, pe);
603 	if (use_asb) {
604 		/* We don't support ASB config space writes */
605 		return OPAL_UNSUPPORTED;
606 	} else {
607 		out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
608 		switch(size) {
609 		case 1:
610 			out_8(p->regs + PHB_CONFIG_DATA + (offset & 3), data);
611 			break;
612 		case 2:
613 			out_le16(p->regs + PHB_CONFIG_DATA + (offset & 2), data);
614 			break;
615 		case 4:
616 			out_le32(p->regs + PHB_CONFIG_DATA, data);
617 			break;
618 		default:
619 			return OPAL_PARAMETER;
620 		}
621 	}
622 	PHBLOGCFG(p, "%03x CFG%d Wr %02x=%08x\n", bdfn, 8 * size, offset, data);
623 	return OPAL_SUCCESS;
624 }
625 
626 #define PHB4_PCI_CFG_WRITE(size, type)					\
627 static int64_t phb4_pcicfg_write##size(struct phb *phb, uint32_t bdfn,	\
628 				       uint32_t offset, type data)	\
629 {									\
630 	struct phb4 *p = phb_to_phb4(phb);				\
631 									\
632 	return phb4_pcicfg_write(p, bdfn, offset, sizeof(type), data);	\
633 }
634 
635 PHB4_PCI_CFG_READ(8, u8)
636 PHB4_PCI_CFG_READ(16, u16)
637 PHB4_PCI_CFG_READ(32, u32)
638 PHB4_PCI_CFG_WRITE(8, u8)
639 PHB4_PCI_CFG_WRITE(16, u16)
640 PHB4_PCI_CFG_WRITE(32, u32)
641 
642 static int64_t phb4_get_reserved_pe_number(struct phb *phb)
643 {
644 	struct phb4 *p = phb_to_phb4(phb);
645 
646 	return PHB4_RESERVED_PE_NUM(p);
647 }
648 
649 
650 static void phb4_root_port_init(struct phb *phb, struct pci_device *dev,
651 				int ecap, int aercap)
652 {
653 	struct phb4 *p = phb_to_phb4(phb);
654 	struct pci_slot *slot = dev->slot;
655 	uint16_t bdfn = dev->bdfn;
656 	uint16_t val16;
657 	uint32_t val32;
658 
659 	/*
660 	 * Use the PHB's callback so that UTL events will be masked or
661 	 * unmasked when the link is down or up.
662 	 */
663 	if (dev->slot && dev->slot->ops.prepare_link_change &&
664 	    phb->slot && phb->slot->ops.prepare_link_change)
665 		dev->slot->ops.prepare_link_change =
666 			phb->slot->ops.prepare_link_change;
667 
668 	// FIXME: check recommended init values for phb4
669 
670 	/*
671 	 * Enable the bridge slot capability in the root port's config
672 	 * space. This should probably be done *before* we start
673 	 * scanning config space, but we need a pci_device struct to
674 	 * exist before we do a slot lookup so *faaaaaaaaaaaaaart*
675 	 */
676 	if (slot && slot->pluggable && slot->power_limit) {
677 		uint64_t val;
678 
679 		val = in_be64(p->regs + PHB_PCIE_SCR);
680 		val |= PHB_PCIE_SCR_SLOT_CAP;
681 		out_be64(p->regs + PHB_PCIE_SCR, val);
682 
683 		/* update the cached slotcap */
684 		pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_SLOTCAP,
685 				&slot->slot_cap);
686 	}
687 
688 	/* Enable SERR and parity checking */
689 	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
690 	val16 |= (PCI_CFG_CMD_SERR_EN | PCI_CFG_CMD_PERR_RESP |
691 		  PCI_CFG_CMD_MEM_EN);
692 	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
693 
694 	/* Enable reporting various errors */
695 	if (!ecap) return;
696 	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
697 	val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
698 		  PCICAP_EXP_DEVCTL_NFE_REPORT |
699 		  PCICAP_EXP_DEVCTL_FE_REPORT |
700 		  PCICAP_EXP_DEVCTL_UR_REPORT);
701 	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
702 
703 	if (!aercap) return;
704 
705 	/* Mask various unrecoverable errors */
706 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, &val32);
707 	val32 |= (PCIECAP_AER_UE_MASK_POISON_TLP |
708 		  PCIECAP_AER_UE_MASK_COMPL_TIMEOUT |
709 		  PCIECAP_AER_UE_MASK_COMPL_ABORT |
710 		  PCIECAP_AER_UE_MASK_ECRC);
711 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, val32);
712 
713 	/* Report various unrecoverable errors as fatal errors */
714 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, &val32);
715 	val32 |= (PCIECAP_AER_UE_SEVERITY_DLLP |
716 		  PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
717 		  PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
718 		  PCIECAP_AER_UE_SEVERITY_UNEXP_COMPL |
719 		  PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
720 		  PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP);
721 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);
722 
723 	/* Mask various recoverable errors */
724 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, &val32);
725 	val32 |= PCIECAP_AER_CE_MASK_ADV_NONFATAL;
726 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
727 
728 	/* Enable ECRC check */
729 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
730 	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
731 		  PCIECAP_AER_CAPCTL_ECRCC_EN);
732 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
733 
734 	/* Enable all error reporting */
735 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, &val32);
736 	val32 |= (PCIECAP_AER_RERR_CMD_FE |
737 		  PCIECAP_AER_RERR_CMD_NFE |
738 		  PCIECAP_AER_RERR_CMD_CE);
739 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, val32);
740 }
741 
742 static void phb4_switch_port_init(struct phb *phb,
743 				  struct pci_device *dev,
744 				  int ecap, int aercap)
745 {
746 	uint16_t bdfn = dev->bdfn;
747 	uint16_t val16;
748 	uint32_t val32;
749 
750 	// FIXME: update AER settings for phb4
751 
752 	/* Enable SERR and parity checking and disable INTx */
753 	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
754 	val16 |= (PCI_CFG_CMD_PERR_RESP |
755 		  PCI_CFG_CMD_SERR_EN |
756 		  PCI_CFG_CMD_INTx_DIS);
757 	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
758 
759 	/* Disable parity error and enable system error */
760 	pci_cfg_read16(phb, bdfn, PCI_CFG_BRCTL, &val16);
761 	val16 &= ~PCI_CFG_BRCTL_PERR_RESP_EN;
762 	val16 |= PCI_CFG_BRCTL_SERR_EN;
763 	pci_cfg_write16(phb, bdfn, PCI_CFG_BRCTL, val16);
764 
765 	/* Enable reporting various errors */
766 	if (!ecap) return;
767 	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
768 	val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
769 		  PCICAP_EXP_DEVCTL_NFE_REPORT |
770 		  PCICAP_EXP_DEVCTL_FE_REPORT);
771 	/* HW279570 - Disable reporting of correctable errors */
772 	val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
773 	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
774 
775 	/* Unmask all unrecoverable errors */
776 	if (!aercap) return;
777 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, 0x0);
778 
779 	/* Severity of unrecoverable errors */
780 	if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT)
781 		val32 = (PCIECAP_AER_UE_SEVERITY_DLLP |
782 			 PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
783 			 PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
784 			 PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
785 			 PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP |
786 			 PCIECAP_AER_UE_SEVERITY_INTERNAL);
787 	else
788 		val32 = (PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
789 			 PCIECAP_AER_UE_SEVERITY_INTERNAL);
790 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);
791 
792 	/*
793 	 * Mask various correctable errors
794 	 */
795 	val32 = PCIECAP_AER_CE_MASK_ADV_NONFATAL;
796 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
797 
798 	/* Enable ECRC generation and disable ECRC check */
799 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
800 	val32 |= PCIECAP_AER_CAPCTL_ECRCG_EN;
801 	val32 &= ~PCIECAP_AER_CAPCTL_ECRCC_EN;
802 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
803 }
804 
805 static void phb4_endpoint_init(struct phb *phb,
806 			       struct pci_device *dev,
807 			       int ecap, int aercap)
808 {
809 	uint16_t bdfn = dev->bdfn;
810 	uint16_t val16;
811 	uint32_t val32;
812 
813 	/* Enable SERR and parity checking */
814 	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
815 	val16 |= (PCI_CFG_CMD_PERR_RESP |
816 		  PCI_CFG_CMD_SERR_EN);
817 	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
818 
819 	/* Enable reporting various errors */
820 	if (!ecap) return;
821 	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
822 	val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
823 	val16 |= (PCICAP_EXP_DEVCTL_NFE_REPORT |
824 		  PCICAP_EXP_DEVCTL_FE_REPORT |
825 		  PCICAP_EXP_DEVCTL_UR_REPORT);
826 	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
827 
828 	/* Enable ECRC generation and check */
829 	if (!aercap)
830 		return;
831 
832 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
833 	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
834 		  PCIECAP_AER_CAPCTL_ECRCC_EN);
835 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
836 }
837 
838 static int64_t phb4_pcicfg_no_dstate(void *dev __unused,
839 				     struct pci_cfg_reg_filter *pcrf,
840 				     uint32_t offset, uint32_t len __unused,
841 				     uint32_t *data __unused,  bool write)
842 {
843 	uint32_t loff = offset - pcrf->start;
844 
845 	/* Disable D-state change on children of the PHB. For now we
846 	 * simply block all writes to the PM control/status
847 	 */
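	/*
	 * Offsets 4-5 within the PM capability are the PMCSR register,
	 * whose PowerState field selects the device D-state.
	 */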
848 	if (write && loff >= 4 && loff < 6)
849 		return OPAL_SUCCESS;
850 
851 	return OPAL_PARTIAL;
852 }
853 
854 void phb4_pec2_dma_engine_realloc(struct phb4 *p)
855 {
856 	uint64_t reg;
857 
858 	/*
859 	 * Allocate 16 extra dma read engines to stack 0, to boost dma
860 	 * performance for devices on stack 0 of PEC2, i.e. PHB3.
861 	 * It comes at a price of reduced read engine allocation for
862 	 * devices on stack 1 and 2. The engine allocation becomes
863 	 * 48/8/8 instead of the default 32/16/16.
864 	 *
865 	 * The reallocation magic value should be 0xffff0000ff008000,
866 	 * but per the PCI designers, dma engine 32 (bit 0) has a
867 	 * quirk, and 0x7fff80007F008000 has the same effect (engine
868 	 * 32 goes to PHB4).
869 	 */
870 	if (p->index != 3) /* shared slot on PEC2 */
871 		return;
872 
873 	PHBINF(p, "Allocating an extra 16 dma read engines on PEC2 stack0\n");
874 	reg = 0x7fff80007F008000ULL;
875 	xscom_write(p->chip_id,
876 		    p->pci_xscom + XPEC_PCI_PRDSTKOVR, reg);
877 	xscom_write(p->chip_id,
878 		    p->pe_xscom  + XPEC_NEST_READ_STACK_OVERRIDE, reg);
879 }
880 
881 static void phb4_check_device_quirks(struct pci_device *dev)
882 {
883 	/* Some special adapter tweaks for devices directly under the PHB */
884 	if (dev->primary_bus != 1)
885 		return;
886 
887 	/* PM quirk */
888 	if (!pci_has_cap(dev, PCI_CFG_CAP_ID_PM, false))
889 		return;
890 
891 	pci_add_cfg_reg_filter(dev,
892 			       pci_cap(dev, PCI_CFG_CAP_ID_PM, false), 8,
893 			       PCI_REG_FLAG_WRITE,
894 			       phb4_pcicfg_no_dstate);
895 }
896 
897 static int phb4_device_init(struct phb *phb, struct pci_device *dev,
898 			    void *data __unused)
899 {
900 	int ecap, aercap;
901 
902 	/* Setup special device quirks */
903 	phb4_check_device_quirks(dev);
904 
905 	/* Common initialization for the device */
906 	pci_device_init(phb, dev);
907 
908 	ecap = pci_cap(dev, PCI_CFG_CAP_ID_EXP, false);
909 	aercap = pci_cap(dev, PCIECAP_ID_AER, true);
910 	if (dev->dev_type == PCIE_TYPE_ROOT_PORT)
911 		phb4_root_port_init(phb, dev, ecap, aercap);
912 	else if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT ||
913 		 dev->dev_type == PCIE_TYPE_SWITCH_DNPORT)
914 		phb4_switch_port_init(phb, dev, ecap, aercap);
915 	else
916 		phb4_endpoint_init(phb, dev, ecap, aercap);
917 
918 	return 0;
919 }
920 
921 static int64_t phb4_pci_reinit(struct phb *phb, uint64_t scope, uint64_t data)
922 {
923 	struct pci_device *pd;
924 	uint16_t bdfn = data;
925 	int ret;
926 
927 	if (scope != OPAL_REINIT_PCI_DEV)
928 		return OPAL_PARAMETER;
929 
930 	pd = pci_find_dev(phb, bdfn);
931 	if (!pd)
932 		return OPAL_PARAMETER;
933 
934 	ret = phb4_device_init(phb, pd, NULL);
935 	if (ret)
936 		return OPAL_HARDWARE;
937 
938 	return OPAL_SUCCESS;
939 }
940 
941 /* Default value for MBT0, see comments in init_ioda_cache() */
942 static uint64_t phb4_default_mbt0(struct phb4 *p, unsigned int bar_idx)
943 {
944 	uint64_t mbt0;
945 
946 	switch (p->mbt_size - bar_idx - 1) {
947 	case 0:
948 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
949 		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 3);
950 		break;
951 	case 1:
952 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
953 		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 2);
954 		break;
955 	case 2:
956 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
957 		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 1);
958 		break;
959 	default:
960 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_PE_SEG);
961 	}
962 	return mbt0;
963 }
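/*
 * For example, if p->mbt_size were 32, the three highest BARs would
 * default to MDT-mapped mode: MBT[31] uses MDT column 3, MBT[30] column 2
 * and MBT[29] column 1. All other BARs default to fully segmented mode
 * (segment# == PE#).
 */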
964 
965 /*
966  * Clear the saved (cached) IODA state.
967  *
968  * The caches here are used to save the configuration of the IODA tables
969  * done by the OS. When the PHB is reset it loses all of its internal state
970  * so we need to keep a copy to restore from. This function re-initialises
971  * the saved state to sane defaults.
972  */
973 static void phb4_init_ioda_cache(struct phb4 *p)
974 {
975 	uint32_t i;
976 
977 	/*
978 	 * The RTT entries (RTE) are supposed to be initialised to
979 	 * 0xFF which indicates an invalid PE# for that RTT index
980 	 * (the bdfn). However, we set them to the reserved PE# since Linux
981 	 * needs to find the devices first by scanning config space
982 	 * and this occurs before PEs have been assigned.
983 	 */
984 	for (i = 0; i < RTT_TABLE_ENTRIES; i++)
985 		p->tbl_rtt[i] = cpu_to_be16(PHB4_RESERVED_PE_NUM(p));
986 	memset(p->tbl_peltv, 0x0, p->tbl_peltv_size);
987 	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
988 
989 	/* XXX Should we mask them ? */
990 	memset(p->mist_cache, 0x0, sizeof(p->mist_cache));
991 
992 	/* Configure MBT entries 1...N */
993 
994 	/* Column 0 is left 0 and will be used for M32 and configured
995 	 * by the OS. We use MDT columns 1..3 for the last 3 BARs, thus
996 	 * allowing Linux to remap those, and setup all the other ones
997 	 * for now in mode 00 (segment# == PE#). By default those
998 	 * columns are set to map the same way.
999 	 */
1000 	for (i = 0; i < p->max_num_pes; i++) {
1001 		p->mdt_cache[i]  = SETFIELD(IODA3_MDT_PE_B, 0ull, i);
1002 		p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_C, 0ull, i);
1003 		p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_D, 0ull, i);
1004 	}
1005 
1006 	/* Initialize MBT entries for BARs 1...N */
1007 	for (i = 1; i < p->mbt_size; i++) {
1008 		p->mbt_cache[i][0] = phb4_default_mbt0(p, i);
1009 		p->mbt_cache[i][1] = 0;
1010 	}
1011 
1012 	/* Initialize M32 BAR using MBT entry 0, MDT column A */
1013 	p->mbt_cache[0][0] = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
1014 	p->mbt_cache[0][0] |= SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0);
1015 	p->mbt_cache[0][0] |= IODA3_MBT0_TYPE_M32 | (p->mm1_base & IODA3_MBT0_BASE_ADDR);
1016 	p->mbt_cache[0][1] = IODA3_MBT1_ENABLE | ((~(M32_PCI_SIZE - 1)) & IODA3_MBT1_MASK);
1017 }
1018 
1019 static int64_t phb4_wait_bit(struct phb4 *p, uint32_t reg,
1020 			     uint64_t mask, uint64_t want_val)
1021 {
1022 	uint64_t val;
1023 
1024 	/* Wait for all pending TCE kills to complete
1025 	 *
1026 	 * XXX Add timeout...
1027 	 */
1028 	/* XXX SIMICS is nasty... */
1029 	if ((reg == PHB_TCE_KILL || reg == PHB_DMA_READ_WRITE_SYNC) &&
1030 	    chip_quirk(QUIRK_SIMICS))
1031 		return OPAL_SUCCESS;
1032 
1033 	for (;;) {
1034 		val = in_be64(p->regs + reg);
1035 		if (val == 0xffffffffffffffffull) {
1036 			/* XXX Fenced ? */
1037 			return OPAL_HARDWARE;
1038 		}
1039 		if ((val & mask) == want_val)
1040 			break;
1041 
1042 	}
1043 	return OPAL_SUCCESS;
1044 }
1045 
1046 static int64_t phb4_tce_kill(struct phb *phb, uint32_t kill_type,
1047 			     uint64_t pe_number, uint32_t tce_size,
1048 			     uint64_t dma_addr, uint32_t npages)
1049 {
1050 	struct phb4 *p = phb_to_phb4(phb);
1051 	uint64_t val;
1052 	int64_t rc;
1053 
1054 	sync();
1055 	switch(kill_type) {
1056 	case OPAL_PCI_TCE_KILL_PAGES:
1057 		while (npages--) {
1058 			/* Wait for a slot in the HW kill queue */
1059 			rc = phb4_wait_bit(p, PHB_TCE_KILL,
1060 					   PHB_TCE_KILL_ALL |
1061 					   PHB_TCE_KILL_PE |
1062 					   PHB_TCE_KILL_ONE, 0);
1063 			if (rc)
1064 				return rc;
1065 			val = SETFIELD(PHB_TCE_KILL_PENUM, dma_addr, pe_number);
1066 
1067 			/* Set appropriate page size */
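			/*
			 * The masks below check two things: the low bits
			 * verify that the DMA address is aligned to the TCE
			 * page size, and the top nibble verifies that the
			 * address fits within the supported 60-bit range.
			 */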
1068 			switch(tce_size) {
1069 			case 0x1000:
1070 				if (dma_addr & 0xf000000000000fffull)
1071 					return OPAL_PARAMETER;
1072 				break;
1073 			case 0x10000:
1074 				if (dma_addr & 0xf00000000000ffffull)
1075 					return OPAL_PARAMETER;
1076 				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_64K;
1077 				break;
1078 			case 0x200000:
1079 				if (dma_addr & 0xf0000000001fffffull)
1080 					return OPAL_PARAMETER;
1081 				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_2M;
1082 				break;
1083 			case 0x40000000:
1084 				if (dma_addr & 0xf00000003fffffffull)
1085 					return OPAL_PARAMETER;
1086 				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_1G;
1087 				break;
1088 			default:
1089 				return OPAL_PARAMETER;
1090 			}
1091 			/* Perform kill */
1092 			out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ONE | val);
1093 			/* Next page */
1094 			dma_addr += tce_size;
1095 		}
1096 		break;
1097 	case OPAL_PCI_TCE_KILL_PE:
1098 		/* Wait for a slot in the HW kill queue */
1099 		rc = phb4_wait_bit(p, PHB_TCE_KILL,
1100 				   PHB_TCE_KILL_ALL |
1101 				   PHB_TCE_KILL_PE |
1102 				   PHB_TCE_KILL_ONE, 0);
1103 		if (rc)
1104 			return rc;
1105 		/* Perform kill */
1106 		out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_PE |
1107 			 SETFIELD(PHB_TCE_KILL_PENUM, 0ull, pe_number));
1108 		break;
1109 	case OPAL_PCI_TCE_KILL_ALL:
1110 		/* Wait for a slot in the HW kill queue */
1111 		rc = phb4_wait_bit(p, PHB_TCE_KILL,
1112 				   PHB_TCE_KILL_ALL |
1113 				   PHB_TCE_KILL_PE |
1114 				   PHB_TCE_KILL_ONE, 0);
1115 		if (rc)
1116 			return rc;
1117 		/* Perform kill */
1118 		out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ALL);
1119 		break;
1120 	default:
1121 		return OPAL_PARAMETER;
1122 	}
1123 
1124 	/* Start DMA sync process */
1125 	if (is_phb5()){
1126 		val = in_be64(p->regs + PHB_DMA_READ_WRITE_SYNC) &
1127 					(PHB_DMA_READ_SYNC_COMPLETE |
1128 					 PHB_DMA_WRITE_SYNC_COMPLETE);
1129 		out_be64(p->regs + PHB_DMA_READ_WRITE_SYNC,
1130 					val | PHB_DMA_READ_SYNC_START);
1131 
1132 	} else {
1133 		out_be64(p->regs + PHB_DMA_READ_WRITE_SYNC,
1134 			 PHB_DMA_READ_SYNC_START);
1135 	}
1136 
1137 	/* Wait for kill to complete */
1138 	rc = phb4_wait_bit(p, PHB_Q_DMA_R, PHB_Q_DMA_R_TCE_KILL_STATUS, 0);
1139 	if (rc)
1140 		return rc;
1141 
1142 	/* Wait for DMA sync to complete */
1143 	return phb4_wait_bit(p, PHB_DMA_READ_WRITE_SYNC,
1144 			     PHB_DMA_READ_SYNC_COMPLETE,
1145 			     PHB_DMA_READ_SYNC_COMPLETE);
1146 }
1147 
1148 /* phb4_ioda_reset - Reset the IODA tables
1149  *
1150  * @purge: If true, the cache is cleared and the cleared values
1151  *         are applied to HW. If false, the cached values are
1152  *         applied to HW
1153  *
1154  * This resets the IODA tables in the PHB. It is called at
1155  * initialization time, on PHB reset, and can be called
1156  * explicitly from OPAL
1157  */
1158 static int64_t phb4_ioda_reset(struct phb *phb, bool purge)
1159 {
1160 	struct phb4 *p = phb_to_phb4(phb);
1161 	uint32_t i;
1162 	uint64_t val;
1163 
1164 	if (purge) {
1165 		PHBDBG(p, "Purging all IODA tables...\n");
1166 		if (phb->slot)
1167 			phb->slot->link_retries = PHB4_LINK_LINK_RETRIES;
1168 		phb4_init_ioda_cache(p);
1169 	}
1170 
1171 	/* Init_30..31 - Errata workaround, clear PESTA entry 0 */
1172 	phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, false);
1173 	out_be64(p->regs + PHB_IODA_DATA0, 0);
1174 
1175 	/* Init_32..33 - MIST  */
1176 	phb4_ioda_sel(p, IODA3_TBL_MIST, 0, true);
1177 	val = in_be64(p->regs + PHB_IODA_ADDR);
1178 	val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 0xf);
1179 	out_be64(p->regs + PHB_IODA_ADDR, val);
1180 	for (i = 0; i < (p->num_irqs/4); i++)
1181 		out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[i]);
1182 
1183 	/* Init_34..35 - MRT */
1184 	phb4_ioda_sel(p, IODA3_TBL_MRT, 0, true);
1185 	for (i = 0; i < p->mrt_size; i++)
1186 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1187 
1188 	/* Init_36..37 - TVT */
1189 	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
1190 	for (i = 0; i < p->tvt_size; i++)
1191 		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
1192 
1193 	/* Init_38..39 - MBT */
1194 	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
1195 	for (i = 0; i < p->mbt_size; i++) {
1196 		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
1197 		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
1198 	}
1199 
1200 	/* Init_40..41 - MDT */
1201 	phb4_ioda_sel(p, IODA3_TBL_MDT, 0, true);
1202 	for (i = 0; i < p->max_num_pes; i++)
1203 		out_be64(p->regs + PHB_IODA_DATA0, p->mdt_cache[i]);
1204 
1205 	/* Additional OPAL specific inits */
1206 
1207 	/* Clear PEST & PEEV */
1208 	for (i = 0; i < p->max_num_pes; i++) {
1209 		phb4_ioda_sel(p, IODA3_TBL_PESTA, i, false);
1210 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1211 		phb4_ioda_sel(p, IODA3_TBL_PESTB, i, false);
1212 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1213 	}
1214 
1215 	phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
1216 	for (i = 0; i < p->max_num_pes/64; i++)
1217 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1218 
1219 	/* Invalidate RTE, TCE cache */
1220 	out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);
1221 
1222 	return phb4_tce_kill(&p->phb, OPAL_PCI_TCE_KILL_ALL, 0, 0, 0, 0);
1223 }
1224 
1225 /*
1226  * Clear anything we have in the PAPR Error Injection registers. Though
1227  * the spec says PAPR error injection should be one-shot without the
1228  * "sticky" bit, that's false according to the experiments we have run.
1229  * So the kernel has to clear it at the appropriate point to avoid an
1230  * endless frozen PE.
1231  */
1232 static int64_t phb4_papr_errinjct_reset(struct phb *phb)
1233 {
1234 	struct phb4 *p = phb_to_phb4(phb);
1235 
1236 	out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
1237 	out_be64(p->regs + PHB_PAPR_ERR_INJ_ADDR, 0x0ul);
1238 	out_be64(p->regs + PHB_PAPR_ERR_INJ_MASK, 0x0ul);
1239 
1240 	return OPAL_SUCCESS;
1241 }
1242 
1243 static int64_t phb4_set_phb_mem_window(struct phb *phb,
1244 				       uint16_t window_type,
1245 				       uint16_t window_num,
1246 				       uint64_t addr,
1247 				       uint64_t pci_addr __unused,
1248 				       uint64_t size)
1249 {
1250 	struct phb4 *p = phb_to_phb4(phb);
1251 	uint64_t mbt0, mbt1;
1252 
1253 	/*
1254 	 * We have a unified MBT for all BARs on PHB4.
1255 	 *
1256 	 * So we use it as follows:
1257 	 *
1258 	 *  - M32 is hard wired to be MBT[0] and uses MDT column 0
1259 	 *    for remapping.
1260 	 *
1261 	 *  - MBT[1..n] are available to the OS, currently only as
1262 	 *    fully segmented or single PE (we don't yet expose the
1263 	 *    new segmentation modes).
1264 	 *
1265 	 *  - We configure the 3 last BARs to columns 1..3, initially
1266 	 *    set to segment# == PE#. We will need to provide some
1267 	 *    extensions to the existing APIs to enable remapping of
1268 	 *    segments on those BARs (and only those) as the current
1269 	 *    API forces single segment mode.
1270 	 */
1271 	switch (window_type) {
1272 	case OPAL_IO_WINDOW_TYPE:
1273 	case OPAL_M32_WINDOW_TYPE:
1274 		return OPAL_UNSUPPORTED;
1275 	case OPAL_M64_WINDOW_TYPE:
1276 		if (window_num == 0 || window_num >= p->mbt_size) {
1277 			PHBERR(p, "%s: Invalid window %d\n",
1278 			       __func__, window_num);
1279 			return OPAL_PARAMETER;
1280 		}
1281 
1282 		mbt0 = p->mbt_cache[window_num][0];
1283 		mbt1 = p->mbt_cache[window_num][1];
1284 
1285 		/* XXX For now we assume the 4K minimum alignment,
1286 		 * todo: check with the HW folks what the exact limits
1287 		 * are based on the segmentation model.
1288 		 */
1289 		if ((addr & 0xFFFul) || (size & 0xFFFul)) {
1290 			PHBERR(p, "%s: Bad addr/size alignment %llx/%llx\n",
1291 			       __func__, addr, size);
1292 			return OPAL_PARAMETER;
1293 		}
1294 
1295 		/* size should be 2^N */
1296 		if (!size || size & (size-1)) {
1297 			PHBERR(p, "%s: size not a power of 2: %llx\n",
1298 			       __func__,  size);
1299 			return OPAL_PARAMETER;
1300 		}
1301 
1302 		/* address should be size aligned */
1303 		if (addr & (size - 1)) {
1304 			PHBERR(p, "%s: addr not size aligned %llx/%llx\n",
1305 			       __func__, addr, size);
1306 			return OPAL_PARAMETER;
1307 		}
1308 
1309 		break;
1310 	default:
1311 		return OPAL_PARAMETER;
1312 	}
1313 
1314 	/* The BAR shouldn't be enabled yet */
1315 	if (mbt0 & IODA3_MBT0_ENABLE)
1316 		return OPAL_PARTIAL;
1317 
1318 	/* Apply the settings */
1319 	mbt0 = SETFIELD(IODA3_MBT0_BASE_ADDR, mbt0, addr >> 12);
1320 	mbt1 = SETFIELD(IODA3_MBT1_MASK, mbt1, ~((size >> 12) -1));
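	/*
	 * Worked example: a 256MB window gives size >> 12 = 0x10000, so the
	 * mask field is set to ~0xffff; the low 16 bits of the 4KB-granular
	 * PCI address fall within the window and the remaining bits must
	 * match the base programmed in MBT0.
	 */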
1321 	p->mbt_cache[window_num][0] = mbt0;
1322 	p->mbt_cache[window_num][1] = mbt1;
1323 
1324 	return OPAL_SUCCESS;
1325 }
1326 
1327 /*
1328  * For one specific M64 BAR, it can be shared by all PEs,
1329  * or owned by single PE exclusively.
1330  */
1331 static int64_t phb4_phb_mmio_enable(struct phb __unused *phb,
1332 				    uint16_t window_type,
1333 				    uint16_t window_num,
1334 				    uint16_t enable)
1335 {
1336 	struct phb4 *p = phb_to_phb4(phb);
1337 	uint64_t mbt0, mbt1, base, mask;
1338 
1339 	/*
1340 	 * By design, PHB4 doesn't support IODT any more, and
1341 	 * this function can't enable the M32 BAR either. So it
1342 	 * is only used for M64 mappings and each
1343 	 * BAR is supposed to be shared by all PEs.
1344 	 *
1345 	 * TODO: Add support for some of the new PHB4 split modes
1346 	 */
1347 	switch (window_type) {
1348 	case OPAL_IO_WINDOW_TYPE:
1349 	case OPAL_M32_WINDOW_TYPE:
1350 		return OPAL_UNSUPPORTED;
1351 	case OPAL_M64_WINDOW_TYPE:
1352 		/* Window 0 is reserved for M32 */
1353 		if (window_num == 0 || window_num >= p->mbt_size ||
1354 		    enable > OPAL_ENABLE_M64_NON_SPLIT) {
1355 			PHBDBG(p,
1356 			       "phb4_phb_mmio_enable wrong args (window %d enable %d)\n",
1357 			       window_num, enable);
1358 			return OPAL_PARAMETER;
1359 		}
1360 		break;
1361 	default:
1362 		return OPAL_PARAMETER;
1363 	}
1364 
1365 	/*
1366 	 * We need to check the base/mask while enabling
1367 	 * the M64 BAR. Otherwise, an invalid base/mask
1368 	 * might cause a fenced AIB unintentionally
1369 	 */
1370 	mbt0 = p->mbt_cache[window_num][0];
1371 	mbt1 = p->mbt_cache[window_num][1];
1372 
1373 	if (enable == OPAL_DISABLE_M64) {
1374 		/* Reset the window to disabled & default mode */
1375 		mbt0 = phb4_default_mbt0(p, window_num);
1376 		mbt1 = 0;
1377 	} else {
1378 		/* Verify that the mode is valid and consistent */
1379 		if (enable == OPAL_ENABLE_M64_SPLIT) {
1380 			uint64_t mode = GETFIELD(IODA3_MBT0_MODE, mbt0);
1381 			if (mode != IODA3_MBT0_MODE_PE_SEG &&
1382 			    mode != IODA3_MBT0_MODE_MDT)
1383 				return OPAL_PARAMETER;
1384 		} else if (enable == OPAL_ENABLE_M64_NON_SPLIT) {
1385 			if (GETFIELD(IODA3_MBT0_MODE, mbt0) !=
1386 			    IODA3_MBT0_MODE_SINGLE_PE)
1387 				return OPAL_PARAMETER;
1388 		} else
1389 			return OPAL_PARAMETER;
1390 
1391 		base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbt0);
1392 		base = (base << 12);
1393 		mask = GETFIELD(IODA3_MBT1_MASK, mbt1);
1394 		if (base < p->mm0_base || !mask)
1395 			return OPAL_PARTIAL;
1396 
1397 		mbt0 |= IODA3_MBT0_ENABLE;
1398 		mbt1 |= IODA3_MBT1_ENABLE;
1399 	}
1400 
1401 	/* Update HW and cache */
1402 	p->mbt_cache[window_num][0] = mbt0;
1403 	p->mbt_cache[window_num][1] = mbt1;
1404 	phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
1405 	out_be64(p->regs + PHB_IODA_DATA0, mbt0);
1406 	out_be64(p->regs + PHB_IODA_DATA0, mbt1);
1407 
1408 	return OPAL_SUCCESS;
1409 }
1410 
1411 static int64_t phb4_map_pe_mmio_window(struct phb *phb,
1412 				       uint64_t pe_number,
1413 				       uint16_t window_type,
1414 				       uint16_t window_num,
1415 				       uint16_t segment_num)
1416 {
1417 	struct phb4 *p = phb_to_phb4(phb);
1418 	uint64_t mbt0, mbt1, mdt0;
1419 
1420 	if (pe_number >= p->num_pes)
1421 		return OPAL_PARAMETER;
1422 
1423 	/*
1424 	 * We support a combined MDT that has 4 columns. We let the OS
1425 	 * use column 0 for M32.
1426 	 *
1427 	 * We configure the 3 last BARs to map columns 3..1 which by default
1428 	 * are set to map segment# == pe#, but can be remapped here if we
1429 	 * extend this function.
1430 	 *
1431 	 * The problem is that the current API was "hijacked" so that an
1432 	 * attempt at remapping any segment of an M64 has the effect of
1433 	 * turning it into a single-PE mode BAR. So if we want to support
1434 	 * remapping we'll have to work around this, for example by creating
1435 	 * a new API or a new window type...
1436 	 */
1437 	switch(window_type) {
1438 	case OPAL_IO_WINDOW_TYPE:
1439 		return OPAL_UNSUPPORTED;
1440 	case OPAL_M32_WINDOW_TYPE:
1441 		if (window_num != 0 || segment_num >= p->num_pes)
1442 			return OPAL_PARAMETER;
1443 
1444 		mdt0 = p->mdt_cache[segment_num];
1445 		mdt0 = SETFIELD(IODA3_MDT_PE_A, mdt0, pe_number);
1446 		phb4_ioda_sel(p, IODA3_TBL_MDT, segment_num, false);
1447 		out_be64(p->regs + PHB_IODA_DATA0, mdt0);
1448 		break;
1449 	case OPAL_M64_WINDOW_TYPE:
1450 		if (window_num == 0 || window_num >= p->mbt_size)
1451 			return OPAL_PARAMETER;
1452 
1453 		mbt0 = p->mbt_cache[window_num][0];
1454 		mbt1 = p->mbt_cache[window_num][1];
1455 
1456 		/* The BAR shouldn't be enabled yet */
1457 		if (mbt0 & IODA3_MBT0_ENABLE)
1458 			return OPAL_PARTIAL;
1459 
1460 		/* Set to single PE mode and configure the PE */
1461 		mbt0 = SETFIELD(IODA3_MBT0_MODE, mbt0,
1462 				IODA3_MBT0_MODE_SINGLE_PE);
1463 		mbt1 = SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, mbt1, pe_number);
1464 		p->mbt_cache[window_num][0] = mbt0;
1465 		p->mbt_cache[window_num][1] = mbt1;
1466 		break;
1467 	default:
1468 		return OPAL_PARAMETER;
1469 	}
1470 
1471 	return OPAL_SUCCESS;
1472 }
1473 
1474 static int64_t phb4_map_pe_dma_window(struct phb *phb,
1475 				      uint64_t pe_number,
1476 				      uint16_t window_id,
1477 				      uint16_t tce_levels,
1478 				      uint64_t tce_table_addr,
1479 				      uint64_t tce_table_size,
1480 				      uint64_t tce_page_size)
1481 {
1482 	struct phb4 *p = phb_to_phb4(phb);
1483 	uint64_t tts_encoded;
1484 	uint64_t data64 = 0;
1485 
1486 	/*
1487 	 * We configure the PHB in 2 TVE per PE mode to match phb3.
1488 	 * Current Linux implementation *requires* the two windows per
1489 	 * PE.
1490 	 *
1491 	 * Note: On DD2.0 this is the normal mode of operation.
1492 	 */
1493 
1494 	/*
1495 	 * Sanity check. We currently only support "2 window per PE" mode
1496 	 * ie, only bit 59 of the PCI address is used to select the window
1497 	 */
1498 	if (pe_number >= p->num_pes || (window_id >> 1) != pe_number)
1499 		return OPAL_PARAMETER;
1500 
1501 	/*
1502 	 * tce_table_size == 0 is used to disable an entry, in this case
1503 	 * we ignore other arguments
1504 	 */
1505 	if (tce_table_size == 0) {
1506 		phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
1507 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1508 		p->tve_cache[window_id] = 0;
1509 		return OPAL_SUCCESS;
1510 	}
1511 
1512 	/* Additional arguments validation */
1513 	if (tce_levels < 1 || tce_levels > 5 ||
1514 	    !is_pow2(tce_table_size) ||
1515 	    tce_table_size < 0x1000)
1516 		return OPAL_PARAMETER;
1517 
1518 	/* Encode TCE table size */
1519 	data64 = SETFIELD(IODA3_TVT_TABLE_ADDR, 0ul, tce_table_addr >> 12);
1520 	tts_encoded = ilog2(tce_table_size) - 11;
1521 	if (tts_encoded > 31)
1522 		return OPAL_PARAMETER;
1523 	data64 = SETFIELD(IODA3_TVT_TCE_TABLE_SIZE, data64, tts_encoded);
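	/*
	 * Worked example: a 64KB TCE table gives ilog2(0x10000) = 16, so
	 * tts_encoded = 5; the minimum 4KB table encodes as 1.
	 */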
1524 
1525 	/* Encode TCE page size */
1526 	switch (tce_page_size) {
1527 	case 0x1000:	/* 4K */
1528 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 1);
1529 		break;
1530 	case 0x10000:	/* 64K */
1531 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 5);
1532 		break;
1533 	case 0x200000:	/* 2M */
1534 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 10);
1535 		break;
1536 	case 0x40000000: /* 1G */
1537 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 19);
1538 		break;
1539 	default:
1540 		return OPAL_PARAMETER;
1541 	}
1542 
1543 	/* Encode number of levels */
1544 	data64 = SETFIELD(IODA3_TVT_NUM_LEVELS, data64, tce_levels - 1);
1545 
1546 	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
1547 	out_be64(p->regs + PHB_IODA_DATA0, data64);
1548 	p->tve_cache[window_id] = data64;
1549 
1550 	return OPAL_SUCCESS;
1551 }
1552 
1553 static int64_t phb4_map_pe_dma_window_real(struct phb *phb,
1554 					   uint64_t pe_number,
1555 					   uint16_t window_id,
1556 					   uint64_t pci_start_addr,
1557 					   uint64_t pci_mem_size)
1558 {
1559 	struct phb4 *p = phb_to_phb4(phb);
1560 	uint64_t end = pci_start_addr + pci_mem_size;
1561 	uint64_t tve;
1562 
1563 	if (pe_number >= p->num_pes ||
1564 	    (window_id >> 1) != pe_number)
1565 		return OPAL_PARAMETER;
1566 
1567 	if (pci_mem_size) {
1568 		/* Enable */
1569 
1570 		/*
1571 		 * Check that the start address has the right TVE index;
1572 		 * we only support the 1-bit mode where each PE has 2
1573 		 * TVEs
1574 		 */
1575 		if ((pci_start_addr >> 59) != (window_id & 1))
1576 			return OPAL_PARAMETER;
1577 		pci_start_addr &= ((1ull << 59) - 1);
1578 		end = pci_start_addr + pci_mem_size;
1579 
1580 		/* We have to be 16M aligned */
1581 		if ((pci_start_addr & 0x00ffffff) ||
1582 		    (pci_mem_size & 0x00ffffff))
1583 			return OPAL_PARAMETER;
1584 
1585 		/*
1586 		 * It *looks* like this is the max we can support (we need
1587 		 * to verify this). Also we are not checking for rollover,
1588 		 * but then we aren't trying too hard to protect ourselves
1589 		 * against a completely broken OS.
1590 		 */
1591 		if (end > 0x0003ffffffffffffull)
1592 			return OPAL_PARAMETER;
1593 
1594 		/*
1595 		 * Put start address bits 49:24 into TVE[52:53]||[0:23]
1596 		 * and end address bits 49:24 into TVE[54:55]||[24:47]
1597 		 * and set TVE[51]
1598 		 */
1599 		tve  = (pci_start_addr << 16) & (0xffffffull << 40);
1600 		tve |= (pci_start_addr >> 38) & (3ull << 10);
1601 		tve |= (end >>  8) & (0xfffffful << 16);
1602 		tve |= (end >> 40) & (3ull << 8);
1603 		tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
1604 	} else {
1605 		/* Disable */
1606 		tve = 0;
1607 	}
1608 
1609 	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
1610 	out_be64(p->regs + PHB_IODA_DATA0, tve);
1611 	p->tve_cache[window_id] = tve;
1612 
1613 	return OPAL_SUCCESS;
1614 }
1615 
1616 static int64_t phb4_set_option(struct phb *phb, enum OpalPhbOption opt,
1617 			       uint64_t setting)
1618 {
1619 	struct phb4 *p = phb_to_phb4(phb);
1620 	uint64_t data64;
1621 
1622 	data64 = phb4_read_reg(p, PHB_CTRLR);
1623 	switch (opt) {
1624 	case OPAL_PHB_OPTION_TVE1_4GB:
1625 		if (setting > 1)
1626 			return OPAL_PARAMETER;
1627 
1628 		PHBDBG(p, "4GB bypass mode = %lld\n", setting);
1629 		if (setting)
1630 			data64 |= PPC_BIT(24);
1631 		else
1632 			data64 &= ~PPC_BIT(24);
1633 		break;
1634 	case OPAL_PHB_OPTION_MMIO_EEH_DISABLE:
1635 		if (setting > 1)
1636 			return OPAL_PARAMETER;
1637 
1638 		PHBDBG(p, "MMIO EEH Disable = %lld\n", setting);
1639 		if (setting)
1640 			data64 |= PPC_BIT(14);
1641 		else
1642 			data64 &= ~PPC_BIT(14);
1643 		break;
1644 	default:
1645 		return OPAL_UNSUPPORTED;
1646 	}
1647 	phb4_write_reg(p, PHB_CTRLR, data64);
1648 
1649 	return OPAL_SUCCESS;
1650 }
1651 
1652 static int64_t phb4_get_option(struct phb *phb, enum OpalPhbOption opt,
1653 			       __be64 *setting)
1654 {
1655 	struct phb4 *p = phb_to_phb4(phb);
1656 	uint64_t data64;
1657 
1658 	data64 = phb4_read_reg(p, PHB_CTRLR);
1659 	switch (opt) {
1660 	case OPAL_PHB_OPTION_TVE1_4GB:
1661 		*setting = cpu_to_be64((data64 & PPC_BIT(24)) ? 1 : 0);
1662 		break;
1663 	case OPAL_PHB_OPTION_MMIO_EEH_DISABLE:
1664 		*setting = cpu_to_be64((data64 & PPC_BIT(14)) ? 1 : 0);
1665 		break;
1666 	default:
1667 		return OPAL_UNSUPPORTED;
1668 	}
1669 
1670 	return OPAL_SUCCESS;
1671 }
1672 
static int64_t phb4_set_ive_pe(struct phb *phb,
1674 			       uint64_t pe_number,
1675 			       uint32_t ive_num)
1676 {
1677 	struct phb4 *p = phb_to_phb4(phb);
1678 	uint32_t mist_idx;
1679 	uint32_t mist_quad;
1680 	uint32_t mist_shift;
1681 	uint64_t val;
1682 
1683 	if (pe_number >= p->num_pes || ive_num >= (p->num_irqs - 8))
1684 		return OPAL_PARAMETER;
1685 
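	/*
	 * Each 64-bit MIST entry packs four IVEs, 16 bits each, with the
	 * first IVE of the group in the most significant quarter.  As an
	 * illustrative example, ive_num 5 maps to MIST entry 1, quad 1,
	 * i.e. the 16-bit field covering bits 47:32 of that entry (the
	 * PE# occupies the low 12 bits of the field).
	 */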
1686 	mist_idx = ive_num >> 2;
1687 	mist_quad = ive_num & 3;
1688 	mist_shift = (3 - mist_quad) << 4;
1689 	p->mist_cache[mist_idx] &= ~(0x0fffull << mist_shift);
1690 	p->mist_cache[mist_idx] |=  ((uint64_t)pe_number) << mist_shift;
1691 
1692 	/* Note: This has the side effect of clearing P/Q, so this
1693 	 * shouldn't be called while the interrupt is "hot"
1694 	 */
1695 
1696 	phb4_ioda_sel(p, IODA3_TBL_MIST, mist_idx, false);
1697 
1698 	/* We need to inject the appropriate MIST write enable bit
1699 	 * in the IODA table address register
1700 	 */
1701 	val = in_be64(p->regs + PHB_IODA_ADDR);
1702 	val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 8 >> mist_quad);
1703 	out_be64(p->regs + PHB_IODA_ADDR, val);
1704 
1705 	/* Write entry */
1706 	out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[mist_idx]);
1707 
1708 	return OPAL_SUCCESS;
1709 }
1710 
static int64_t phb4_get_msi_32(struct phb *phb,
1712 			       uint64_t pe_number,
1713 			       uint32_t ive_num,
1714 			       uint8_t msi_range,
1715 			       uint32_t *msi_address,
1716 			       uint32_t *message_data)
1717 {
1718 	struct phb4 *p = phb_to_phb4(phb);
1719 
	/*
	 * Sanity check. We needn't check mve_number (PE#)
	 * on PHB4 since the interrupt source is purely determined
	 * by its DMA address and data, but the check isn't
	 * harmful.
	 */
	if (pe_number >= p->num_pes ||
	    ive_num >= (p->num_irqs - 8) ||
	    msi_range != 1 || !msi_address || !message_data)
		return OPAL_PARAMETER;
1730 
1731 	/*
1732 	 * DMA address and data will form the IVE index.
1733 	 * For more details, please refer to IODA2 spec.
1734 	 */
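	/*
	 * Illustrative example: ive_num 0x123 yields an MSI address of
	 * 0xFFFF1200 (ive_num >> 5 encoded from bit 9 up) and message
	 * data 0x03 (ive_num & 0x1f); together they reconstruct the IVE
	 * index.
	 */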
1735 	*msi_address = 0xFFFF0000 | ((ive_num << 4) & 0xFFFFFE0F);
1736 	*message_data = ive_num & 0x1F;
1737 
1738 	return OPAL_SUCCESS;
1739 }
1740 
static int64_t phb4_get_msi_64(struct phb *phb,
1742 			       uint64_t pe_number,
1743 			       uint32_t ive_num,
1744 			       uint8_t msi_range,
1745 			       uint64_t *msi_address,
1746 			       uint32_t *message_data)
1747 {
1748 	struct phb4 *p = phb_to_phb4(phb);
1749 
1750 	/* Sanity check */
1751 	if (pe_number >= p->num_pes ||
1752 	    ive_num >= (p->num_irqs - 8) ||
1753 	    msi_range != 1 || !msi_address || !message_data)
1754 		return OPAL_PARAMETER;
1755 
1756 	/*
1757 	 * DMA address and data will form the IVE index.
1758 	 * For more details, please refer to IODA2 spec.
1759 	 */
1760 	*msi_address = (0x1ul << 60) | ((ive_num << 4) & 0xFFFFFFFFFFFFFE0Ful);
1761 	*message_data = ive_num & 0x1F;
1762 
1763 	return OPAL_SUCCESS;
1764 }
1765 
static void phb4_rc_err_clear(struct phb4 *p)
1767 {
1768 	/* Init_47 - Clear errors */
1769 	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, 0xffff);
1770 
1771 	if (p->ecap <= 0)
1772 		return;
1773 
1774 	phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
1775 			     PCICAP_EXP_DEVSTAT_CE	|
1776 			     PCICAP_EXP_DEVSTAT_NFE	|
1777 			     PCICAP_EXP_DEVSTAT_FE	|
1778 			     PCICAP_EXP_DEVSTAT_UE);
1779 
1780 	if (p->aercap <= 0)
1781 		return;
1782 
1783 	/* Clear all UE status */
1784 	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS,
1785 			     0xffffffff);
1786 	/* Clear all CE status */
1787 	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS,
1788 			     0xffffffff);
1789 	/* Clear root error status */
1790 	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA,
1791 			     0xffffffff);
1792 }
1793 
static void phb4_err_clear_regb(struct phb4 *p)
1795 {
1796 	uint64_t val64;
1797 
1798 	val64 = phb4_read_reg(p, PHB_REGB_ERR_STATUS);
1799 	phb4_write_reg(p, PHB_REGB_ERR_STATUS, val64);
1800 	phb4_write_reg(p, PHB_REGB_ERR1_STATUS, 0x0ul);
1801 	phb4_write_reg(p, PHB_REGB_ERR_LOG_0, 0x0ul);
1802 	phb4_write_reg(p, PHB_REGB_ERR_LOG_1, 0x0ul);
1803 }
1804 
/*
 * This function can be called during error recovery for all classes of
 * errors.  This is new to PHB4; previous revisions had separate
 * sequences for INF/ER/Fatal errors.
 *
 * The "Rec #" comments in this function refer to "Recov_#" steps in the
 * PHB4 INF recovery sequence.
 */
static void phb4_err_clear(struct phb4 *p)
1814 {
1815 	uint64_t val64;
1816 	uint64_t fir = phb4_read_reg(p, PHB_LEM_FIR_ACCUM);
1817 
1818 	/* Rec 1: Acquire the PCI config lock (we don't need to do this) */
1819 
1820 	/* Rec 2...15: Clear error status in RC config space */
1821 	phb4_rc_err_clear(p);
1822 
1823 	/* Rec 16...23: Clear PBL errors */
1824 	val64 = phb4_read_reg(p, PHB_PBL_ERR_STATUS);
1825 	phb4_write_reg(p, PHB_PBL_ERR_STATUS, val64);
1826 	phb4_write_reg(p, PHB_PBL_ERR1_STATUS, 0x0ul);
1827 	phb4_write_reg(p, PHB_PBL_ERR_LOG_0, 0x0ul);
1828 	phb4_write_reg(p, PHB_PBL_ERR_LOG_1, 0x0ul);
1829 
1830 	/* Rec 24...31: Clear REGB errors */
1831 	phb4_err_clear_regb(p);
1832 
1833 	/* Rec 32...59: Clear PHB error trap */
1834 	val64 = phb4_read_reg(p, PHB_TXE_ERR_STATUS);
1835 	phb4_write_reg(p, PHB_TXE_ERR_STATUS, val64);
1836 	phb4_write_reg(p, PHB_TXE_ERR1_STATUS, 0x0ul);
1837 	phb4_write_reg(p, PHB_TXE_ERR_LOG_0, 0x0ul);
1838 	phb4_write_reg(p, PHB_TXE_ERR_LOG_1, 0x0ul);
1839 
1840 	val64 = phb4_read_reg(p, PHB_RXE_ARB_ERR_STATUS);
1841 	phb4_write_reg(p, PHB_RXE_ARB_ERR_STATUS, val64);
1842 	phb4_write_reg(p, PHB_RXE_ARB_ERR1_STATUS, 0x0ul);
1843 	phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_0, 0x0ul);
1844 	phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_1, 0x0ul);
1845 
1846 	val64 = phb4_read_reg(p, PHB_RXE_MRG_ERR_STATUS);
1847 	phb4_write_reg(p, PHB_RXE_MRG_ERR_STATUS, val64);
1848 	phb4_write_reg(p, PHB_RXE_MRG_ERR1_STATUS, 0x0ul);
1849 	phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_0, 0x0ul);
1850 	phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_1, 0x0ul);
1851 
1852 	val64 = phb4_read_reg(p, PHB_RXE_TCE_ERR_STATUS);
1853 	phb4_write_reg(p, PHB_RXE_TCE_ERR_STATUS, val64);
1854 	phb4_write_reg(p, PHB_RXE_TCE_ERR1_STATUS, 0x0ul);
1855 	phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_0, 0x0ul);
1856 	phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_1, 0x0ul);
1857 
1858 	val64 = phb4_read_reg(p, PHB_ERR_STATUS);
1859 	phb4_write_reg(p, PHB_ERR_STATUS, val64);
1860 	phb4_write_reg(p, PHB_ERR1_STATUS, 0x0ul);
1861 	phb4_write_reg(p, PHB_ERR_LOG_0, 0x0ul);
1862 	phb4_write_reg(p, PHB_ERR_LOG_1, 0x0ul);
1863 
1864 	/* Rec 61/62: Clear FIR/WOF */
1865 	phb4_write_reg(p, PHB_LEM_FIR_AND_MASK, ~fir);
1866 	phb4_write_reg(p, PHB_LEM_WOF, 0x0ul);
1867 
1868 	/* Rec 63: Update LEM mask to its initial value */
1869 	phb4_write_reg(p, PHB_LEM_ERROR_MASK, 0x0ul);
1870 
1871 	/* Rec 64: Clear the PCI config lock (we don't need to do this) */
1872 }
1873 
static void phb4_read_phb_status(struct phb4 *p,
1875 				 struct OpalIoPhb4ErrorData *stat)
1876 {
1877 	uint32_t i;
1878 	__be64 *pPEST;
1879 	uint16_t __16;
1880 	uint32_t __32;
1881 	uint64_t __64;
1882 
1883 	memset(stat, 0, sizeof(struct OpalIoPhb4ErrorData));
1884 
1885 	/* Error data common part */
1886 	stat->common.version = cpu_to_be32(OPAL_PHB_ERROR_DATA_VERSION_1);
1887 	stat->common.ioType  = cpu_to_be32(OPAL_PHB_ERROR_DATA_TYPE_PHB4);
1888 	stat->common.len     = cpu_to_be32(sizeof(struct OpalIoPhb4ErrorData));
1889 
1890 	/* Use ASB for config space if the PHB is fenced */
1891 	if (p->flags & PHB4_AIB_FENCED)
1892 		p->flags |= PHB4_CFG_USE_ASB;
1893 
1894 	/* Grab RC bridge control, make it 32-bit */
1895 	phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &__16);
1896 	stat->brdgCtl = cpu_to_be32(__16);
1897 
1898 	/*
1899 	 * Grab various RC PCIe capability registers. All device, slot
1900 	 * and link status are 16-bit, so we grab the pair control+status
1901 	 * for each of them
1902 	 */
1903 	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &__32);
1904 	stat->deviceStatus = cpu_to_be32(__32);
1905 	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_SLOTCTL, &__32);
1906 	stat->slotStatus = cpu_to_be32(__32);
1907 	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &__32);
1908 	stat->linkStatus = cpu_to_be32(__32);
1909 
	/*
	 * I assume these are the standard config space header; cmd &
	 * status together make 32 bits. Secondary status is 16-bit so
	 * I'll clear the top half of that one.
	 */
1915 	phb4_pcicfg_read32(&p->phb, 0, PCI_CFG_CMD, &__32);
1916 	stat->devCmdStatus = cpu_to_be32(__32);
1917 	phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, &__16);
1918 	stat->devSecStatus = cpu_to_be32(__16);
1919 
1920 	/* Grab a bunch of AER regs */
1921 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA, &__32);
1922 	stat->rootErrorStatus = cpu_to_be32(__32);
1923 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS, &__32);
1924 	stat->uncorrErrorStatus = cpu_to_be32(__32);
1925 
1926 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS, &__32);
1927 	stat->corrErrorStatus = cpu_to_be32(__32);
1928 
1929 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG0, &__32);
1930 	stat->tlpHdr1 = cpu_to_be32(__32);
1931 
1932 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG1, &__32);
1933 	stat->tlpHdr2 = cpu_to_be32(__32);
1934 
1935 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG2, &__32);
1936 	stat->tlpHdr3 = cpu_to_be32(__32);
1937 
1938 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG3, &__32);
1939 	stat->tlpHdr4 = cpu_to_be32(__32);
1940 
1941 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_SRCID, &__32);
1942 	stat->sourceId = cpu_to_be32(__32);
1943 
1944 
1945 	/* PEC NFIR, same as P8/PHB3 */
1946 	xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &__64);
1947 	stat->nFir = cpu_to_be64(__64);
1948 	xscom_read(p->chip_id, p->pe_stk_xscom + 0x3, &__64);
1949 	stat->nFirMask = cpu_to_be64(__64);
1950 	xscom_read(p->chip_id, p->pe_stk_xscom + 0x8, &__64);
1951 	stat->nFirWOF = cpu_to_be64(__64);
1952 
1953 	/* PHB4 inbound and outbound error Regs */
1954 	stat->phbPlssr = cpu_to_be64(phb4_read_reg_asb(p, PHB_CPU_LOADSTORE_STATUS));
1955 	stat->phbCsr = cpu_to_be64(phb4_read_reg_asb(p, PHB_DMA_CHAN_STATUS));
1956 	stat->lemFir = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_FIR_ACCUM));
1957 	stat->lemErrorMask = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_ERROR_MASK));
1958 	stat->lemWOF = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_WOF));
1959 	stat->phbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_STATUS));
1960 	stat->phbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR1_STATUS));
1961 	stat->phbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_LOG_0));
1962 	stat->phbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_LOG_1));
1963 	stat->phbTxeErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_STATUS));
1964 	stat->phbTxeFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR1_STATUS));
1965 	stat->phbTxeErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_0));
1966 	stat->phbTxeErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_1));
1967 	stat->phbRxeArbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_STATUS));
1968 	stat->phbRxeArbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR1_STATUS));
1969 	stat->phbRxeArbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_0));
1970 	stat->phbRxeArbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_1));
1971 	stat->phbRxeMrgErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_STATUS));
1972 	stat->phbRxeMrgFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR1_STATUS));
1973 	stat->phbRxeMrgErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_0));
1974 	stat->phbRxeMrgErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_1));
1975 	stat->phbRxeTceErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_STATUS));
1976 	stat->phbRxeTceFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR1_STATUS));
1977 	stat->phbRxeTceErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_0));
1978 	stat->phbRxeTceErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_1));
1979 
1980 	/* PHB4 REGB error registers */
1981 	stat->phbPblErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_STATUS));
1982 	stat->phbPblFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR1_STATUS));
1983 	stat->phbPblErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_0));
1984 	stat->phbPblErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_1));
1985 
1986 	stat->phbPcieDlpErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERR_STATUS));
1987 	stat->phbPcieDlpErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG1));
1988 	stat->phbPcieDlpErrorLog2 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG2));
1989 
1990 	stat->phbRegbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_STATUS));
1991 	stat->phbRegbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR1_STATUS));
1992 	stat->phbRegbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_0));
1993 	stat->phbRegbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_1));
1994 
1995 	/*
1996 	 * Grab PESTA & B content. The error bit (bit#0) should
1997 	 * be fetched from IODA and the left content from memory
1998 	 * resident tables.
1999 	 */
2000 	 pPEST = (__be64 *)p->tbl_pest;
2001 	 phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, true);
2002 	 for (i = 0; i < p->max_num_pes; i++) {
2003 		 stat->pestA[i] = cpu_to_be64(phb4_read_reg_asb(p, PHB_IODA_DATA0));
2004 		 stat->pestA[i] |= pPEST[2 * i];
2005 	 }
2006 
2007 	 phb4_ioda_sel(p, IODA3_TBL_PESTB, 0, true);
2008 	 for (i = 0; i < p->max_num_pes; i++) {
2009 		 stat->pestB[i] = cpu_to_be64(phb4_read_reg_asb(p, PHB_IODA_DATA0));
2010 		 stat->pestB[i] |= pPEST[2 * i + 1];
2011 	 }
2012 }
2013 
static void __unused phb4_dump_peltv(struct phb4 *p)
2015 {
2016 	int stride = p->max_num_pes / 64;
2017 	uint64_t *tbl = (void *) p->tbl_peltv;
2018 	unsigned int pe;
2019 
2020 	PHBERR(p, "PELT-V: base addr: %p size: %llx (%d PEs, stride = %d)\n",
2021 			tbl, p->tbl_peltv_size, p->max_num_pes, stride);
2022 
2023 	for (pe = 0; pe < p->max_num_pes; pe++) {
2024 		unsigned int i, j;
2025 		uint64_t sum = 0;
2026 
2027 		i = pe * stride;
2028 
		/*
		 * Only print an entry if there are bits set in the PE's
		 * PELT-V entry. There are a few hundred possible PEs and
		 * generally only a handful will be in use.
		 */
2034 
2035 		for (j = 0; j < stride; j++)
2036 			sum |= tbl[i + j];
2037 		if (!sum)
2038 			continue; /* unused PE, skip it */
2039 
2040 		if (p->max_num_pes == 512) {
2041 			PHBERR(p, "PELT-V[%03x] = "
2042 				"%016llx %016llx %016llx %016llx"
2043 				"%016llx %016llx %016llx %016llx\n", pe,
2044 				tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3],
2045 				tbl[i + 4], tbl[i + 5], tbl[i + 6], tbl[i + 7]);
2046 		} else if (p->max_num_pes == 256) {
2047 			PHBERR(p, "PELT-V[%03x] = "
2048 				"%016llx %016llx %016llx %016llx\n", pe,
2049 				tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3]);
2050 		}
2051 	}
2052 }
2053 
static void __unused phb4_dump_ioda_table(struct phb4 *p, int table)
2055 {
2056 	const char *name;
2057 	int entries, i;
2058 
2059 	switch (table) {
2060 	case IODA3_TBL_LIST:
2061 		name = "LIST";
2062 		entries = 8;
2063 		break;
2064 	case IODA3_TBL_MIST:
2065 		name = "MIST";
2066 		entries = 1024;
2067 		break;
2068 	case IODA3_TBL_RCAM:
2069 		name = "RCAM";
2070 		entries = 128;
2071 		break;
2072 	case IODA3_TBL_MRT:
2073 		name = "MRT";
2074 		entries = 16;
2075 		break;
2076 	case IODA3_TBL_PESTA:
2077 		name = "PESTA";
2078 		entries = 512;
2079 		break;
2080 	case IODA3_TBL_PESTB:
2081 		name = "PESTB";
2082 		entries = 512;
2083 		break;
2084 	case IODA3_TBL_TVT:
2085 		name = "TVT";
2086 		entries = 512;
2087 		break;
2088 	case IODA3_TBL_TCAM:
2089 		name = "TCAM";
2090 		entries = 1024;
2091 		break;
2092 	case IODA3_TBL_TDR:
2093 		name = "TDR";
2094 		entries = 1024;
2095 		break;
2096 	case IODA3_TBL_MBT: /* special case, see below */
2097 		name = "MBT";
2098 		entries = 64;
2099 		break;
2100 	case IODA3_TBL_MDT:
2101 		name = "MDT";
2102 		entries = 512;
2103 		break;
2104 	case IODA3_TBL_PEEV:
2105 		name = "PEEV";
2106 		entries = 8;
2107 		break;
2108 	default:
2109 		PHBERR(p, "Invalid IODA table %d!\n", table);
2110 		return;
2111 	}
2112 
2113 	PHBERR(p, "Start %s dump (only non-zero entries are printed):\n", name);
2114 
2115 	phb4_ioda_sel(p, table, 0, true);
2116 
	/*
	 * Each entry in the MBT is 16 bytes. Every other table has 8 byte
	 * entries, so we special case the MBT to keep the output readable.
	 */
2121 	if (table == IODA3_TBL_MBT) {
2122 		for (i = 0; i < 32; i++) {
2123 			uint64_t v1 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
2124 			uint64_t v2 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
2125 
2126 			if (!v1 && !v2)
2127 				continue;
2128 			PHBERR(p, "MBT[%03x] = %016llx %016llx\n", i, v1, v2);
2129 		}
2130 	} else {
2131 		for (i = 0; i < entries; i++) {
2132 			uint64_t v = phb4_read_reg_asb(p, PHB_IODA_DATA0);
2133 
2134 			if (!v)
2135 				continue;
2136 			PHBERR(p, "%s[%03x] = %016llx\n", name, i, v);
2137 		}
2138 	}
2139 
2140 	PHBERR(p, "End %s dump\n", name);
2141 }
2142 
static void phb4_eeh_dump_regs(struct phb4 *p)
2144 {
2145 	struct OpalIoPhb4ErrorData *s;
2146 	uint16_t reg;
2147 	unsigned int i;
2148 
2149 	if (!verbose_eeh)
2150 		return;
2151 
2152 	s = zalloc(sizeof(struct OpalIoPhb4ErrorData));
2153 	if (!s) {
2154 		PHBERR(p, "Failed to allocate error info !\n");
2155 		return;
2156 	}
2157 	phb4_read_phb_status(p, s);
2158 
2159 	PHBERR(p, "                 brdgCtl = %08x\n", be32_to_cpu(s->brdgCtl));
2160 
2161 	/* PHB4 cfg regs */
2162 	PHBERR(p, "            deviceStatus = %08x\n", be32_to_cpu(s->deviceStatus));
2163 	PHBERR(p, "              slotStatus = %08x\n", be32_to_cpu(s->slotStatus));
2164 	PHBERR(p, "              linkStatus = %08x\n", be32_to_cpu(s->linkStatus));
2165 	PHBERR(p, "            devCmdStatus = %08x\n", be32_to_cpu(s->devCmdStatus));
2166 	PHBERR(p, "            devSecStatus = %08x\n", be32_to_cpu(s->devSecStatus));
2167 	PHBERR(p, "         rootErrorStatus = %08x\n", be32_to_cpu(s->rootErrorStatus));
2168 	PHBERR(p, "         corrErrorStatus = %08x\n", be32_to_cpu(s->corrErrorStatus));
2169 	PHBERR(p, "       uncorrErrorStatus = %08x\n", be32_to_cpu(s->uncorrErrorStatus));
2170 
2171 	/* Two non OPAL API registers that are useful */
2172 	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &reg);
2173 	PHBERR(p, "                  devctl = %08x\n", reg);
2174 	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
2175 			   &reg);
2176 	PHBERR(p, "                 devStat = %08x\n", reg);
2177 
2178 	/* Byte swap TLP headers so they are the same as the PCIe spec */
2179 	PHBERR(p, "                 tlpHdr1 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr1)));
2180 	PHBERR(p, "                 tlpHdr2 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr2)));
2181 	PHBERR(p, "                 tlpHdr3 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr3)));
2182 	PHBERR(p, "                 tlpHdr4 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr4)));
2183 	PHBERR(p, "                sourceId = %08x\n", be32_to_cpu(s->sourceId));
2184 	PHBERR(p, "                    nFir = %016llx\n", be64_to_cpu(s->nFir));
2185 	PHBERR(p, "                nFirMask = %016llx\n", be64_to_cpu(s->nFirMask));
2186 	PHBERR(p, "                 nFirWOF = %016llx\n", be64_to_cpu(s->nFirWOF));
2187 	PHBERR(p, "                phbPlssr = %016llx\n", be64_to_cpu(s->phbPlssr));
2188 	PHBERR(p, "                  phbCsr = %016llx\n", be64_to_cpu(s->phbCsr));
2189 	PHBERR(p, "                  lemFir = %016llx\n", be64_to_cpu(s->lemFir));
2190 	PHBERR(p, "            lemErrorMask = %016llx\n", be64_to_cpu(s->lemErrorMask));
2191 	PHBERR(p, "                  lemWOF = %016llx\n", be64_to_cpu(s->lemWOF));
2192 	PHBERR(p, "          phbErrorStatus = %016llx\n", be64_to_cpu(s->phbErrorStatus));
2193 	PHBERR(p, "     phbFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbFirstErrorStatus));
2194 	PHBERR(p, "            phbErrorLog0 = %016llx\n", be64_to_cpu(s->phbErrorLog0));
2195 	PHBERR(p, "            phbErrorLog1 = %016llx\n", be64_to_cpu(s->phbErrorLog1));
2196 	PHBERR(p, "       phbTxeErrorStatus = %016llx\n", be64_to_cpu(s->phbTxeErrorStatus));
2197 	PHBERR(p, "  phbTxeFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbTxeFirstErrorStatus));
2198 	PHBERR(p, "         phbTxeErrorLog0 = %016llx\n", be64_to_cpu(s->phbTxeErrorLog0));
2199 	PHBERR(p, "         phbTxeErrorLog1 = %016llx\n", be64_to_cpu(s->phbTxeErrorLog1));
2200 	PHBERR(p, "    phbRxeArbErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeArbErrorStatus));
2201 	PHBERR(p, "phbRxeArbFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeArbFirstErrorStatus));
2202 	PHBERR(p, "      phbRxeArbErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeArbErrorLog0));
2203 	PHBERR(p, "      phbRxeArbErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeArbErrorLog1));
2204 	PHBERR(p, "    phbRxeMrgErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorStatus));
2205 	PHBERR(p, "phbRxeMrgFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeMrgFirstErrorStatus));
2206 	PHBERR(p, "      phbRxeMrgErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorLog0));
2207 	PHBERR(p, "      phbRxeMrgErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorLog1));
2208 	PHBERR(p, "    phbRxeTceErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeTceErrorStatus));
2209 	PHBERR(p, "phbRxeTceFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeTceFirstErrorStatus));
2210 	PHBERR(p, "      phbRxeTceErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeTceErrorLog0));
2211 	PHBERR(p, "      phbRxeTceErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeTceErrorLog1));
2212 	PHBERR(p, "       phbPblErrorStatus = %016llx\n", be64_to_cpu(s->phbPblErrorStatus));
2213 	PHBERR(p, "  phbPblFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbPblFirstErrorStatus));
2214 	PHBERR(p, "         phbPblErrorLog0 = %016llx\n", be64_to_cpu(s->phbPblErrorLog0));
2215 	PHBERR(p, "         phbPblErrorLog1 = %016llx\n", be64_to_cpu(s->phbPblErrorLog1));
2216 	PHBERR(p, "     phbPcieDlpErrorLog1 = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorLog1));
2217 	PHBERR(p, "     phbPcieDlpErrorLog2 = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorLog2));
2218 	PHBERR(p, "   phbPcieDlpErrorStatus = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorStatus));
2219 
2220 	PHBERR(p, "      phbRegbErrorStatus = %016llx\n", be64_to_cpu(s->phbRegbErrorStatus));
2221 	PHBERR(p, " phbRegbFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbRegbFirstErrorStatus));
2222 	PHBERR(p, "        phbRegbErrorLog0 = %016llx\n", be64_to_cpu(s->phbRegbErrorLog0));
2223 	PHBERR(p, "        phbRegbErrorLog1 = %016llx\n", be64_to_cpu(s->phbRegbErrorLog1));
2224 
2225 	for (i = 0; i < p->max_num_pes; i++) {
2226 		if (!s->pestA[i] && !s->pestB[i])
2227 			continue;
2228 		PHBERR(p, "               PEST[%03x] = %016llx %016llx\n",
2229 		       i, be64_to_cpu(s->pestA[i]), be64_to_cpu(s->pestB[i]));
2230 	}
2231 	free(s);
2232 }
2233 
static int64_t phb4_set_pe(struct phb *phb,
2235 			   uint64_t pe_number,
2236 			   uint64_t bdfn,
2237 			   uint8_t bcompare,
2238 			   uint8_t dcompare,
2239 			   uint8_t fcompare,
2240 			   uint8_t action)
2241 {
2242 	struct phb4 *p = phb_to_phb4(phb);
2243 	uint64_t mask, idx;
2244 
2245 	/* Sanity check */
2246 	if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
2247 		return OPAL_PARAMETER;
2248 	if (pe_number >= p->num_pes || bdfn > 0xffff ||
2249 	    bcompare > OpalPciBusAll ||
2250 	    dcompare > OPAL_COMPARE_RID_DEVICE_NUMBER ||
2251 	    fcompare > OPAL_COMPARE_RID_FUNCTION_NUMBER)
2252 		return OPAL_PARAMETER;
2253 
2254 	/* match everything by default */
2255 	mask = 0;
2256 
2257 	/* Figure out the RID range */
2258 	if (bcompare != OpalPciBusAny)
2259 		mask  = ((0x1 << (bcompare + 1)) - 1) << (15 - bcompare);
2260 
2261 	if (dcompare == OPAL_COMPARE_RID_DEVICE_NUMBER)
2262 		mask |= 0xf8;
2263 
2264 	if (fcompare == OPAL_COMPARE_RID_FUNCTION_NUMBER)
2265 		mask |= 0x7;
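	/*
	 * Illustrative example: with bcompare = OpalPciBusAll (all 8 bus
	 * bits) and both device and function compares enabled, the mask
	 * becomes 0xff00 | 0xf8 | 0x07 = 0xffff, i.e. an exact BDFN match
	 * and hence a single RTT entry updated below.
	 */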
2266 
2267 	if (action == OPAL_UNMAP_PE)
2268 		pe_number = PHB4_RESERVED_PE_NUM(p);
2269 
2270 	/* Map or unmap the RTT range */
2271 	for (idx = 0; idx < RTT_TABLE_ENTRIES; idx++)
2272 		if ((idx & mask) == (bdfn & mask))
2273 			p->tbl_rtt[idx] = cpu_to_be16(pe_number);
2274 
2275 	/* Invalidate the RID Translation Cache (RTC) inside the PHB */
2276 	out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);
2277 
2278 	return OPAL_SUCCESS;
2279 }
2280 
static int64_t phb4_set_peltv(struct phb *phb,
2282 			      uint32_t parent_pe,
2283 			      uint32_t child_pe,
2284 			      uint8_t state)
2285 {
2286 	struct phb4 *p = phb_to_phb4(phb);
2287 	uint32_t idx, mask;
2288 
2289 	/* Sanity check */
2290 	if (parent_pe >= p->num_pes || child_pe >= p->num_pes)
2291 		return OPAL_PARAMETER;
2292 
2293 	/* Find index for parent PE */
2294 	idx = parent_pe * (p->max_num_pes / 8);
2295 	idx += (child_pe / 8);
2296 	mask = 0x1 << (7 - (child_pe % 8));
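	/*
	 * Illustrative example with max_num_pes = 512: each parent PE owns
	 * a 64-byte row of the PELT-V, so parent_pe 2 / child_pe 9 gives
	 * idx = 2 * 64 + 1 = 129 and mask = 0x40 (big-endian bit 1 of that
	 * byte).
	 */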
2297 
2298 	if (state)
2299 		p->tbl_peltv[idx] |= mask;
2300 	else
2301 		p->tbl_peltv[idx] &= ~mask;
2302 
2303 	return OPAL_SUCCESS;
2304 }
2305 
static void phb4_prepare_link_change(struct pci_slot *slot, bool is_up)
2307 {
2308 	struct phb4 *p = phb_to_phb4(slot->phb);
2309 	uint32_t reg32;
2310 
2311 	p->has_link = is_up;
2312 
2313 	if (is_up) {
2314 		/* Clear AER receiver error status */
2315 		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
2316 				    PCIECAP_AER_CE_STATUS,
2317 				    PCIECAP_AER_CE_RECVR_ERR);
2318 		/* Unmask receiver error status in AER */
2319 		phb4_pcicfg_read32(&p->phb, 0, p->aercap +
2320 				   PCIECAP_AER_CE_MASK, &reg32);
2321 		reg32 &= ~PCIECAP_AER_CE_RECVR_ERR;
2322 		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
2323 				    PCIECAP_AER_CE_MASK, reg32);
2324 
2325 		/* Don't block PCI-CFG */
2326 		p->flags &= ~PHB4_CFG_BLOCKED;
2327 
2328 		/* Re-enable link down errors */
2329 		out_be64(p->regs + PHB_PCIE_MISC_STRAP,
2330 			 0x0000060000000000ull);
2331 
2332 		/* Re-enable error status indicators that trigger irqs */
2333 		out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE,
2334 			 0x2130006efca8bc00ull);
2335 		out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE,
2336 			 0x0080000000000000ull);
2337 		out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE,
2338 			 0xde0fff91035743ffull);
2339 
2340 	} else {
2341 		/* Mask AER receiver error */
2342 		phb4_pcicfg_read32(&p->phb, 0, p->aercap +
2343 				   PCIECAP_AER_CE_MASK, &reg32);
2344 		reg32 |= PCIECAP_AER_CE_RECVR_ERR;
2345 		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
2346 				    PCIECAP_AER_CE_MASK, reg32);
2347 
2348 		/* Clear error link enable & error link down kill enable */
2349 		out_be64(p->regs + PHB_PCIE_MISC_STRAP, 0);
2350 
2351 		/* Disable all error status indicators that trigger irqs */
2352 		out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE, 0);
2353 		out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE, 0);
2354 		out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE, 0);
2355 
2356 		/* Block PCI-CFG access */
2357 		p->flags |= PHB4_CFG_BLOCKED;
2358 	}
2359 }
2360 
static int64_t phb4_get_presence_state(struct pci_slot *slot, uint8_t *val)
2362 {
2363 	struct phb4 *p = phb_to_phb4(slot->phb);
2364 	uint64_t hps, dtctl;
2365 
2366 	/* Test for PHB in error state ? */
2367 	if (p->broken)
2368 		return OPAL_HARDWARE;
2369 
2370 	/* Check hotplug status */
2371 	hps = in_be64(p->regs + PHB_PCIE_HOTPLUG_STATUS);
2372 	if (!(hps & PHB_PCIE_HPSTAT_PRESENCE)) {
2373 		*val = OPAL_PCI_SLOT_PRESENT;
2374 	} else {
2375 		/*
2376 		 * If it says not present but link is up, then we assume
2377 		 * we are on a broken simulation environment and still
2378 		 * return a valid presence. Otherwise, not present.
2379 		 */
2380 		dtctl = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2381 		if (dtctl & PHB_PCIE_DLP_TL_LINKACT) {
2382 			PHBERR(p, "Presence detect 0 but link set !\n");
2383 			*val = OPAL_PCI_SLOT_PRESENT;
2384 		} else {
2385 			*val = OPAL_PCI_SLOT_EMPTY;
2386 		}
2387 	}
2388 
2389 	return OPAL_SUCCESS;
2390 }
2391 
static int64_t phb4_get_link_info(struct pci_slot *slot, uint8_t *speed,
2393 				   uint8_t *width)
2394 {
2395 	struct phb4 *p = phb_to_phb4(slot->phb);
2396 	uint64_t reg;
2397 	uint16_t state;
2398 	int64_t rc;
2399 	uint8_t s;
2400 
2401 	/* Link is up, let's find the actual speed */
2402 	reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2403 	if (!(reg & PHB_PCIE_DLP_TL_LINKACT)) {
2404 		*width = 0;
2405 		if (speed)
2406 			*speed = 0;
2407 		return OPAL_SUCCESS;
2408 	}
2409 
2410 	rc = phb4_pcicfg_read16(&p->phb, 0,
2411 				p->ecap + PCICAP_EXP_LSTAT, &state);
2412 	if (rc != OPAL_SUCCESS) {
2413 		PHBERR(p, "%s: Error %lld getting link state\n", __func__, rc);
2414 		return OPAL_HARDWARE;
2415 	}
2416 
2417 	if (state & PCICAP_EXP_LSTAT_DLLL_ACT) {
2418 		*width = ((state & PCICAP_EXP_LSTAT_WIDTH) >> 4);
2419 		s =  state & PCICAP_EXP_LSTAT_SPEED;
2420 	} else {
2421 		*width = 0;
2422 		s = 0;
2423 	}
2424 
2425 	if (speed)
2426 		*speed = s;
2427 
2428 	return OPAL_SUCCESS;
2429 }
2430 
static int64_t phb4_get_link_state(struct pci_slot *slot, uint8_t *val)
2432 {
2433 	return phb4_get_link_info(slot, NULL, val);
2434 }
2435 
static int64_t phb4_retry_state(struct pci_slot *slot)
2437 {
2438 	struct phb4 *p = phb_to_phb4(slot->phb);
2439 
2440 	/* Mark link as down */
2441 	phb4_prepare_link_change(slot, false);
2442 
2443 	/* Last attempt to activate link */
2444 	if (slot->link_retries == 1) {
2445 		if (slot->state == PHB4_SLOT_LINK_WAIT) {
2446 			PHBERR(p, "Falling back to GEN1 training\n");
2447 			p->max_link_speed = 1;
2448 		}
2449 	}
2450 
2451 	if (!slot->link_retries--) {
2452 		switch (slot->state) {
2453 		case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
2454 			PHBERR(p, "Presence detected but no electrical link\n");
2455 			break;
2456 		case PHB4_SLOT_LINK_WAIT:
2457 			PHBERR(p, "Electrical link detected but won't train\n");
2458 			break;
2459 		case PHB4_SLOT_LINK_STABLE:
			PHBERR(p, "Link trained but was degraded or unstable\n");
2461 			break;
2462 		default:
2463 			PHBERR(p, "Unknown link issue\n");
2464 		}
2465 		return OPAL_HARDWARE;
2466 	}
2467 
2468 	pci_slot_set_state(slot, PHB4_SLOT_CRESET_START);
2469 	return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
2470 }
2471 
static uint64_t phb4_train_info(struct phb4 *p, uint64_t reg, unsigned long dt)
2473 {
2474 	uint64_t ltssm_state = GETFIELD(PHB_PCIE_DLP_LTSSM_TRC, reg);
2475 	char s[80];
2476 
2477 	snprintf(s, sizeof(s), "TRACE:0x%016llx % 2lims",
2478 		 reg, tb_to_msecs(dt));
2479 
2480 	if (reg & PHB_PCIE_DLP_TL_LINKACT)
2481 		snprintf(s, sizeof(s), "%s trained ", s);
2482 	else if (reg & PHB_PCIE_DLP_TRAINING)
2483 		snprintf(s, sizeof(s), "%s training", s);
2484 	else if (reg & PHB_PCIE_DLP_INBAND_PRESENCE)
2485 		snprintf(s, sizeof(s), "%s presence", s);
2486 	else
2487 		snprintf(s, sizeof(s), "%s         ", s);
2488 
2489 	snprintf(s, sizeof(s), "%s GEN%lli:x%02lli:", s,
2490 		 GETFIELD(PHB_PCIE_DLP_LINK_SPEED, reg),
2491 		 GETFIELD(PHB_PCIE_DLP_LINK_WIDTH, reg));
2492 
2493 	switch (ltssm_state) {
2494 	case PHB_PCIE_DLP_LTSSM_RESET:
2495 		snprintf(s, sizeof(s), "%sreset", s);
2496 		break;
2497 	case PHB_PCIE_DLP_LTSSM_DETECT:
2498 		snprintf(s, sizeof(s), "%sdetect", s);
2499 		break;
2500 	case PHB_PCIE_DLP_LTSSM_POLLING:
2501 		snprintf(s, sizeof(s), "%spolling", s);
2502 		break;
2503 	case PHB_PCIE_DLP_LTSSM_CONFIG:
2504 		snprintf(s, sizeof(s), "%sconfig", s);
2505 		break;
2506 	case PHB_PCIE_DLP_LTSSM_L0:
2507 		snprintf(s, sizeof(s), "%sL0", s);
2508 		break;
2509 	case PHB_PCIE_DLP_LTSSM_REC:
2510 		snprintf(s, sizeof(s), "%srecovery", s);
2511 		break;
2512 	case PHB_PCIE_DLP_LTSSM_L1:
2513 		snprintf(s, sizeof(s), "%sL1", s);
2514 		break;
2515 	case PHB_PCIE_DLP_LTSSM_L2:
2516 		snprintf(s, sizeof(s), "%sL2", s);
2517 		break;
2518 	case PHB_PCIE_DLP_LTSSM_HOTRESET:
2519 		snprintf(s, sizeof(s), "%shotreset", s);
2520 		break;
2521 	case PHB_PCIE_DLP_LTSSM_DISABLED:
2522 		snprintf(s, sizeof(s), "%sdisabled", s);
2523 		break;
2524 	case PHB_PCIE_DLP_LTSSM_LOOPBACK:
2525 		snprintf(s, sizeof(s), "%sloopback", s);
2526 		break;
2527 	default:
		snprintf(s, sizeof(s), "%sinvalid", s);
2529 	}
2530 	PHBNOTICE(p, "%s\n", s);
2531 
2532 	return ltssm_state;
2533 }
2534 
static void phb4_dump_pec_err_regs(struct phb4 *p)
2536 {
2537 	uint64_t nfir_p_wof, nfir_n_wof, err_aib;
2538 	uint64_t err_rpt0, err_rpt1;
2539 
2540 	/* Read the PCI and NEST FIRs and dump them. Also cache PCI/NEST FIRs */
2541 	xscom_read(p->chip_id,
2542 		   p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR,  &p->pfir_cache);
2543 	xscom_read(p->chip_id,
2544 		   p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR_WOF, &nfir_p_wof);
2545 	xscom_read(p->chip_id,
2546 		   p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
2547 	xscom_read(p->chip_id,
2548 		   p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR_WOF, &nfir_n_wof);
2549 	xscom_read(p->chip_id,
2550 		   p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT0, &err_rpt0);
2551 	xscom_read(p->chip_id,
2552 		   p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT1, &err_rpt1);
2553 	xscom_read(p->chip_id,
2554 		   p->pci_stk_xscom + XPEC_PCI_STK_PBAIB_ERR_REPORT, &err_aib);
2555 
2556 	PHBERR(p, "            PCI FIR=%016llx\n", p->pfir_cache);
2557 	PHBERR(p, "        PCI FIR WOF=%016llx\n", nfir_p_wof);
2558 	PHBERR(p, "           NEST FIR=%016llx\n", p->nfir_cache);
2559 	PHBERR(p, "       NEST FIR WOF=%016llx\n", nfir_n_wof);
2560 	PHBERR(p, "           ERR RPT0=%016llx\n", err_rpt0);
2561 	PHBERR(p, "           ERR RPT1=%016llx\n", err_rpt1);
2562 	PHBERR(p, "            AIB ERR=%016llx\n", err_aib);
2563 }
2564 
static void phb4_dump_capp_err_regs(struct phb4 *p)
2566 {
2567 	uint64_t fir, apc_master_err, snoop_err, transport_err;
2568 	uint64_t tlbi_err, capp_err_status;
2569 	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
2570 
2571 	xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
2572 	xscom_read(p->chip_id, CAPP_APC_MASTER_ERR_RPT + offset,
2573 		   &apc_master_err);
2574 	xscom_read(p->chip_id, CAPP_SNOOP_ERR_RTP + offset, &snoop_err);
2575 	xscom_read(p->chip_id, CAPP_TRANSPORT_ERR_RPT + offset, &transport_err);
2576 	xscom_read(p->chip_id, CAPP_TLBI_ERR_RPT + offset, &tlbi_err);
2577 	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &capp_err_status);
2578 
2579 	PHBERR(p, "           CAPP FIR=%016llx\n", fir);
2580 	PHBERR(p, "CAPP APC MASTER ERR=%016llx\n", apc_master_err);
2581 	PHBERR(p, "     CAPP SNOOP ERR=%016llx\n", snoop_err);
2582 	PHBERR(p, " CAPP TRANSPORT ERR=%016llx\n", transport_err);
2583 	PHBERR(p, "      CAPP TLBI ERR=%016llx\n", tlbi_err);
2584 	PHBERR(p, "    CAPP ERR STATUS=%016llx\n", capp_err_status);
2585 }
2586 
2587 /* Check if AIB is fenced via PBCQ NFIR */
static bool phb4_fenced(struct phb4 *p)
2589 {
2590 
2591 	/* Already fenced ? */
2592 	if (p->flags & PHB4_AIB_FENCED)
2593 		return true;
2594 
	/*
	 * A read of all 1's from the PHB indicates a PHB freeze/fence. We
	 * don't really differentiate between them at this point.
	 */
	if (in_be64(p->regs + PHB_CPU_LOADSTORE_STATUS) != 0xfffffffffffffffful)
2600 		return false;
2601 
2602 	/* Mark ourselves fenced */
2603 	p->flags |= PHB4_AIB_FENCED;
2604 
2605 	PHBERR(p, "PHB Freeze/Fence detected !\n");
2606 	phb4_dump_pec_err_regs(p);
2607 
	/*
	 * Dump the CAPP error registers in case the PHB was fenced due to
	 * CAPP. p->nfir_cache is expected to have already been updated by
	 * phb4_dump_pec_err_regs().
	 */
2612 	if (p->nfir_cache & XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP)
2613 		phb4_dump_capp_err_regs(p);
2614 
2615 	phb4_eeh_dump_regs(p);
2616 
2617 	return true;
2618 }
2619 
static bool phb4_check_reg(struct phb4 *p, uint64_t reg)
2621 {
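	/*
	 * An all-ones read is ambiguous: it may be a legitimate register
	 * value or the PHB may be fenced.  phb4_fenced() checks (and
	 * latches) the fence state, so the register is only reported as
	 * untrustworthy when the PHB really is fenced.
	 */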
2622 	if (reg == 0xffffffffffffffffUL)
2623 		return !phb4_fenced(p);
2624 	return true;
2625 }
2626 
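/*
 * Illustrative example: a link-capability dword of 0x00000083 decodes to
 * a maximum width of x8 and a maximum speed of GEN3 for the device at
 * bdfn.
 */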
static void phb4_get_info(struct phb *phb, uint16_t bdfn, uint8_t *speed,
2628 			  uint8_t *width)
2629 {
2630 	int32_t ecap;
2631 	uint32_t cap;
2632 
2633 	ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP);
2634 	pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_LCAP, &cap);
2635 	*width = (cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4;
2636 	*speed = cap & PCICAP_EXP_LCAP_MAXSPD;
2637 }
2638 
2639 #define PVR_POWER9_CUMULUS		0x00002000
2640 
static bool phb4_chip_retry_workaround(void)
2642 {
2643 	unsigned int pvr;
2644 
2645 	if (pci_retry_all)
2646 		return true;
2647 
2648 	/* Chips that need this retry are:
2649 	 *  - CUMULUS DD1.0
2650 	 *  - NIMBUS DD2.0 (and DD1.0, but it is unsupported so no check).
2651 	 */
2652 	pvr = mfspr(SPR_PVR);
2653 	if (pvr & PVR_POWER9_CUMULUS) {
2654 		if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
2655 			return true;
2656 	} else { /* NIMBUS */
2657 		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
2658 			return true;
2659 	}
2660 	return false;
2661 }
2662 
2663 struct pci_card_id {
2664 	uint16_t vendor;
2665 	uint16_t device;
2666 };
2667 
2668 static struct pci_card_id retry_allowlist[] = {
2669 	{ 0x1000, 0x005d }, /* LSI Logic MegaRAID SAS-3 3108 */
2670 	{ 0x1000, 0x00c9 }, /* LSI MPT SAS-3 */
2671 	{ 0x104c, 0x8241 }, /* TI xHCI USB */
2672 	{ 0x1077, 0x2261 }, /* QLogic ISP2722-based 16/32Gb FC */
2673 	{ 0x10b5, 0x8725 }, /* PLX Switch: p9dsu, witherspoon */
2674 	{ 0x10b5, 0x8748 }, /* PLX Switch: ZZ */
2675 	{ 0x11f8, 0xf117 }, /* PMC-Sierra/MicroSemi NV1604 */
2676 	{ 0x15b3, 0x1013 }, /* Mellanox ConnectX-4 */
2677 	{ 0x15b3, 0x1017 }, /* Mellanox ConnectX-5 */
2678 	{ 0x15b3, 0x1019 }, /* Mellanox ConnectX-5 Ex */
2679 	{ 0x1a03, 0x1150 }, /* ASPEED AST2500 Switch */
2680 	{ 0x8086, 0x10fb }, /* Intel x520 10G Eth */
2681 	{ 0x9005, 0x028d }, /* MicroSemi PM8069 */
2682 };
2683 
2684 #define VENDOR(vdid) ((vdid) & 0xffff)
2685 #define DEVICE(vdid) (((vdid) >> 16) & 0xffff)
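/*
 * For example (illustrative), a config-space vendor/device dword of
 * 0x005d1000 decodes to VENDOR() = 0x1000 and DEVICE() = 0x005d,
 * matching the LSI SAS-3 3108 entry in the allowlist above.
 */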
2686 
static bool phb4_adapter_in_allowlist(uint32_t vdid)
2688 {
2689 	int i;
2690 
2691 	if (pci_retry_all)
2692 		return true;
2693 
2694 	for (i = 0; i < ARRAY_SIZE(retry_allowlist); i++)
2695 		if ((retry_allowlist[i].vendor == VENDOR(vdid)) &&
2696 		    (retry_allowlist[i].device == DEVICE(vdid)))
2697 			return true;
2698 
2699 	return false;
2700 }
2701 
2702 static struct pci_card_id lane_eq_disable[] = {
2703 	{ 0x10de, 0x17fd }, /* Nvidia GM200GL [Tesla M40] */
2704 	{ 0x10de, 0x1db4 }, /* Nvidia GV100 */
2705 };
2706 
static bool phb4_lane_eq_retry_allowlist(uint32_t vdid)
2708 {
2709 	int i;
2710 
2711 	for (i = 0; i < ARRAY_SIZE(lane_eq_disable); i++)
2712 		if ((lane_eq_disable[i].vendor == VENDOR(vdid)) &&
2713 		    (lane_eq_disable[i].device == DEVICE(vdid)))
2714 			return true;
2715 	return false;
2716 }
2717 
static void phb4_lane_eq_change(struct phb4 *p, uint32_t vdid)
2719 {
2720 	p->lane_eq_en = !phb4_lane_eq_retry_allowlist(vdid);
2721 }
2722 
static bool phb4_link_optimal(struct pci_slot *slot, uint32_t *vdid)
2724 {
2725 	struct phb4 *p = phb_to_phb4(slot->phb);
2726 	uint64_t reg;
2727 	uint32_t id;
2728 	uint16_t bdfn, lane_errs;
2729 	uint8_t trained_speed, dev_speed, target_speed, rx_errs;
2730 	uint8_t trained_width, dev_width, target_width;
2731 	bool optimal_speed, optimal_width, optimal, retry_enabled, rx_err_ok;
2732 
2733 
2734 	/* Current trained state */
2735 	phb4_get_link_info(slot, &trained_speed, &trained_width);
2736 
2737 	/* Get device capability */
	bdfn = 0x0100; /* bus=1 dev=0 fn=0 */
	/* Since this is the first access, we need to wait for CRS */
	if (!pci_wait_crs(slot->phb, bdfn, &id))
2741 		return true;
2742 	phb4_get_info(slot->phb, bdfn, &dev_speed, &dev_width);
2743 
2744 	/* Work out if we are optimally trained */
2745 	target_speed = MIN(p->max_link_speed, dev_speed);
2746 	optimal_speed = (trained_speed >= target_speed);
2747 	target_width = MIN(p->max_link_width, dev_width);
2748 	optimal_width = (trained_width >= target_width);
2749 	optimal = optimal_width && optimal_speed;
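	/*
	 * For example (illustrative): a GEN4 x16 PHB with a GEN3 x8
	 * adapter has target_speed GEN3 and target_width x8, so a link
	 * trained at GEN3 x8 is optimal while GEN3 x4 counts as degraded
	 * width and may trigger a retry below.
	 */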
2750 	retry_enabled = (phb4_chip_retry_workaround() &&
2751 			 phb4_adapter_in_allowlist(id)) ||
2752 		phb4_lane_eq_retry_allowlist(id);
2753 	reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_COUNTERS);
2754 	rx_errs =  GETFIELD(PHB_PCIE_DLP_RX_ERR_CNT, reg);
2755 	rx_err_ok = (rx_errs < rx_err_max);
2756 	reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_STATUS);
2757 	lane_errs = GETFIELD(PHB_PCIE_DLP_LANE_ERR, reg);
2758 
2759 	PHBDBG(p, "LINK: Card [%04x:%04x] %s Retry:%s\n", VENDOR(id),
2760 	       DEVICE(id), optimal ? "Optimal" : "Degraded",
2761 	       retry_enabled ? "enabled" : "disabled");
2762 	PHBDBG(p, "LINK: Speed Train:GEN%i PHB:GEN%i DEV:GEN%i%s\n",
2763 	       trained_speed, p->max_link_speed, dev_speed,
2764 	       optimal_speed ? "" : " *");
2765 	PHBDBG(p, "LINK: Width Train:x%02i PHB:x%02i DEV:x%02i%s\n",
2766 	       trained_width, p->max_link_width, dev_width,
2767 	       optimal_width ? "" : " *");
2768 	PHBDBG(p, "LINK: RX Errors Now:%i Max:%i Lane:0x%04x%s\n",
2769 	       rx_errs, rx_err_max, lane_errs, rx_err_ok ? "" : " *");
2770 
2771 	if (vdid)
2772 		*vdid = id;
2773 
2774 	/* Always do RX error retry irrespective of chip and card */
2775 	if (!rx_err_ok)
2776 		return false;
2777 
2778 	if (!retry_enabled)
2779 		return true;
2780 
2781 	return optimal;
2782 }
2783 
/*
 * This is a trace function to watch what's happening during PCIe link
 * training.  If any errors are detected it simply returns so the
 * normal code can deal with it.
 */
static void phb4_link_trace(struct phb4 *p, uint64_t target_state, int max_ms)
2790 {
2791 	unsigned long now, end, start = mftb(), state = 0;
2792 	uint64_t trwctl, reg, reglast = -1;
2793 	bool enabled;
2794 
	/*
	 * Enable the DLP trace outputs. If we don't, the LTSSM state in
	 * PHB_PCIE_DLP_TRAIN_CTL won't be updated and will always read zero.
	 */
2799 	trwctl = phb4_read_reg(p, PHB_PCIE_DLP_TRWCTL);
2800 	enabled = !!(trwctl & PHB_PCIE_DLP_TRWCTL_EN);
2801 	if (!enabled) {
2802 		phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL,
2803 				trwctl | PHB_PCIE_DLP_TRWCTL_EN);
2804 	}
2805 
2806 	end = start + msecs_to_tb(max_ms);
2807 	now = start;
2808 
2809 	do {
2810 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2811 		if (reg != reglast)
2812 			state = phb4_train_info(p, reg, now - start);
2813 		reglast = reg;
2814 
2815 		if (!phb4_check_reg(p, reg)) {
2816 			PHBNOTICE(p, "TRACE: PHB fenced.\n");
2817 			goto out;
2818 		}
2819 
2820 		if (tb_compare(now, end) == TB_AAFTERB) {
2821 			PHBNOTICE(p, "TRACE: Timed out after %dms\n", max_ms);
2822 			goto out;
2823 		}
2824 
2825 		now = mftb();
2826 	} while (state != target_state);
2827 
2828 	PHBNOTICE(p, "TRACE: Reached target state\n");
2829 
2830 out:
2831 	/*
2832 	 * The trace enable bit is a clock gate for the tracing logic. Turn
2833 	 * it off to save power if we're not using it otherwise.
2834 	 */
2835 	if (!enabled)
2836 		phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL, trwctl);
2837 }
2838 
2839 /*
2840  * This helper is called repeatedly by the host sync notifier mechanism, which
2841  * relies on the kernel to regularly poll the OPAL_SYNC_HOST_REBOOT call as it
2842  * shuts down.
2843  */
static bool phb4_host_sync_reset(void *data)
2845 {
2846 	struct phb4 *p = (struct phb4 *)data;
2847 	struct phb *phb = &p->phb;
2848 	int64_t rc = 0;
2849 
2850 	/* Make sure no-one modifies the phb flags while we are active */
2851 	phb_lock(phb);
2852 
2853 	/* Make sure CAPP is attached to the PHB */
2854 	if (p->capp)
2855 		/* Call phb ops to disable capi */
2856 		rc = phb->ops->set_capi_mode(phb, OPAL_PHB_CAPI_MODE_PCIE,
2857 				       p->capp->attached_pe);
2858 	else
2859 		rc = OPAL_SUCCESS;
2860 
2861 	/* Continue kicking state-machine if in middle of a mode transition */
2862 	if (rc == OPAL_BUSY)
2863 		rc = phb->slot->ops.run_sm(phb->slot);
2864 
2865 	phb_unlock(phb);
2866 
2867 	return rc <= OPAL_SUCCESS;
2868 }
2869 
2870 /*
2871  * Notification from the pci-core that a pci slot state machine completed.
2872  * We use this callback to mark the CAPP disabled if we were waiting for it.
2873  */
static int64_t phb4_slot_sm_run_completed(struct pci_slot *slot, uint64_t err)
2875 {
2876 	struct phb4 *p = phb_to_phb4(slot->phb);
2877 
2878 	/* Check if we are disabling the capp */
2879 	if (p->flags & PHB4_CAPP_DISABLE) {
2880 
		/* Unset struct capp so that we don't fall into a creset loop */
2882 		p->flags &= ~(PHB4_CAPP_DISABLE);
2883 		p->capp->phb = NULL;
2884 		p->capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
2885 
		/* Remove the host sync notifier as we are done. */
2887 		opal_del_host_sync_notifier(phb4_host_sync_reset, p);
2888 		if (err) {
2889 			/* Force a CEC ipl reboot */
2890 			disable_fast_reboot("CAPP: reset failed");
2891 			PHBERR(p, "CAPP: Unable to reset. Error=%lld\n", err);
2892 		} else {
2893 			PHBINF(p, "CAPP: reset complete\n");
2894 		}
2895 	}
2896 
2897 	return OPAL_SUCCESS;
2898 }
2899 
static int64_t phb4_poll_link(struct pci_slot *slot)
2901 {
2902 	struct phb4 *p = phb_to_phb4(slot->phb);
2903 	uint64_t reg;
2904 	uint32_t vdid;
2905 
2906 	switch (slot->state) {
2907 	case PHB4_SLOT_NORMAL:
2908 	case PHB4_SLOT_LINK_START:
2909 		PHBDBG(p, "LINK: Start polling\n");
2910 		slot->retries = PHB4_LINK_ELECTRICAL_RETRIES;
2911 		pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT_ELECTRICAL);
2912 		/* Polling early here has no chance of a false positive */
2913 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
2914 	case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
		/*
		 * Wait for the link electrical connection to be
		 * established (shorter timeout). This allows us to
		 * work around spurious presence detect on some machines
		 * without waiting 10s each time.
		 *
		 * Note: We *also* check for the full link up bit here
		 * because simics doesn't seem to implement the electrical
		 * link bit at all.
		 */
2925 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2926 		if (!phb4_check_reg(p, reg)) {
2927 			PHBERR(p, "PHB fence waiting for electrical link\n");
2928 			return phb4_retry_state(slot);
2929 		}
2930 
2931 		if (reg & (PHB_PCIE_DLP_INBAND_PRESENCE |
2932 			   PHB_PCIE_DLP_TL_LINKACT)) {
2933 			PHBDBG(p, "LINK: Electrical link detected\n");
2934 			pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT);
2935 			slot->retries = PHB4_LINK_WAIT_RETRIES;
2936 			/* No wait here since already have an elec link */
2937 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
2938 		}
2939 
2940 		if (slot->retries-- == 0) {
2941 			PHBDBG(p, "LINK: No in-band presence\n");
2942 			return OPAL_SUCCESS;
2943 		}
2944 		/* Retry */
2945 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
2946 	case PHB4_SLOT_LINK_WAIT:
2947 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2948 		if (!phb4_check_reg(p, reg)) {
2949 			PHBERR(p, "LINK: PHB fence waiting for link training\n");
2950 			return phb4_retry_state(slot);
2951 		}
2952 		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
2953 			PHBDBG(p, "LINK: Link is up\n");
2954 			phb4_prepare_link_change(slot, true);
2955 			pci_slot_set_state(slot, PHB4_SLOT_LINK_STABLE);
2956 			return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
2957 		}
2958 
2959 		if (slot->retries-- == 0) {
2960 			PHBERR(p, "LINK: Timeout waiting for link up\n");
2961 			PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
2962 			return phb4_retry_state(slot);
2963 		}
2964 		/* Retry */
2965 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
2966 	case PHB4_SLOT_LINK_STABLE:
2967 		/* Sanity check link */
2968 		if (phb4_fenced(p)) {
			PHBERR(p, "LINK: PHB fenced waiting for stability\n");
2970 			return phb4_retry_state(slot);
2971 		}
2972 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2973 		if (!phb4_check_reg(p, reg)) {
2974 			PHBERR(p, "LINK: PHB fence reading training control\n");
2975 			return phb4_retry_state(slot);
2976 		}
2977 		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
2978 			PHBDBG(p, "LINK: Link is stable\n");
2979 			if (!phb4_link_optimal(slot, &vdid)) {
2980 				PHBDBG(p, "LINK: Link degraded\n");
2981 				if (slot->link_retries) {
2982 					phb4_lane_eq_change(p, vdid);
2983 					return phb4_retry_state(slot);
2984 				}
2985 				/*
2986 				 * Link is degraded but no more retries, so
2987 				 * settle for what we have :-(
2988 				 */
2989 				PHBERR(p, "LINK: Degraded but no more retries\n");
2990 			}
2991 			pci_restore_slot_bus_configs(slot);
2992 			pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
2993 			return OPAL_SUCCESS;
2994 		}
		PHBERR(p, "LINK: Went down waiting for stability\n");
2996 		PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
2997 		return phb4_retry_state(slot);
2998 	default:
2999 		PHBERR(p, "LINK: Unexpected slot state %08x\n",
3000 		       slot->state);
3001 	}
3002 
3003 	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3004 	return OPAL_HARDWARE;
3005 }
3006 
static unsigned int phb4_get_max_link_speed(struct phb4 *p, struct dt_node *np)
3008 {
3009 	unsigned int max_link_speed, hw_max_link_speed;
3010 	struct proc_chip *chip;
3011 	chip = get_chip(p->chip_id);
3012 
3013 	hw_max_link_speed = 4;
3014 	if (is_phb5() && (p->index == 0 || p->index == 3))
3015 		hw_max_link_speed = 5;
3016 
3017 	/* Priority order: NVRAM -> dt -> GEN3 dd2.00 -> hw default */
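	/*
	 * Illustrative example: a device-tree "ibm,max-link-speed" of 3
	 * caps training at GEN3 even on a GEN4-capable stack, unless the
	 * pcie_max_link_speed override below supplies a different value
	 * (still clamped to hw_max_link_speed).
	 */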
3018 	max_link_speed = hw_max_link_speed;
3019 	if (p->rev == PHB4_REV_NIMBUS_DD20 &&
3020 	    ((0xf & chip->ec_level) == 0) && chip->ec_rev == 0)
3021 		max_link_speed = 3;
3022 	if (np) {
3023 		if (dt_has_node_property(np, "ibm,max-link-speed", NULL)) {
3024 			max_link_speed = dt_prop_get_u32(np, "ibm,max-link-speed");
3025 			p->dt_max_link_speed = max_link_speed;
3026 		}
3027 		else {
3028 			p->dt_max_link_speed = 0;
3029 		}
3030 	}
3031 	else {
3032 		if (p->dt_max_link_speed > 0) {
3033 			max_link_speed = p->dt_max_link_speed;
3034 		}
3035 	}
3036 	if (pcie_max_link_speed)
3037 		max_link_speed = pcie_max_link_speed;
3038 	if (max_link_speed > hw_max_link_speed)
3039 		max_link_speed = hw_max_link_speed;
3040 
3041 	return max_link_speed;
3042 }
3043 
static unsigned int __phb4_get_max_link_width(struct phb4 *p)
3045 {
3046 	uint64_t addr, reg;
3047 	unsigned int lane_config, width = 16;
3048 
3049 	/*
3050 	 * On P9, only PEC2 is configurable (no-/bi-/tri-furcation)
3051 	 */
3052 	switch (p->pec) {
3053 	case 0:
3054 		width = 16;
3055 		break;
3056 	case 1:
3057 		width = 8;
3058 		break;
3059 	case 2:
3060 		addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
3061 		xscom_read(p->chip_id, addr, &reg);
3062 		lane_config = GETFIELD(XPEC_P9_PCI_LANE_CFG, reg);
3063 
3064 		if (lane_config == 0b10 && p->index >= 4)
3065 			width = 4;
3066 		else
3067 			width = 8;
3068 	}
3069 	return width;
3070 }
3071 
static unsigned int __phb5_get_max_link_width(struct phb4 *p)
3073 {
3074 	uint64_t addr, reg;
3075 	unsigned int lane_config, width = 16;
3076 
3077 	/*
3078 	 * On P10, the 2 PECs are identical and each can have a
3079 	 * different furcation, so we always need to check the PEC
3080 	 * config
3081 	 */
3082 	addr = XPEC_P10_PCI_CPLT_CONF1 + p->pec * XPEC_PCI_CPLT_OFFSET;
3083 	xscom_read(p->chip_id, addr, &reg);
3084 	lane_config = GETFIELD(XPEC_P10_PCI_LANE_CFG, reg);
3085 
3086 	switch (lane_config) {
3087 	case 0b00:
3088 		width = 16;
3089 		break;
3090 	case 0b01:
3091 		width = 8;
3092 		break;
3093 	case 0b10:
3094 		if (p->index == 0 || p->index == 3)
3095 			width = 8;
3096 		else
3097 			width = 4;
3098 		break;
3099 	default:
3100 		PHBERR(p, "Unexpected PEC lane config value %#x\n",
3101 		       lane_config);
3102 	}
3103 	return width;
3104 }
3105 
static unsigned int phb4_get_max_link_width(struct phb4 *p)
3107 {
3108 	if (is_phb5())
3109 		return __phb5_get_max_link_width(p);
3110 	else
3111 		return __phb4_get_max_link_width(p);
3112 }
3113 
static void phb4_assert_perst(struct pci_slot *slot, bool assert)
3115 {
3116 	struct phb4 *p = phb_to_phb4(slot->phb);
3117 	uint16_t linkctl;
3118 	uint64_t reg;
3119 
3120 	/*
3121 	 * Disable the link before asserting PERST. The Cursed RAID card
3122 	 * in ozrom1 (9005:028c) has problems coming back if PERST is asserted
3123 	 * while link is active. To work around the problem we assert the link
3124 	 * disable bit before asserting PERST. Asserting the secondary reset
3125 	 * bit in the btctl register also works.
3126 	 */
3127 	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &linkctl);
3128 	reg = phb4_read_reg(p, PHB_PCIE_CRESET);
3129 
3130 	if (assert) {
3131 		linkctl |= PCICAP_EXP_LCTL_LINK_DIS;
3132 		reg &= ~PHB_PCIE_CRESET_PERST_N;
3133 	} else {
3134 		linkctl &= ~PCICAP_EXP_LCTL_LINK_DIS;
3135 		reg |= PHB_PCIE_CRESET_PERST_N;
3136 	}
3137 
3138 	phb4_write_reg(p, PHB_PCIE_CRESET, reg);
3139 	phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, linkctl);
3140 }
3141 
static void set_sys_disable_detect(struct phb4 *p, bool set)
3143 {
3144 	uint64_t val;
3145 
3146 	val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
3147 	if (set)
3148 		val |= PHB_PCIE_DLP_SYS_DISABLEDETECT;
3149 	else
3150 		val &= ~PHB_PCIE_DLP_SYS_DISABLEDETECT;
3151 	out_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL, val);
3152 }
3153 
static int64_t phb4_hreset(struct pci_slot *slot)
3155 {
3156 	struct phb4 *p = phb_to_phb4(slot->phb);
3157 	uint16_t brctl;
3158 	uint8_t presence = 1;
3159 
3160 	switch (slot->state) {
3161 	case PHB4_SLOT_NORMAL:
3162 		PHBDBG(p, "HRESET: Starts\n");
3163 		if (slot->ops.get_presence_state)
3164 			slot->ops.get_presence_state(slot, &presence);
3165 		if (!presence) {
3166 			PHBDBG(p, "HRESET: No device\n");
3167 			return OPAL_SUCCESS;
3168 		}
3169 
3170 		/* circumvention for HW551382 */
3171 		if (is_phb5()) {
3172 			PHBINF(p, "HRESET: Workaround for HW551382\n");
3173 			set_sys_disable_detect(p, true);
3174 		}
3175 
3176 		PHBDBG(p, "HRESET: Prepare for link down\n");
3177 		phb4_prepare_link_change(slot, false);
3178 		/* fall through */
3179 	case PHB4_SLOT_HRESET_START:
3180 		PHBDBG(p, "HRESET: Assert\n");
3181 
3182 		phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
3183 		brctl |= PCI_CFG_BRCTL_SECONDARY_RESET;
3184 		phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
3185 		pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY);
3186 
3187 		return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
3188 	case PHB4_SLOT_HRESET_DELAY:
3189 		PHBDBG(p, "HRESET: Deassert\n");
3190 
3191 		/* Clear link errors before we deassert reset */
3192 		phb4_err_clear_regb(p);
3193 
3194 		phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
3195 		brctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
3196 		phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
3197 
3198 		/*
3199 		 * Due to some oddball adapters bouncing the link
3200 		 * training a couple of times, we wait for a full second
3201 		 * before we start checking the link status, otherwise
3202 		 * we can get a spurious link down interrupt which
3203 		 * causes us to EEH immediately.
3204 		 */
3205 		pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY2);
3206 		return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
3207 	case PHB4_SLOT_HRESET_DELAY2:
3208 		if (is_phb5())
3209 			set_sys_disable_detect(p, false);
3210 		pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
3211 		return slot->ops.poll_link(slot);
3212 	default:
3213 		PHBERR(p, "Unexpected slot state %08x\n", slot->state);
3214 	}
3215 
3216 	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3217 	return OPAL_HARDWARE;
3218 }
3219 
3220 static int64_t phb4_freset(struct pci_slot *slot)
3221 {
3222 	struct phb4 *p = phb_to_phb4(slot->phb);
3223 
3224 	switch(slot->state) {
3225 	case PHB4_SLOT_NORMAL:
3226 	case PHB4_SLOT_FRESET_START:
3227 		PHBDBG(p, "FRESET: Starts\n");
3228 
3229 		/* Reset max link speed for training */
3230 		p->max_link_speed = phb4_get_max_link_speed(p, NULL);
3231 
3232 		PHBDBG(p, "FRESET: Prepare for link down\n");
3233 		phb4_prepare_link_change(slot, false);
3234 
3235 		if (!p->skip_perst) {
3236 			/* Workaround for HW551382 */
3237 			if (is_phb5()) {
3238 				PHBINF(p, "FRESET: Workaround for HW551382\n");
3239 				set_sys_disable_detect(p, true);
3240 			}
3241 
3242 			PHBDBG(p, "FRESET: Assert\n");
3243 			phb4_assert_perst(slot, true);
3244 			pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
3245 
3246 			/* 250ms assert time aligns with powernv */
3247 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
3248 		}
3249 
3250 		/* Skip the assert during boot time */
3251 		PHBDBG(p, "FRESET: Assert skipped\n");
3252 		pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
3253 		p->skip_perst = false;
3254 		/* fall through */
3255 	case PHB4_SLOT_FRESET_ASSERT_DELAY:
3256 		/* Clear link errors before we deassert PERST */
3257 		phb4_err_clear_regb(p);
3258 
3259 		PHBDBG(p, "FRESET: Deassert\n");
3260 		phb4_assert_perst(slot, false);
3261 
3262 		if (pci_tracing)
3263 			phb4_link_trace(p, PHB_PCIE_DLP_LTSSM_L0, 3000);
3264 
3265 		if (is_phb5())
3266 			set_sys_disable_detect(p, false);
3267 
3268 		pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
3269 		return slot->ops.poll_link(slot);
3270 	default:
3271 		PHBERR(p, "Unexpected slot state %08x\n", slot->state);
3272 	}
3273 
3274 	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3275 	return OPAL_HARDWARE;
3276 }
3277 
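/*
 * Load the CAPP microcode lid into the CAPP unit attached to this
 * PHB. Only the PHBs that can be wired to a CAPP unit (the CAPP0 and
 * CAPP1 indices) are eligible.
 */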
3278 static int64_t load_capp_ucode(struct phb4 *p)
3279 {
3280 	int64_t rc;
3281 
3282 	if (p->index != CAPP0_PHB_INDEX && p->index != CAPP1_PHB_INDEX)
3283 		return OPAL_HARDWARE;
3284 
3285 	/* 0x434150504c494448 = 'CAPPLIDH' in ASCII */
3286 	rc = capp_load_ucode(p->chip_id, p->phb.opal_id, p->index,
3287 			0x434150504c494448UL, PHB4_CAPP_REG_OFFSET(p),
3288 			CAPP_APC_MASTER_ARRAY_ADDR_REG,
3289 			CAPP_APC_MASTER_ARRAY_WRITE_REG,
3290 			CAPP_SNP_ARRAY_ADDR_REG,
3291 			CAPP_SNP_ARRAY_WRITE_REG);
3292 	return rc;
3293 }
3294 
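/*
 * Wait for a pending CAPP recovery to complete. On success, clear the
 * FIR bits that caused the local checkstop, disable snooping, reload
 * the CAPP microcode and clear the recovery status bits. Returns
 * OPAL_HARDWARE if the recovery failed or timed out.
 */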
3295 static int do_capp_recovery_scoms(struct phb4 *p)
3296 {
3297 	uint64_t rc, reg, end;
3298 	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
3299 
3300 
3301 	/* Get the status of CAPP recovery */
3302 	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
3303 
3304 	/* No recovery in progress, ignore */
3305 	if ((reg & PPC_BIT(0)) == 0) {
3306 		PHBDBG(p, "CAPP: No recovery in progress\n");
3307 		return OPAL_SUCCESS;
3308 	}
3309 
3310 	PHBDBG(p, "CAPP: Waiting for recovery to complete\n");
3311 	/* recovery timer failure period 168ms */
3312 	end = mftb() + msecs_to_tb(168);
3313 	while ((reg & (PPC_BIT(1) | PPC_BIT(5) | PPC_BIT(9))) == 0) {
3314 
3315 		time_wait_ms(5);
3316 		xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
3317 
3318 		if (tb_compare(mftb(), end) != TB_ABEFOREB) {
3319 			PHBERR(p, "CAPP: Capp recovery Timed-out.\n");
3320 			end = 0;
3321 			break;
3322 		}
3323 	}
3324 
3325 	/* Check if the recovery failed or passed */
3326 	if (reg & PPC_BIT(1)) {
3327 		uint64_t act0, act1, mask, fir;
3328 
3329 		/* Use the Action0/1 and mask to only clear the bits
3330 		 * that cause a local checkstop. Other bits need the
3331 		 * attention of the PRD daemon.
3332 		 */
3333 		xscom_read(p->chip_id, CAPP_FIR_ACTION0 + offset, &act0);
3334 		xscom_read(p->chip_id, CAPP_FIR_ACTION1 + offset, &act1);
3335 		xscom_read(p->chip_id, CAPP_FIR_MASK + offset, &mask);
3336 		xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
3337 
3338 		fir = ~(fir & ~mask & act0 & act1);
3339 		PHBDBG(p, "Doing CAPP recovery scoms\n");
3340 
3341 		/* update capp fir clearing bits causing local checkstop */
3342 		PHBDBG(p, "Resetting CAPP Fir with mask 0x%016llX\n", fir);
3343 		xscom_write(p->chip_id, CAPP_FIR_CLEAR + offset, fir);
3344 
3345 		/* disable snoops */
3346 		xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0);
3347 		load_capp_ucode(p);
3348 
3349 		/* clear err rpt reg*/
3350 		xscom_write(p->chip_id, CAPP_ERR_RPT_CLR + offset, 0);
3351 
3352 		/* clear capp fir */
3353 		xscom_write(p->chip_id, CAPP_FIR + offset, 0);
3354 
3355 		/* Just reset bits 0 and 1 and don't touch any other bit */
3356 		xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
3357 		reg &= ~(PPC_BIT(0) | PPC_BIT(1));
3358 		xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, reg);
3359 
3360 		PHBDBG(p, "CAPP recovery complete\n");
3361 		rc = OPAL_SUCCESS;
3362 
3363 	} else {
3364 		/* We will most likely checkstop here due to the FIR ACTION
3365 		 * for a failed recovery, so these messages would never be
3366 		 * logged. But if we still get here, return an error to force
3367 		 * a fence of the PHB.
3368 		 */
3369 		if (reg  & PPC_BIT(5))
3370 			PHBERR(p, "CAPP: Capp recovery Failed\n");
3371 		else if (reg  & PPC_BIT(9))
3372 			PHBERR(p, "CAPP: Capp recovery hang detected\n");
3373 		else if (end != 0)
3374 			PHBERR(p, "CAPP: Unknown recovery failure\n");
3375 
3376 		PHBDBG(p, "CAPP: Err/Status-reg=0x%016llx\n", reg);
3377 		rc = OPAL_HARDWARE;
3378 	}
3379 
3380 	return rc;
3381 }
3382 
3383 /*
3384  * Disable CAPI mode on a PHB. Must be done while PHB is fenced and
3385  * not in recovery.
3386  */
3387 static void disable_capi_mode(struct phb4 *p)
3388 {
3389 	uint64_t reg;
3390 	struct capp *capp = p->capp;
3391 
3392 	PHBINF(p, "CAPP: Deactivating\n");
3393 
3394 	/* Check if CAPP attached to the PHB and active */
3395 	if (!capp || capp->phb != &p->phb) {
3396 		PHBDBG(p, "CAPP: Not attached to this PHB!\n");
3397 		return;
3398 	}
3399 
3400 	xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
3401 	if (!(reg & PPC_BIT(0))) {
3402 		/* Not in CAPI mode, no action required */
3403 		PHBERR(p, "CAPP: Not enabled!\n");
3404 		return;
3405 	}
3406 
3407 	/* CAPP should already be out of recovery in this function */
3408 	capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
3409 	if (reg & PPC_BIT(0)) {
3410 		PHBERR(p, "CAPP: Can't disable while still in recovery!\n");
3411 		return;
3412 	}
3413 
3414 	PHBINF(p, "CAPP: Disabling CAPI mode\n");
3415 
3416 	/* First Phase Reset CAPP Registers */
3417 	/* CAPP is about to be disabled; mark TLBI_FENCED and tlbi_psl_is_dead */
3418 	capp_xscom_write(capp, CAPP_ERR_STATUS_CTRL, PPC_BIT(3) | PPC_BIT(4));
3419 
3420 	/* Flush SUE uOP1 Register */
3421 	if (p->rev != PHB4_REV_NIMBUS_DD10)
3422 		capp_xscom_write(capp, FLUSH_SUE_UOP1, 0);
3423 
3424 	/* Release DMA/STQ engines */
3425 	capp_xscom_write(capp, APC_FSM_READ_MASK, 0ull);
3426 	capp_xscom_write(capp, XPT_FSM_RMM, 0ull);
3427 
3428 	/* Disable snoop */
3429 	capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
3430 
3431 	/* Clear flush SUE state map register */
3432 	capp_xscom_write(capp, FLUSH_SUE_STATE_MAP, 0);
3433 
3434 	/* Disable epoch timer */
3435 	capp_xscom_write(capp, EPOCH_RECOVERY_TIMERS_CTRL, 0);
3436 
3437 	/* CAPP Transport Control Register */
3438 	capp_xscom_write(capp, TRANSPORT_CONTROL, PPC_BIT(15));
3439 
3440 	/* Disable snooping */
3441 	capp_xscom_write(capp, SNOOP_CONTROL, 0);
3442 	capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
3443 
3444 	/* APC Master PB Control Register - disable examining cResps */
3445 	capp_xscom_write(capp, APC_MASTER_PB_CTRL, 0);
3446 
3447 	/* APC Master Config Register - de-select PHBs */
3448 	xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
3449 			 APC_MASTER_CAPI_CTRL, 0, PPC_BITMASK(2, 3));
3450 
3451 	/* Clear all error registers */
3452 	capp_xscom_write(capp, CAPP_ERR_RPT_CLR, 0);
3453 	capp_xscom_write(capp, CAPP_FIR, 0);
3454 	capp_xscom_write(capp, CAPP_FIR_ACTION0, 0);
3455 	capp_xscom_write(capp, CAPP_FIR_ACTION1, 0);
3456 	capp_xscom_write(capp, CAPP_FIR_MASK, 0);
3457 
3458 	/* Second Phase Reset PEC/PHB Registers */
3459 
3460 	/* Reset the stack overrides if any */
3461 	xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PRDSTKOVR, 0);
3462 	xscom_write(p->chip_id, p->pe_xscom +
3463 		    XPEC_NEST_READ_STACK_OVERRIDE, 0);
3464 
3465 	/* PE Bus AIB Mode Bits. Disable Tracing. Leave HOL Blocking as it is */
3466 	if (!(p->rev == PHB4_REV_NIMBUS_DD10) && p->index == CAPP1_PHB_INDEX)
3467 		xscom_write_mask(p->chip_id,
3468 				 p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, 0,
3469 				 PPC_BIT(30));
3470 
3471 	/* Reset for PCI to PB data movement */
3472 	xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
3473 			 0, XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
3474 
3475 	/* Disable CAPP mode in PEC CAPP Control Register */
3476 	xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, 0ull);
3477 }
3478 
3479 static int64_t phb4_creset(struct pci_slot *slot)
3480 {
3481 	struct phb4 *p = phb_to_phb4(slot->phb);
3482 	struct capp *capp = p->capp;
3483 	uint64_t pbcq_status;
3484 	uint64_t creset_time, wait_time;
3485 
3486 	/* Don't even try fixing a broken PHB */
3487 	if (p->broken)
3488 		return OPAL_HARDWARE;
3489 
3490 	switch (slot->state) {
3491 	case PHB4_SLOT_NORMAL:
3492 	case PHB4_SLOT_CRESET_START:
3493 		PHBDBG(p, "CRESET: Starts\n");
3494 
3495 		p->creset_start_time = mftb();
3496 
3497 		/* Workaround for HW551382 */
3498 		if (is_phb5()) {
3499 			PHBINF(p, "CRESET: Workaround for HW551382\n");
3500 			set_sys_disable_detect(p, true);
3501 		}
3502 
3503 		phb4_prepare_link_change(slot, false);
3504 		/* Clear error inject register, preventing recursive errors */
3505 		xscom_write(p->chip_id, p->pe_xscom + 0x2, 0x0);
3506 
3507 		/* Prevent HMI when PHB gets fenced as we are disabling CAPP */
3508 		if (p->flags & PHB4_CAPP_DISABLE &&
3509 		    capp && capp->phb == slot->phb) {
3510 			/* Since there is no HMI, set the recovery flag manually. */
3511 			p->flags |= PHB4_CAPP_RECOVERY;
3512 			xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
3513 					 CAPP_FIR_MASK,
3514 					 PPC_BIT(31), PPC_BIT(31));
3515 		}
3516 
3517 		/* Force fence on the PHB to work around a non-existent PE */
3518 		if (!phb4_fenced(p))
3519 			xscom_write(p->chip_id, p->pe_stk_xscom + 0x2,
3520 				    0x0000002000000000UL);
3521 
3522 		/*
3523 		 * Force use of ASB for register access until the PHB has
3524 		 * been fully reset.
3525 		 */
3526 		p->flags |= PHB4_CFG_USE_ASB | PHB4_AIB_FENCED;
3527 
3528 		/* Assert PERST before clearing errors */
3529 		phb4_assert_perst(slot, true);
3530 
3531 		/* Clear errors, following the proper sequence */
3532 		phb4_err_clear(p);
3533 
3534 		/* Actual reset */
3535 		p->flags |= PHB4_ETU_IN_RESET;
3536 		xscom_write(p->chip_id, p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET,
3537 			    0x8000000000000000UL);
3538 
3539 		/* Read errors in PFIR and NFIR */
3540 		xscom_read(p->chip_id, p->pci_stk_xscom + 0x0, &p->pfir_cache);
3541 		xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &p->nfir_cache);
3542 
3543 		pci_slot_set_state(slot, PHB4_SLOT_CRESET_WAIT_CQ);
3544 		slot->retries = 500;
3545 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
3546 	case PHB4_SLOT_CRESET_WAIT_CQ:
3547 
3548 		/* Wait until operations are complete */
3549 		xscom_read(p->chip_id, p->pe_stk_xscom + 0xc, &pbcq_status);
3550 		if (!(pbcq_status & 0xC000000000000000UL)) {
3551 			PHBDBG(p, "CRESET: No pending transactions\n");
3552 
3553 			/* capp recovery */
3554 			if ((p->flags & PHB4_CAPP_RECOVERY) &&
3555 			    (do_capp_recovery_scoms(p) != OPAL_SUCCESS))
3556 				goto error;
3557 
3558 			if (p->flags & PHB4_CAPP_DISABLE)
3559 				disable_capi_mode(p);
3560 
3561 			/* Clear errors in PFIR and NFIR */
3562 			xscom_write(p->chip_id, p->pci_stk_xscom + 0x1,
3563 				    ~p->pfir_cache);
3564 			xscom_write(p->chip_id, p->pe_stk_xscom + 0x1,
3565 				    ~p->nfir_cache);
3566 
3567 			/* Re-read errors in PFIR and NFIR and reset any new
3568 			 * error reported.
3569 			 */
3570 			xscom_read(p->chip_id, p->pci_stk_xscom +
3571 				   XPEC_PCI_STK_PCI_FIR, &p->pfir_cache);
3572 			xscom_read(p->chip_id, p->pe_stk_xscom +
3573 				   XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
3574 
3575 			if (p->pfir_cache || p->nfir_cache) {
3576 				PHBERR(p, "CRESET: PHB still fenced !!\n");
3577 				phb4_dump_pec_err_regs(p);
3578 
3579 				/* Reset the PHB errors */
3580 				xscom_write(p->chip_id, p->pci_stk_xscom +
3581 					    XPEC_PCI_STK_PCI_FIR, 0);
3582 				xscom_write(p->chip_id, p->pe_stk_xscom +
3583 					    XPEC_NEST_STK_PCI_NFIR, 0);
3584 			}
3585 
3586 			/* Clear PHB from reset */
3587 			xscom_write(p->chip_id,
3588 				    p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET, 0x0);
3589 			p->flags &= ~PHB4_ETU_IN_RESET;
3590 
3591 			pci_slot_set_state(slot, PHB4_SLOT_CRESET_REINIT);
3592 			/* After lifting PHB reset, wait while logic settles */
3593 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
3594 		}
3595 
3596 		if (slot->retries-- == 0) {
3597 			PHBERR(p, "Timeout waiting for pending transaction\n");
3598 			goto error;
3599 		}
3600 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(100));
3601 	case PHB4_SLOT_CRESET_REINIT:
3602 		PHBDBG(p, "CRESET: Reinitialization\n");
3603 		p->flags &= ~PHB4_AIB_FENCED;
3604 		p->flags &= ~PHB4_CAPP_RECOVERY;
3605 		p->flags &= ~PHB4_CFG_USE_ASB;
3606 		phb4_init_hw(p);
3607 		pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
3608 
3609 		/*
3610 		 * The PERST is sticky across resets, but LINK_DIS isn't.
3611 		 * Re-assert it here now that we've reset the PHB.
3612 		 */
3613 		phb4_assert_perst(slot, true);
3614 
3615 		/*
3616 		 * wait either 100ms (for the ETU logic) or until we've had
3617 		 * PERST asserted for 250ms.
3618 		 */
3619 		creset_time = tb_to_msecs(mftb() - p->creset_start_time);
3620 		if (creset_time < 250)
3621 			wait_time = MAX(100, 250 - creset_time);
3622 		else
3623 			wait_time = 100;
3624 		PHBDBG(p, "CRESET: wait_time = %lld\n", wait_time);
3625 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(wait_time));
3626 
3627 	case PHB4_SLOT_CRESET_FRESET:
3628 		/*
3629 		 * We asserted PERST at the beginning of the CRESET and we
3630 		 * have waited long enough, so we can skip it in the freset
3631 		 * procedure.
3632 		 */
3633 		p->skip_perst = true;
3634 		pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3635 		return slot->ops.freset(slot);
3636 	default:
3637 		PHBERR(p, "CRESET: Unexpected slot state %08x, resetting...\n",
3638 		       slot->state);
3639 		pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3640 		return slot->ops.creset(slot);
3641 
3642 	}
3643 
3644 error:
3645 	/* Mark the PHB as dead and expect it to be removed */
3646 	p->broken = true;
3647 	return OPAL_HARDWARE;
3648 }
3649 
3650 /*
3651  * Initialize the root complex slot, which is mainly used to
3652  * perform a fundamental reset before PCI enumeration in the PCI
3653  * core. When the root complex is probed and its real slot is built,
3654  * these operations are copied over.
3655  */
3656 static struct pci_slot *phb4_slot_create(struct phb *phb)
3657 {
3658 	struct pci_slot *slot;
3659 
3660 	slot = pci_slot_alloc(phb, NULL);
3661 	if (!slot)
3662 		return slot;
3663 
3664 	/* Elementary functions */
3665 	slot->ops.get_presence_state  = phb4_get_presence_state;
3666 	slot->ops.get_link_state      = phb4_get_link_state;
3667 	slot->ops.get_power_state     = NULL;
3668 	slot->ops.get_attention_state = NULL;
3669 	slot->ops.get_latch_state     = NULL;
3670 	slot->ops.set_power_state     = NULL;
3671 	slot->ops.set_attention_state = NULL;
3672 
3673 	/*
3674 	 * For PHB slots, we have to split the fundamental reset
3675 	 * into 2 steps. The first step, which powers the slot
3676 	 * off and on, might not exist or may be controlled by
3677 	 * individual platforms.
3678 	 */
3679 	slot->ops.prepare_link_change	= phb4_prepare_link_change;
3680 	slot->ops.poll_link		= phb4_poll_link;
3681 	slot->ops.hreset		= phb4_hreset;
3682 	slot->ops.freset		= phb4_freset;
3683 	slot->ops.creset		= phb4_creset;
3684 	slot->ops.completed_sm_run	= phb4_slot_sm_run_completed;
3685 	slot->link_retries		= PHB4_LINK_LINK_RETRIES;
3686 
3687 	return slot;
3688 }
3689 
3690 static void phb4_int_unmask_all(struct phb4 *p)
3691 {
3692 	/* Init_126..130 - Re-enable error interrupts */
3693 	out_be64(p->regs + PHB_ERR_IRQ_ENABLE,         0xca8880cc00000000ull);
3694 
3695 	if (is_phb5())
3696 		out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE, 0x200850be08200020ull);
3697 	else
3698 		out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE, 0x2008400e08200000ull);
3699 	out_be64(p->regs + PHB_RXE_ARB_ERR_IRQ_ENABLE, 0xc40038fc01804070ull);
3700 	out_be64(p->regs + PHB_RXE_MRG_ERR_IRQ_ENABLE, 0x00006100008000a8ull);
3701 	out_be64(p->regs + PHB_RXE_TCE_ERR_IRQ_ENABLE, 0x60510050c0000000ull);
3702 }
3703 
3704 /*
3705  * Mask the IRQ for any currently set error bits. This prevents the PHB's ERR
3706  * and INF interrupts from being re-fired before the kernel can handle the
3707  * underlying condition.
3708  */
3709 static void phb4_int_mask_active(struct phb4 *p)
3710 {
3711 	const uint64_t error_regs[] = {
3712 		PHB_ERR_STATUS,
3713 		PHB_TXE_ERR_STATUS,
3714 		PHB_RXE_ARB_ERR_STATUS,
3715 		PHB_RXE_MRG_ERR_STATUS,
3716 		PHB_RXE_TCE_ERR_STATUS
3717 	};
3718 	int i;
3719 
3720 	for (i = 0; i < ARRAY_SIZE(error_regs); i++) {
3721 		uint64_t stat, mask;
3722 
3723 		/* The IRQ mask reg is always offset 0x20 from the status reg */
3724 		stat = phb4_read_reg(p, error_regs[i]);
3725 		mask = phb4_read_reg(p, error_regs[i] + 0x20);
3726 
3727 		phb4_write_reg(p, error_regs[i] + 0x20, mask & ~stat);
3728 	}
3729 }
3730 
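/*
 * Return the PESTA entry for a PE. The copy held in the PHB's IODA
 * table only carries the summary bits; when the PE is MMIO frozen,
 * merge in the detailed status from the in-memory PEST table.
 */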
3731 static uint64_t phb4_get_pesta(struct phb4 *p, uint64_t pe_number)
3732 {
3733 	uint64_t pesta;
3734 	__be64 *pPEST;
3735 
3736 	pPEST = (__be64 *)p->tbl_pest;
3737 
3738 	phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3739 	pesta = phb4_read_reg(p, PHB_IODA_DATA0);
3740 	if (pesta & IODA3_PESTA_MMIO_FROZEN)
3741 		pesta |= be64_to_cpu(pPEST[2*pe_number]);
3742 
3743 	return pesta;
3744 }
3745 
3746 /* Check if the chip requires escalating a freeze to fence on MMIO loads */
3747 static bool phb4_escalation_required(void)
3748 {
3749 	uint64_t pvr = mfspr(SPR_PVR);
3750 
3751 	/* Only on Power9 */
3752 	if (proc_gen != proc_gen_p9)
3753 		return false;
3754 
3755 	/*
3756 	 * Escalation is required on the following chip versions:
3757 	 * - Cumulus DD1.0
3758 	 * - Nimbus DD2.0, DD2.1 (and DD1.0, but it is unsupported so no check).
3759 	 */
3760 	if (pvr & PVR_POWER9_CUMULUS) {
3761 		if (PVR_VERS_MAJ(pvr) == 1 && PVR_VERS_MIN(pvr) == 0)
3762 			return true;
3763 	} else { /* Nimbus */
3764 		if (PVR_VERS_MAJ(pvr) == 2 && PVR_VERS_MIN(pvr) < 2)
3765 			return true;
3766 	}
3767 
3768 	return false;
3769 }
3770 
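/*
 * A freeze is escalated when the PESTA shows an MMIO load that ended
 * with a completer abort/completion timeout (CA_CMPLT_TMT) or UR
 * status.
 */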
3771 static bool phb4_freeze_escalate(uint64_t pesta)
3772 {
3773 	if ((GETFIELD(IODA3_PESTA_TRANS_TYPE, pesta) ==
3774 	     IODA3_PESTA_TRANS_TYPE_MMIOLOAD) &&
3775 	    (pesta & (IODA3_PESTA_CA_CMPLT_TMT | IODA3_PESTA_UR)))
3776 		return true;
3777 	return false;
3778 }
3779 
3780 static int64_t phb4_eeh_freeze_status(struct phb *phb, uint64_t pe_number,
3781 				      uint8_t *freeze_state,
3782 				      uint16_t *pci_error_type,
3783 				      uint16_t *severity)
3784 {
3785 	struct phb4 *p = phb_to_phb4(phb);
3786 	uint64_t peev_bit = PPC_BIT(pe_number & 0x3f);
3787 	uint64_t peev, pesta, pestb;
3788 
3789 	/* Defaults: not frozen */
3790 	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
3791 	*pci_error_type = OPAL_EEH_NO_ERROR;
3792 
3793 	/* Check dead */
3794 	if (p->broken) {
3795 		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
3796 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3797 		if (severity)
3798 			*severity = OPAL_EEH_SEV_PHB_DEAD;
3799 		return OPAL_HARDWARE;
3800 	}
3801 
3802 	/* Check fence and CAPP recovery */
3803 	if (phb4_fenced(p) || (p->flags & PHB4_CAPP_RECOVERY)) {
3804 		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
3805 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3806 		if (severity)
3807 			*severity = OPAL_EEH_SEV_PHB_FENCED;
3808 		return OPAL_SUCCESS;
3809 	}
3810 
3811 	/* Check the PEEV */
3812 	phb4_ioda_sel(p, IODA3_TBL_PEEV, pe_number / 64, false);
3813 	peev = in_be64(p->regs + PHB_IODA_DATA0);
3814 	if (!(peev & peev_bit))
3815 		return OPAL_SUCCESS;
3816 
3817 	/* Indicate that we have an ER pending */
3818 	phb4_set_err_pending(p, true);
3819 	if (severity)
3820 		*severity = OPAL_EEH_SEV_PE_ER;
3821 
3822 	/* Read the full PESTA */
3823 	pesta = phb4_get_pesta(p, pe_number);
3824 	/* Check if we need to escalate to fence */
3825 	if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
3826 		PHBERR(p, "Escalating freeze to fence PESTA[%lli]=%016llx\n",
3827 		       pe_number, pesta);
3828 		if (severity)
			*severity = OPAL_EEH_SEV_PHB_FENCED;
3829 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3830 	}
3831 
3832 	/* Read the PESTB in the PHB */
3833 	phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3834 	pestb = phb4_read_reg(p, PHB_IODA_DATA0);
3835 
3836 	/* Convert PESTA/B to freeze_state */
3837 	if (pesta & IODA3_PESTA_MMIO_FROZEN)
3838 		*freeze_state |= OPAL_EEH_STOPPED_MMIO_FREEZE;
3839 	if (pestb & IODA3_PESTB_DMA_STOPPED)
3840 		*freeze_state |= OPAL_EEH_STOPPED_DMA_FREEZE;
3841 
3842 	return OPAL_SUCCESS;
3843 }
3844 
3845 static int64_t phb4_eeh_freeze_clear(struct phb *phb, uint64_t pe_number,
3846 				     uint64_t eeh_action_token)
3847 {
3848 	struct phb4 *p = phb_to_phb4(phb);
3849 	uint64_t err, peev;
3850 	int32_t i;
3851 	bool frozen_pe = false;
3852 
3853 	if (p->broken)
3854 		return OPAL_HARDWARE;
3855 
3856 	/* Check the error summary. If nothing is set, move to clearing
3857 	 * the PESTs, which can contain a freeze state from a previous
3858 	 * error or one simply set explicitly by the user
3859 	 */
3860 	err = in_be64(p->regs + PHB_ETU_ERR_SUMMARY);
3861 	if (err == 0xffffffffffffffffUL) {
3862 		if (phb4_fenced(p)) {
3863 			PHBERR(p, "eeh_freeze_clear on fenced PHB\n");
3864 			return OPAL_HARDWARE;
3865 		}
3866 	}
3867 	if (err != 0)
3868 		phb4_err_clear(p);
3869 
3870 	/*
3871 	 * We have the PEEV in system memory. Accessing it directly
3872 	 * would give better performance.
3873 	 */
3874 	if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO) {
3875 		phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3876 		out_be64(p->regs + PHB_IODA_DATA0, 0);
3877 	}
3878 	if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_DMA) {
3879 		phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3880 		out_be64(p->regs + PHB_IODA_DATA0, 0);
3881 	}
3882 
3883 
3884 	/* Update ER pending indication */
3885 	phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
3886 	for (i = 0; i < p->num_pes/64; i++) {
3887 		peev = in_be64(p->regs + PHB_IODA_DATA0);
3888 		if (peev) {
3889 			frozen_pe = true;
3890 			break;
3891 		}
3892 	}
3893 	if (frozen_pe) {
3894 		p->err.err_src	 = PHB4_ERR_SRC_PHB;
3895 		p->err.err_class = PHB4_ERR_CLASS_ER;
3896 		p->err.err_bit   = -1;
3897 		phb4_set_err_pending(p, true);
3898 	} else
3899 		phb4_set_err_pending(p, false);
3900 
3901 	return OPAL_SUCCESS;
3902 }
3903 
3904 static int64_t phb4_eeh_freeze_set(struct phb *phb, uint64_t pe_number,
3905 				   uint64_t eeh_action_token)
3906 {
3907 	struct phb4 *p = phb_to_phb4(phb);
3908 	uint64_t data;
3909 
3910 	if (p->broken)
3911 		return OPAL_HARDWARE;
3912 
3913 	if (pe_number >= p->num_pes)
3914 		return OPAL_PARAMETER;
3915 
3916 	if (eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_MMIO &&
3917 	    eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_DMA &&
3918 	    eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_ALL)
3919 		return OPAL_PARAMETER;
3920 
3921 	if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_MMIO) {
3922 		phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3923 		data = in_be64(p->regs + PHB_IODA_DATA0);
3924 		data |= IODA3_PESTA_MMIO_FROZEN;
3925 		out_be64(p->regs + PHB_IODA_DATA0, data);
3926 	}
3927 
3928 	if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_DMA) {
3929 		phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3930 		data = in_be64(p->regs + PHB_IODA_DATA0);
3931 		data |= IODA3_PESTB_DMA_STOPPED;
3932 		out_be64(p->regs + PHB_IODA_DATA0, data);
3933 	}
3934 
3935 	return OPAL_SUCCESS;
3936 }
3937 
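/*
 * Report the next pending EEH error: dead PHB, fence, frozen PE (ER)
 * or informational error, and identify the first frozen PE from the
 * PEEV when the error class is ER.
 */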
3938 static int64_t phb4_eeh_next_error(struct phb *phb,
3939 				   uint64_t *first_frozen_pe,
3940 				   uint16_t *pci_error_type,
3941 				   uint16_t *severity)
3942 {
3943 	struct phb4 *p = phb_to_phb4(phb);
3944 	uint64_t peev, pesta;
3945 	uint32_t peev_size = p->num_pes/64;
3946 	int32_t i, j;
3947 
3948 	/* If the PHB is broken, we needn't go forward */
3949 	if (p->broken) {
3950 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3951 		*severity = OPAL_EEH_SEV_PHB_DEAD;
3952 		return OPAL_SUCCESS;
3953 	}
3954 
3955 	if ((p->flags & PHB4_CAPP_RECOVERY)) {
3956 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3957 		*severity = OPAL_EEH_SEV_PHB_FENCED;
3958 		return OPAL_SUCCESS;
3959 	}
3960 
3961 	/*
3962 	 * Check if we already have pending errors. If that's
3963 	 * the case, gather more information about the pending
3964 	 * errors, trying PBCQ prior to the PHB.
3965 	 */
3966 	if (phb4_err_pending(p) /*&&
3967 	    !phb4_err_check_pbcq(p) &&
3968 	    !phb4_err_check_lem(p) */)
3969 		phb4_set_err_pending(p, false);
3970 
3971 	/* Clear result */
3972 	*pci_error_type  = OPAL_EEH_NO_ERROR;
3973 	*severity	 = OPAL_EEH_SEV_NO_ERROR;
3974 	*first_frozen_pe = (uint64_t)-1;
3975 
3976 	/* Check frozen PEs */
3977 	if (!phb4_err_pending(p)) {
3978 		phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
3979 		for (i = 0; i < peev_size; i++) {
3980 			peev = in_be64(p->regs + PHB_IODA_DATA0);
3981 			if (peev) {
3982 				p->err.err_src	 = PHB4_ERR_SRC_PHB;
3983 				p->err.err_class = PHB4_ERR_CLASS_ER;
3984 				p->err.err_bit	 = -1;
3985 				phb4_set_err_pending(p, true);
3986 				break;
3987 			}
3988 		}
3989 	}
3990 
3991 	if (!phb4_err_pending(p))
3992 		return OPAL_SUCCESS;
3993 	/*
3994 	 * If the frozen PE is caused by a malfunctioning TLP, we
3995 	 * need to reset the PHB, so convert the ER to a PHB-fatal
3996 	 * error in that case.
3997 	 */
3998 	if (p->err.err_class == PHB4_ERR_CLASS_ER) {
3999 		for (i = peev_size - 1; i >= 0; i--) {
4000 			phb4_ioda_sel(p, IODA3_TBL_PEEV, i, false);
4001 			peev = in_be64(p->regs + PHB_IODA_DATA0);
4002 			for (j = 0; j < 64; j++) {
4003 				if (peev & PPC_BIT(j)) {
4004 					*first_frozen_pe = i * 64 + j;
4005 					break;
4006 				}
4007 			}
4008 			if (*first_frozen_pe != (uint64_t)(-1))
4009 				break;
4010 		}
4011 	}
4012 
4013 	if (*first_frozen_pe != (uint64_t)(-1)) {
4014 		pesta = phb4_get_pesta(p, *first_frozen_pe);
4015 		if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
4016 			PHBINF(p, "Escalating freeze to fence. PESTA[%lli]=%016llx\n",
4017 			       *first_frozen_pe, pesta);
4018 			p->err.err_class = PHB4_ERR_CLASS_FENCED;
4019 		}
4020 	}
4021 
4022 	switch (p->err.err_class) {
4023 	case PHB4_ERR_CLASS_DEAD:
4024 		*pci_error_type = OPAL_EEH_PHB_ERROR;
4025 		*severity = OPAL_EEH_SEV_PHB_DEAD;
4026 		break;
4027 	case PHB4_ERR_CLASS_FENCED:
4028 		*pci_error_type = OPAL_EEH_PHB_ERROR;
4029 		*severity = OPAL_EEH_SEV_PHB_FENCED;
4030 		break;
4031 	case PHB4_ERR_CLASS_ER:
4032 		*pci_error_type = OPAL_EEH_PE_ERROR;
4033 		*severity = OPAL_EEH_SEV_PE_ER;
4034 
4035 		/* No frozen PE ? */
4036 		if (*first_frozen_pe == (uint64_t)-1) {
4037 			*pci_error_type = OPAL_EEH_NO_ERROR;
4038 			*severity = OPAL_EEH_SEV_NO_ERROR;
4039 			phb4_set_err_pending(p, false);
4040 		}
4041 
4042 		break;
4043 	case PHB4_ERR_CLASS_INF:
4044 		*pci_error_type = OPAL_EEH_PHB_ERROR;
4045 		*severity = OPAL_EEH_SEV_INF;
4046 		break;
4047 	default:
4048 		*pci_error_type = OPAL_EEH_NO_ERROR;
4049 		*severity = OPAL_EEH_SEV_NO_ERROR;
4050 		phb4_set_err_pending(p, false);
4051 	}
4052 
4053 	/*
4054 	 * Unmask all our error interrupts once all pending errors
4055 	 * have been handled.
4056 	 */
4057 	if (!phb4_err_pending(p))
4058 		phb4_int_unmask_all(p);
4059 
4060 	return OPAL_SUCCESS;
4061 }
4062 
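/*
 * Program the PAPR error injection address, mask and control
 * registers, adding the read/write direction to the control value.
 */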
4063 static int64_t phb4_err_inject_finalize(struct phb4 *phb, uint64_t addr,
4064 					uint64_t mask, uint64_t ctrl,
4065 					bool is_write)
4066 {
4067 	if (is_write)
4068 		ctrl |= PHB_PAPR_ERR_INJ_CTL_WR;
4069 	else
4070 		ctrl |= PHB_PAPR_ERR_INJ_CTL_RD;
4071 
4072 	out_be64(phb->regs + PHB_PAPR_ERR_INJ_ADDR, addr);
4073 	out_be64(phb->regs + PHB_PAPR_ERR_INJ_MASK, mask);
4074 	out_be64(phb->regs + PHB_PAPR_ERR_INJ_CTL, ctrl);
4075 
4076 	return OPAL_SUCCESS;
4077 }
4078 
4079 static int64_t phb4_err_inject_mem32(struct phb4 *phb __unused,
4080 				     uint64_t pe_number __unused,
4081 				     uint64_t addr __unused,
4082 				     uint64_t mask __unused,
4083 				     bool is_write __unused)
4084 {
4085 	return OPAL_UNSUPPORTED;
4086 }
4087 
4088 static int64_t phb4_err_inject_mem64(struct phb4 *phb __unused,
4089 				     uint64_t pe_number __unused,
4090 				     uint64_t addr __unused,
4091 				     uint64_t mask __unused,
4092 				     bool is_write __unused)
4093 {
4094 	return OPAL_UNSUPPORTED;
4095 }
4096 
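/*
 * Config space error injection: walk the RTT to find a bdfn mapped to
 * the target PE and derive the config address/mask to inject on. A PE
 * can cover a whole bus or a single device, which changes how the
 * address is matched.
 */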
4097 static int64_t phb4_err_inject_cfg(struct phb4 *phb, uint64_t pe_number,
4098 				   uint64_t addr, uint64_t mask,
4099 				   bool is_write)
4100 {
4101 	uint64_t a, m, prefer, ctrl;
4102 	int bdfn;
4103 	bool is_bus_pe = false;
4104 
4105 	a = 0xffffull;
4106 	prefer = 0xffffull;
4107 	m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
4108 	ctrl = PHB_PAPR_ERR_INJ_CTL_CFG;
4109 
4110 	for (bdfn = 0; bdfn < RTT_TABLE_ENTRIES; bdfn++) {
4111 		if (be16_to_cpu(phb->tbl_rtt[bdfn]) != pe_number)
4112 			continue;
4113 
4114 		/* The PE can be associated with PCI bus or device */
4115 		is_bus_pe = false;
4116 		if ((bdfn + 8) < RTT_TABLE_ENTRIES &&
4117 		    be16_to_cpu(phb->tbl_rtt[bdfn + 8]) == pe_number)
4118 			is_bus_pe = true;
4119 
4120 		/* Figure out the PCI config address */
4121 		if (prefer == 0xffffull) {
4122 			if (is_bus_pe) {
4123 				m = PHB_PAPR_ERR_INJ_MASK_CFG;
4124 				prefer = SETFIELD(m, 0x0ull, PCI_BUS_NUM(bdfn));
4125 			} else {
4126 				m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
4127 				prefer = SETFIELD(m, 0x0ull, bdfn);
4128 			}
4129 		}
4130 
4131 		/* Check whether the input address is valid */
4132 		if (!is_bus_pe &&
4133 		    GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG_ALL, addr) == bdfn) {
4134 			a = addr;
4135 			break;
4136 		}
4137 
4138 		if (is_bus_pe &&
4139 		    GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG, addr) == PCI_BUS_NUM(bdfn)) {
4140 			a = addr;
4141 			break;
4142 		}
4143 	}
4144 
4145 	/* Invalid PE number */
4146 	if (prefer == 0xffffull)
4147 		return OPAL_PARAMETER;
4148 
4149 	/* Specified address is out of range */
4150 	if (a == 0xffffull)
4151 		a = prefer;
4152 	else
4153 		m = mask;
4154 
4155 	return phb4_err_inject_finalize(phb, a, m, ctrl, is_write);
4156 }
4157 
4158 static int64_t phb4_err_inject_dma(struct phb4 *phb __unused,
4159 				   uint64_t pe_number __unused,
4160 				   uint64_t addr __unused,
4161 				   uint64_t mask __unused,
4162 				   bool is_write __unused,
4163 				   bool is_64bits __unused)
4164 {
4165 	return OPAL_UNSUPPORTED;
4166 }
4167 
4168 static int64_t phb4_err_inject_dma32(struct phb4 *phb, uint64_t pe_number,
4169 				     uint64_t addr, uint64_t mask,
4170 				     bool is_write)
4171 {
4172 	return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, false);
4173 }
4174 
4175 static int64_t phb4_err_inject_dma64(struct phb4 *phb, uint64_t pe_number,
4176 				     uint64_t addr, uint64_t mask,
4177 				     bool is_write)
4178 {
4179 	return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, true);
4180 }
4181 
4182 
4183 static int64_t phb4_err_inject(struct phb *phb, uint64_t pe_number,
4184 			       uint32_t type, uint32_t func,
4185 			       uint64_t addr, uint64_t mask)
4186 {
4187 	struct phb4 *p = phb_to_phb4(phb);
4188 	int64_t (*handler)(struct phb4 *p, uint64_t pe_number,
4189 			   uint64_t addr, uint64_t mask, bool is_write);
4190 	bool is_write;
4191 
4192 	/* We can't inject errors into the reserved PE */
4193 	if (pe_number == PHB4_RESERVED_PE_NUM(p) || pe_number >= p->num_pes)
4194 		return OPAL_PARAMETER;
4195 
4196 	/* Clear leftover from last time */
4197 	out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
4198 
4199 	switch (func) {
4200 	case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR:
4201 	case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA:
4202 		is_write = false;
4203 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4204 			handler = phb4_err_inject_mem64;
4205 		else
4206 			handler = phb4_err_inject_mem32;
4207 		break;
4208 	case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR:
4209 	case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA:
4210 		is_write = true;
4211 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4212 			handler = phb4_err_inject_mem64;
4213 		else
4214 			handler = phb4_err_inject_mem32;
4215 		break;
4216 	case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR:
4217 	case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA:
4218 		is_write = false;
4219 		handler = phb4_err_inject_cfg;
4220 		break;
4221 	case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR:
4222 	case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA:
4223 		is_write = true;
4224 		handler = phb4_err_inject_cfg;
4225 		break;
4226 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR:
4227 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA:
4228 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER:
4229 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET:
4230 		is_write = false;
4231 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4232 			handler = phb4_err_inject_dma64;
4233 		else
4234 			handler = phb4_err_inject_dma32;
4235 		break;
4236 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR:
4237 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA:
4238 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER:
4239 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET:
4240 		is_write = true;
4241 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4242 			handler = phb4_err_inject_dma64;
4243 		else
4244 			handler = phb4_err_inject_dma32;
4245 		break;
4246 	default:
4247 		return OPAL_PARAMETER;
4248 	}
4249 
4250 	return handler(p, pe_number, addr, mask, is_write);
4251 }
4252 
4253 static int64_t phb4_get_diag_data(struct phb *phb,
4254 				  void *diag_buffer,
4255 				  uint64_t diag_buffer_len)
4256 {
4257 	bool fenced;
4258 	struct phb4 *p = phb_to_phb4(phb);
4259 	struct OpalIoPhb4ErrorData *data = diag_buffer;
4260 
4261 	if (diag_buffer_len < sizeof(struct OpalIoPhb4ErrorData))
4262 		return OPAL_PARAMETER;
4263 	if (p->broken)
4264 		return OPAL_HARDWARE;
4265 
4266 	/*
4267 	 * Dummy check for fence so that phb4_read_phb_status knows
4268 	 * whether to use ASB or AIB
4269 	 */
4270 	fenced = phb4_fenced(p);
4271 	phb4_read_phb_status(p, data);
4272 
4273 	if (!fenced)
4274 		phb4_eeh_dump_regs(p);
4275 
4276 	/*
4277 	 * We most likely got here because of errors
4278 	 * (INF class). In that case, we need to clear the error
4279 	 * explicitly.
4280 	 */
4281 	if (phb4_err_pending(p) &&
4282 	    p->err.err_class == PHB4_ERR_CLASS_INF &&
4283 	    p->err.err_src == PHB4_ERR_SRC_PHB) {
4284 		phb4_err_clear(p);
4285 		phb4_set_err_pending(p, false);
4286 	}
4287 
4288 	return OPAL_SUCCESS;
4289 }
4290 
4291 static uint64_t tve_encode_50b_noxlate(uint64_t start_addr, uint64_t end_addr)
4292 {
4293 	uint64_t tve;
4294 
4295 	/*
4296 	 * Put start address bits 49:24 into TVE[52:53]||[0:23]
4297 	 * and end address bits 49:24 into TVE[54:55]||[24:47]
4298 	 * and set TVE[51]
4299 	 */
4300 	tve  = (start_addr << 16) & (0xffffffull << 40);
4301 	tve |= (start_addr >> 38) & (3ull << 10);
4302 	tve |= (end_addr >>  8) & (0xfffffful << 16);
4303 	tve |= (end_addr >> 40) & (3ull << 8);
4304 	tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
4305 	return tve;
4306 }
4307 
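/*
 * True only for Nimbus DD2.00: rev DD2.0 with the low nibble of the
 * chip EC level being 0.
 */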
4308 static bool phb4_is_dd20(struct phb4 *p)
4309 {
4310 	struct proc_chip *chip = get_chip(p->chip_id);
4311 
4312 	if (p->rev == PHB4_REV_NIMBUS_DD20 && ((0xf & chip->ec_level) == 0))
4313 		return true;
4314 	return false;
4315 }
4316 
4317 static int64_t phb4_get_capp_info(int chip_id, struct phb *phb,
4318 				  struct capp_info *info)
4319 {
4320 	struct phb4 *p = phb_to_phb4(phb);
4321 	uint32_t offset;
4322 
4323 	/* Not even supposed to be here on P10, but doesn't hurt */
4324 	if (is_phb5())
4325 		return OPAL_UNSUPPORTED;
4326 
4327 	if (chip_id != p->chip_id)
4328 		return OPAL_PARAMETER;
4329 
4330 	/* Check if CAPP is attached to the PHB */
4331 	if (p->capp == NULL || p->capp->phb != phb)
4332 		return OPAL_PARAMETER;
4333 
4334 	offset = PHB4_CAPP_REG_OFFSET(p);
4335 
4336 	if (p->index == CAPP0_PHB_INDEX)
4337 		info->capp_index = 0;
4338 	if (p->index == CAPP1_PHB_INDEX)
4339 		info->capp_index = 1;
4340 	info->phb_index = p->index;
4341 	info->capp_fir_reg = CAPP_FIR + offset;
4342 	info->capp_fir_mask_reg = CAPP_FIR_MASK + offset;
4343 	info->capp_fir_action0_reg = CAPP_FIR_ACTION0 + offset;
4344 	info->capp_fir_action1_reg = CAPP_FIR_ACTION1 + offset;
4345 	info->capp_err_status_ctrl_reg = CAPP_ERR_STATUS_CTRL + offset;
4346 
4347 	return OPAL_SUCCESS;
4348 }
4349 
4350 static void phb4_init_capp_regs(struct phb4 *p, uint32_t capp_eng)
4351 {
4352 	uint64_t addr, reg;
4353 	uint32_t offset;
4354 	uint8_t link_width_x16 = 1;
4355 
4356 	offset = PHB4_CAPP_REG_OFFSET(p);
4357 
4358 	/* Calculate the PHB link width if the card is attached to PEC2 */
4359 	if (p->index == CAPP1_PHB_INDEX) {
4360 		/* Check if PEC2 is in x8 or x16 mode.
4361 		 * PEC0 is always in x16
4362 		 */
4363 		addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
4364 		xscom_read(p->chip_id, addr, &reg);
4365 		link_width_x16 = ((reg & XPEC_P9_PCI_IOVALID_MASK) ==
4366 				  XPEC_P9_PCI_IOVALID_X16);
4367 	}
4368 
4369 	/* APC Master PowerBus Control Register */
4370 	xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
4371 	reg |= PPC_BIT(0); /* enable cResp exam */
4372 	reg |= PPC_BIT(3); /* disable vg not sys */
4373 	reg |= PPC_BIT(12);/* HW417025: disable capp virtual machines */
4374 	reg |= PPC_BIT(2); /* disable nn rn */
4375 	reg |= PPC_BIT(4); /* disable g */
4376 	reg |= PPC_BIT(5); /* disable ln */
4377 	xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
4378 
4379 	/* Set PHB mode, HPC Dir State and P9 mode */
4380 	xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset,
4381 		    0x1772000000000000UL);
4382 	PHBINF(p, "CAPP: port attached\n");
4383 
4384 	/* Set snoop ttype decoding, dir size to 512K */
4385 	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0x9000000000000000UL);
4386 
4387 	/* Use Read Epsilon Tier2 for all scopes.
4388 	 * Set Tier2 Read Epsilon.
4389 	 */
4390 	xscom_read(p->chip_id, SNOOP_CONTROL + offset, &reg);
4391 	reg |= PPC_BIT(0);
4392 	reg |= PPC_BIT(35);
4393 	reg |= PPC_BIT(45);
4394 	reg |= PPC_BIT(46);
4395 	reg |= PPC_BIT(47);
4396 	reg |= PPC_BIT(50);
4397 	xscom_write(p->chip_id, SNOOP_CONTROL + offset, reg);
4398 
4399 	/* Transport Control Register */
4400 	xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
4401 	if (p->index == CAPP0_PHB_INDEX) {
4402 		reg |= PPC_BIT(1); /* Send Packet Timer Value */
4403 		reg |= PPC_BITMASK(10, 13); /* Send Packet Timer Value */
4404 		reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
4405 		reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
4406 		if (capp_eng & CAPP_MIN_STQ_ENGINES) {
4407 			/* 2 CAPP msg engines */
4408 			reg |= PPC_BIT(58);
4409 			reg |= PPC_BIT(59);
4410 			reg |= PPC_BIT(60);
4411 		}
4412 		if (capp_eng & CAPP_MAX_STQ_ENGINES) {
4413 			/* 14 CAPP msg engines */
4414 			reg |= PPC_BIT(60);
4415 		}
4416 		reg |= PPC_BIT(62);
4417 	}
4418 	if (p->index == CAPP1_PHB_INDEX) {
4419 		reg |= PPC_BIT(4); /* Send Packet Timer Value */
4420 		reg &= ~PPC_BIT(10); /* Set CI Store Buffer Threshold=5 */
4421 		reg |= PPC_BIT(11);  /* Set CI Store Buffer Threshold=5 */
4422 		reg &= ~PPC_BIT(12); /* Set CI Store Buffer Threshold=5 */
4423 		reg |= PPC_BIT(13);  /* Set CI Store Buffer Threshold=5 */
4424 		reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
4425 		reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
4426 		if (capp_eng & CAPP_MIN_STQ_ENGINES) {
4427 			/* 2 CAPP msg engines */
4428 			reg |= PPC_BIT(59);
4429 			reg |= PPC_BIT(60);
4430 
4431 		} else if (capp_eng & CAPP_MAX_STQ_ENGINES) {
4432 
4433 			if (link_width_x16)
4434 				/* 14 CAPP msg engines */
4435 				reg |= PPC_BIT(60) | PPC_BIT(62);
4436 			else
4437 				/* 6 CAPP msg engines */
4438 				reg |= PPC_BIT(60);
4439 		}
4440 	}
4441 	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
4442 
4443 	/* The transport control register needs to be loaded in two
4444 	 * steps. Once the register values have been set, we have to
4445 	 * write bit 63 to a '1', which loads the register values into
4446 	 * the ci store buffer logic.
4447 	 */
4448 	xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
4449 	reg |= PPC_BIT(63);
4450 	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
4451 
4452 	/* Enable epoch timer */
4453 	xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset,
4454 		    0xC0000000FFF8FFE0UL);
4455 
4456 	/* Flush SUE State Map Register */
4457 	xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
4458 		    0x08020A0000000000UL);
4459 
4460 	/* Flush SUE uOP1 Register */
4461 	xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
4462 		    0xDCE0280428000000);
4463 
4464 	/* capp owns PHB read buffers */
4465 	if (p->index == CAPP0_PHB_INDEX) {
4466 		/* max PHB read buffers 0-47 */
4467 		reg = 0xFFFFFFFFFFFF0000UL;
4468 		if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
4469 			reg = 0xF000000000000000UL;
4470 		xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
4471 		xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
4472 	}
4473 	if (p->index == CAPP1_PHB_INDEX) {
4474 
4475 		if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
4476 			reg = 0xF000000000000000ULL;
4477 		} else if (link_width_x16) {
4478 			/* 0-47 (Read machines) are available for
4479 			 * capp use
4480 			 */
4481 			reg = 0x0000FFFFFFFFFFFFULL;
4482 		} else {
4483 			/* Set 30 Read machines for CAPP Minus
4484 			 * 20-27 for DMA
4485 			 */
4486 			reg = 0xFFFFF00E00000000ULL;
4487 		}
4488 		xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
4489 		xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
4490 	}
4491 
4492 	/* CAPP FIR Action 0 */
4493 	xscom_write(p->chip_id, CAPP_FIR_ACTION0 + offset, 0x0b1c000104060000UL);
4494 
4495 	/* CAPP FIR Action 1 */
4496 	xscom_write(p->chip_id, CAPP_FIR_ACTION1 + offset, 0x2b9c0001240E0000UL);
4497 
4498 	/* CAPP FIR MASK */
4499 	xscom_write(p->chip_id, CAPP_FIR_MASK + offset, 0x80031f98d8717000UL);
4500 
4501 	/* Mask the CAPP PSL Credit Timeout Register error */
4502 	xscom_write_mask(p->chip_id, CAPP_FIR_MASK + offset,
4503 			 PPC_BIT(46), PPC_BIT(46));
4504 
4505 	/* Deassert TLBI_FENCED and tlbi_psl_is_dead */
4506 	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
4507 }
4508 
4509 /* override some inits with CAPI defaults */
4510 static void phb4_init_capp_errors(struct phb4 *p)
4511 {
4512 	/* Init_77: TXE Error AIB Fence Enable Register */
4513 	if (phb4_is_dd20(p))
4514 		out_be64(p->regs + 0x0d30,	0xdfffbf0ff7ddfff0ull);
4515 	else
4516 		out_be64(p->regs + 0x0d30,	0xdff7bf0ff7ddfff0ull);
4517 	/* Init_86: RXE_ARB Error AIB Fence Enable Register */
4518 	out_be64(p->regs + 0x0db0,	0xfbffd7bbfb7fbfefull);
4519 
4520 	/* Init_95: RXE_MRG Error AIB Fence Enable Register */
4521 	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
4522 
4523 	/* Init_104: RXE_TCE Error AIB Fence Enable Register */
4524 	out_be64(p->regs + 0x0eb0,	0xffaeffafffffffffull);
4525 
4526 	/* Init_113: PHB Error AIB Fence Enable Register */
4527 	out_be64(p->regs + 0x0cb0,	0x35777073ff000000ull);
4528 }
4529 
4530 /*
4531  * The capi, NBW and ASN indicators are used only on P9 to flag some
4532  * types of incoming traffic for the PHB and have been removed on P10.
4533  *
4534  * The capi indicator is over the 8 most significant bits (and
4535  * not 16). We stay away from bits 59 (TVE select), 60 and 61 (MSI)
4536  *
4537  * For the mask, we keep bit 59 in, as capi messages must hit TVE#0.
4538  * Bit 56 is not part of the mask, so that a NBW message (see below)
4539  * is also considered a capi message.
4540  */
4541 #define CAPIIND		0x0200
4542 #define CAPIMASK	0xFE00
4543 
4544 /*
4545  * Non-Blocking Write messages are a subset of capi messages, so the
4546  * indicator is the same as capi + an extra bit (56) to differentiate.
4547  * Mask is the same as capi + the extra bit
4548  */
4549 #define NBWIND		0x0300
4550 #define NBWMASK		0xFF00
4551 
4552 /*
4553  * The ASN indicator is used for tunneled operations (as_notify and
4554  * atomics).  Tunneled operation messages can be sent in PCI mode as
4555  * well as CAPI mode.
4556  *
4557  * The format of those messages is specific and, for as_notify
4558  * messages, the address field is hijacked to encode the LPID/PID/TID
4559  * of the target thread, so those messages should not go through
4560  * translation. They must hit TVE#1. Therefore bit 59 is part of the
4561  * indicator.
4562  */
4563 #define ASNIND		0x0C00
4564 #define ASNMASK		0xFF00
4565 
4566 /* Power Bus Common Queue Registers
4567  * All PBCQ and PBAIB registers are accessed via SCOM
4568  * NestBase = 4010C00 for PEC0
4569  *            4011000 for PEC1
4570  *            4011400 for PEC2
4571  * PCIBase  = D010800 for PE0
4572  *            E010800 for PE1
4573  *            F010800 for PE2
4574  *
4575  * Some registers are shared amongst all of the stacks and will only
4576  * have 1 copy. Other registers are implemented one per stack.
4577  * Registers that are duplicated will have an additional offset
4578  * of “StackBase” so that they have a unique address.
4579  * Stackoffset = 00000040 for Stack0
4580  *             = 00000080 for Stack1
4581  *             = 000000C0 for Stack2
4582  */
4583 static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
4584 				uint32_t capp_eng)
4585 {
4586 	uint64_t addr, reg, start_addr, end_addr, stq_eng, dma_eng;
4587 	uint64_t mbt0, mbt1;
4588 	int i, window_num = -1;
4589 
4590 	/* CAPP Control Register */
4591 	xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
4592 	if (reg & PPC_BIT(0)) {
4593 		PHBDBG(p, "Already in CAPP mode\n");
4594 	}
4595 
4596 	for (i = 0; i < 500000; i++) {
4597 		/* PBCQ General Status Register */
4598 		xscom_read(p->chip_id,
4599 			   p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_STAT,
4600 			   &reg);
4601 		if (!(reg & 0xC000000000000000UL))
4602 			break;
4603 		time_wait_us(10);
4604 	}
4605 	if (reg & 0xC000000000000000UL) {
4606 		PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
4607 		return OPAL_HARDWARE;
4608 	}
4609 
4610 	stq_eng = 0x0000000000000000ULL;
4611 	dma_eng = 0x0000000000000000ULL;
4612 	if (p->index == CAPP0_PHB_INDEX) {
4613 		/* PBCQ is operating as a x16 stack
4614 		 * - The maximum number of engines given to CAPP will be
4615 		 * 14 and will be assigned in the order of STQ 15 to 2.
4616 		 * - 0-47 (Read machines) are available for capp use.
4617 		 */
4618 		stq_eng = 0x000E000000000000ULL; /* 14 CAPP msg engines */
4619 		dma_eng = 0x0000FFFFFFFFFFFFULL; /* 48 CAPP Read machines */
4620 	}
4621 
4622 	if (p->index == CAPP1_PHB_INDEX) {
4623 		/* Check if PEC is in x8 or x16 mode */
4624 		addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
4625 		xscom_read(p->chip_id, addr, &reg);
4626 		if ((reg & XPEC_P9_PCI_IOVALID_MASK) == XPEC_P9_PCI_IOVALID_X16) {
4627 			/* PBCQ is operating as a x16 stack
4628 			 * - The maximum number of engines give to CAPP will be
4629 			 * - The maximum number of engines given to CAPP will be
4630 			 * - 0-47 (Read machines) are available for capp use.
4631 			 */
4632 			stq_eng = 0x000E000000000000ULL;
4633 			dma_eng = 0x0000FFFFFFFFFFFFULL;
4634 		} else {
4635 
4636 			/* PBCQ is operating as a x8 stack
4637 			 * - The maximum number of engines given to CAPP should
4638 			 * be 6 and will be assigned in the order of 7 to 2.
4639 			 * - 0-30 (Read machines) are available for capp use.
4640 			 */
4641 			stq_eng = 0x0006000000000000ULL;
4642 			/* 30 Read machines for CAPP Minus 20-27 for DMA */
4643 			dma_eng = 0x0000FFFFF00E0000ULL;
4644 		}
4645 	}
4646 
4647 	if (capp_eng & CAPP_MIN_STQ_ENGINES)
4648 		stq_eng = 0x0002000000000000ULL; /* 2 capp msg engines */
4649 
4650 	/* CAPP Control Register. Enable CAPP Mode */
4651 	reg = 0x8000000000000000ULL; /* PEC works in CAPP Mode */
4652 	reg |= stq_eng;
4653 	if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
4654 		dma_eng = 0x0000F00000000000ULL; /* 4 CAPP Read machines */
4655 	reg |= dma_eng;
4656 	xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, reg);
4657 
4658 	/* PEC2 has 3 ETUs + 16 PCI lanes that can operate in x16,
4659 	 * x8+x8 (bifurcated) or x8+x4+x4 (trifurcated) mode. When a
4660 	 * Mellanox CX5 card is attached to stack0 of this PEC, indicated by a
4661 	 * request to allocate CAPP_MAX_DMA_READ_ENGINES, we tweak the default
4662 	 * DMA-read engine allocations to maximize DMA read performance
4663 	 */
4664 	if ((p->index == CAPP1_PHB_INDEX) &&
4665 	    (capp_eng & CAPP_MAX_DMA_READ_ENGINES))
4666 		phb4_pec2_dma_engine_realloc(p);
4667 
4668 	/* PCI to PB data movement ignores the PB init signal. */
4669 	xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
4670 			 XPEC_NEST_PBCQ_HW_CONFIG_PBINIT,
4671 			 XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
4672 
4673 	/* If pump mode is enabled, don't do nodal broadcasts.
4674 	 */
4675 	xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &reg);
4676 	if (reg & PB_CFG_PUMP_MODE) {
4677 		reg = XPEC_NEST_PBCQ_HW_CONFIG_DIS_NODAL;
4678 		reg |= XPEC_NEST_PBCQ_HW_CONFIG_DIS_RNNN;
4679 		xscom_write_mask(p->chip_id,
4680 				 p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
4681 				 reg, reg);
4682 	}
4683 
4684 	/* PEC Phase 4 (PHB) register adjustments
4685 	 * Inbound CAPP traffic: the CAPI can send both CAPP packets and
4686 	 * I/O packets. A PCIe packet is identified as a CAPP packet in
4687 	 * the PHB if the PCIe address matches either the CAPI
4688 	 * Compare/Mask register or its NBW Compare/Mask register.
4689 	 */
4690 
4691 	/*
4692 	 * Bit [0:7] XSL_DSNCTL[capiind]
4693 	 * Init_26 - CAPI Compare/Mask
4694 	 */
4695 	out_be64(p->regs + PHB_CAPI_CMPM,
4696 		 ((u64)CAPIIND << 48) |
4697 		 ((u64)CAPIMASK << 32) | PHB_CAPI_CMPM_ENABLE);
4698 
4699 	/* PB AIB Hardware Control Register
4700 	 * Wait 32 PCI clocks for a credit to become available
4701 	 * before rejecting.
4702 	 */
4703 	xscom_read(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, &reg);
4704 	reg |= PPC_BITMASK(40, 42);
4705 	if (p->index == CAPP1_PHB_INDEX)
4706 		reg |= PPC_BIT(30);
4707 	xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, reg);
4708 
4709 	/* non-translate/50-bit mode */
4710 	out_be64(p->regs + PHB_NXLATE_PREFIX, 0x0000000000000000Ull);
4711 
4712 	/* Set TVE no-translate mode to allow the MMIO window */
4713 	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
4714 
4715 	/*
4716 	 * In 50-bit non-translate mode, the fields of the TVE are
4717 	 * used to perform an address range check. In this mode TCE
4718 	 * Table Size(0) must be a '1' (TVE[51] = 1)
4719 	 *      PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
4720 	 *      PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
4721 	 *
4722 	 * TVE[51] = 1
4723 	 * TVE[56] = 1: 50-bit Non-Translate Mode Enable
4724 	 * TVE[0:23] = 0x000000
4725 	 * TVE[24:47] = 0xFFFFFF
4726 	 *
4727 	 * capi dma mode: CAPP DMA mode needs access to all of memory
4728 	 * capi mode: Allow address range (bit 14 = 1)
4729 	 *            0x0002000000000000: 0x0002FFFFFFFFFFFF
4730 	 *            TVE[52:53] = '10' and TVE[54:55] = '10'
4731 	 */
4732 
4733 	/* TVT#0: CAPI window + DMA, all memory */
4734 	start_addr = 0ull;
4735 	end_addr   = 0x0003ffffffffffffull;
4736 	p->tve_cache[pe_number * 2] =
4737 		tve_encode_50b_noxlate(start_addr, end_addr);
4738 
4739 	/* TVT#1: CAPI window + DMA, all memory, in bypass mode */
4740 	start_addr = (1ull << 59);
4741 	end_addr   = start_addr + 0x0003ffffffffffffull;
4742 	p->tve_cache[pe_number * 2 + 1] =
4743 		tve_encode_50b_noxlate(start_addr, end_addr);
4744 
4745 	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
4746 	for (i = 0; i < p->tvt_size; i++)
4747 		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
4748 
4749 	/*
4750 	 * Since TVT#0 is in by-pass mode, disable 32-bit MSI, as a
4751 	 * DMA write targeting 0x00000000FFFFxxxx would be interpreted
4752 	 * as a 32-bit MSI
4753 	 */
4754 	reg = in_be64(p->regs + PHB_PHB4_CONFIG);
4755 	reg &= ~PHB_PHB4C_32BIT_MSI_EN;
4756 	out_be64(p->regs + PHB_PHB4_CONFIG, reg);
4757 
4758 	/* Set the MBT BAR to pass the CAPI MMIO window and keep the
4759 	 * other MMIO values
4760 	 */
4761 	mbt0 = IODA3_MBT0_ENABLE | IODA3_MBT0_TYPE_M64 |
4762 	       SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
4763 	       SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
4764 	       (0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
4765 
4766 	mbt1 = IODA3_MBT1_ENABLE |
4767 	       (0x00ff000000000000ULL & IODA3_MBT1_MASK) |
4768 	       SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
4769 
4770 	for (i = 0; i < p->mbt_size; i++) {
4771 		/* search if the capi mmio window is already present */
4772 		if ((p->mbt_cache[i][0] == mbt0) &&
4773 		    (p->mbt_cache[i][1] == mbt1))
4774 			break;
4775 
4776 		/* search for a free entry */
4777 		if ((window_num == -1) &&
4778 		   ((!(p->mbt_cache[i][0] & IODA3_MBT0_ENABLE)) &&
4779 		    (!(p->mbt_cache[i][1] & IODA3_MBT1_ENABLE))))
4780 			window_num = i;
4781 	}
4782 
4783 	if (window_num >= 0 && i == p->mbt_size) {
4784 		/* no capi mmio window found, so add it */
4785 		p->mbt_cache[window_num][0] = mbt0;
4786 		p->mbt_cache[window_num][1] = mbt1;
4787 
4788 		phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
4789 		out_be64(p->regs + PHB_IODA_DATA0, mbt0);
4790 		out_be64(p->regs + PHB_IODA_DATA0, mbt1);
4791 	} else if (i == p->mbt_size) {
4792 		/* mbt cache full, this case should never happen */
4793 		PHBERR(p, "CAPP: Failed to add CAPI mmio window\n");
4794 	} else {
4795 		/* duplicate entry. Nothing to do */
4796 	}
4797 
4798 	phb4_init_capp_errors(p);
4799 
4800 	phb4_init_capp_regs(p, capp_eng);
4801 
4802 	if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
4803 					CAPP_TB,
4804 					PHB4_CAPP_REG_OFFSET(p)))
4805 		PHBERR(p, "CAPP: Failed to sync timebase\n");
4806 
4807 	/* set callbacks to handle HMI events */
4808 	capi_ops.get_capp_info = &phb4_get_capp_info;
4809 
4810 	return OPAL_SUCCESS;
4811 }
4812 
4813 
4814 static int64_t phb4_init_capp(struct phb4 *p)
4815 {
4816 	struct capp *capp;
4817 	int rc;
4818 
4819 	if (p->index != CAPP0_PHB_INDEX &&
4820 	    p->index != CAPP1_PHB_INDEX)
4821 		return OPAL_UNSUPPORTED;
4822 
4823 	capp = zalloc(sizeof(struct capp));
4824 	if (capp == NULL)
4825 		return OPAL_NO_MEM;
4826 
4827 	if (p->index == CAPP0_PHB_INDEX) {
4828 		capp->capp_index = 0;
4829 		capp->capp_xscom_offset = 0;
4830 
4831 	} else if (p->index == CAPP1_PHB_INDEX) {
4832 		capp->capp_index = 1;
4833 		capp->capp_xscom_offset = CAPP1_REG_OFFSET;
4834 	}
4835 
4836 	capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
4837 	capp->chip_id = p->chip_id;
4838 
4839 	/* Load capp microcode into the capp unit */
4840 	rc = load_capp_ucode(p);
4841 
4842 	if (rc == OPAL_SUCCESS)
4843 		p->capp = capp;
4844 	else
4845 		free(capp);
4846 
4847 	return rc;
4848 }
4849 
4850 static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
4851 				  uint64_t pe_number)
4852 {
4853 	struct phb4 *p = phb_to_phb4(phb);
4854 	struct proc_chip *chip = get_chip(p->chip_id);
4855 	struct capp *capp = p->capp;
4856 	uint64_t reg, ret;
4857 
4858 	/* No CAPI on P10. OpenCAPI only */
4859 	if (is_phb5())
4860 		return OPAL_UNSUPPORTED;
4861 
4862 	/* can't do a mode switch when capp is in recovery mode */
4863 	ret = capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
4864 	if (ret != OPAL_SUCCESS)
4865 		return ret;
4866 
4867 	if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
4868 		PHBDBG(p, "CAPP: recovery in progress\n");
4869 		return OPAL_BUSY;
4870 	}
4871 
4872 
4873 	switch (mode) {
4874 
4875 	case OPAL_PHB_CAPI_MODE_DMA: /* Enabled by default on p9 */
4876 	case OPAL_PHB_CAPI_MODE_SNOOP_ON:
4877 		/* nothing to do on P9 if CAPP is already enabled */
4878 		ret = p->capp->phb ? OPAL_SUCCESS : OPAL_UNSUPPORTED;
4879 		break;
4880 
4881 	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
4882 		ret = p->capp->phb ? OPAL_UNSUPPORTED : OPAL_SUCCESS;
4883 		break;
4884 
4885 	case OPAL_PHB_CAPI_MODE_PCIE:
4886 		if (p->flags & PHB4_CAPP_DISABLE) {
4887 			/* We are in middle of a CAPP disable */
4888 			ret = OPAL_BUSY;
4889 
4890 		} else if (capp->phb) {
4891 			/* Kick start a creset */
4892 			p->flags |= PHB4_CAPP_DISABLE;
4893 			PHBINF(p, "CAPP: PCIE mode needs a cold-reset\n");
4894 			/* Kick off the pci state machine */
4895 			ret = phb4_creset(phb->slot);
4896 			ret = ret > 0 ? OPAL_BUSY : ret;
4897 
4898 		} else {
4899 			/* PHB already in PCI mode */
4900 			ret = OPAL_SUCCESS;
4901 		}
4902 		break;
4903 
4904 	case OPAL_PHB_CAPI_MODE_CAPI: /* Fall Through */
4905 	case OPAL_PHB_CAPI_MODE_DMA_TVT1:
4906 		/* Make sure that PHB is not disabling CAPP */
4907 		if (p->flags & PHB4_CAPP_DISABLE) {
4908 			PHBERR(p, "CAPP: Disable in progress\n");
4909 			ret = OPAL_BUSY;
4910 			break;
4911 		}
4912 
4913 		/* Check if ucode is available */
4914 		if (!capp_ucode_loaded(chip, p->index)) {
4915 			PHBERR(p, "CAPP: ucode not loaded\n");
4916 			ret = OPAL_RESOURCE;
4917 			break;
4918 		}
4919 
4920 		/*
4921 		 * Mark the CAPP attached to the PHB right away so that
4922 		 * if an MCE happens during CAPP init we can handle it.
4923 		 * In case of an error in CAPP init we remove the PHB
4924 		 * from the attached_mask later.
4925 		 */
4926 		capp->phb = phb;
4927 		capp->attached_pe = pe_number;
4928 
4929 		if (mode == OPAL_PHB_CAPI_MODE_DMA_TVT1)
4930 			ret = enable_capi_mode(p, pe_number,
4931 					       CAPP_MIN_STQ_ENGINES |
4932 					       CAPP_MAX_DMA_READ_ENGINES);
4933 
4934 		else
4935 			ret = enable_capi_mode(p, pe_number,
4936 					       CAPP_MAX_STQ_ENGINES |
4937 					       CAPP_MIN_DMA_READ_ENGINES);
4938 		if (ret == OPAL_SUCCESS) {
4939 			/* register notification on system shutdown */
4940 			opal_add_host_sync_notifier(&phb4_host_sync_reset, p);
4941 
4942 		} else {
4943 			/* In case of an error mark the PHB detached */
4944 			capp->phb = NULL;
4945 			capp->attached_pe = phb4_get_reserved_pe_number(phb);
4946 		}
4947 		break;
4948 
4949 	default:
4950 		ret = OPAL_UNSUPPORTED;
4951 		break;
4952 	};
4953 
4954 	return ret;
4955 }
4956 
4957 static void phb4_p2p_set_initiator(struct phb4 *p, uint16_t pe_number)
4958 {
4959 	uint64_t tve;
4960 	uint16_t window_id = (pe_number << 1) + 1;
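	/*
	 * With two TVEs per PE, (pe_number << 1) + 1 selects the second
	 * TVE (TVE#1) of the PE, conventionally the per-PE 64-bit
	 * bypass window.
	 */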
4961 
4962 	/*
4963 	 * Initiator needs access to the MMIO space of the target,
4964 	 * which is well beyond the 'normal' memory area. Set its TVE
4965 	 * with no range checking.
4966 	 */
4967 	PHBDBG(p, "Setting TVE#1 for peer-to-peer for pe %d\n", pe_number);
4968 	tve = PPC_BIT(51);
4969 	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
4970 	out_be64(p->regs + PHB_IODA_DATA0, tve);
4971 	p->tve_cache[window_id] = tve;
4972 }
4973 
4974 static void phb4_p2p_set_target(struct phb4 *p, bool enable)
4975 {
4976 	uint64_t val;
4977 
4978 	/*
4979 	 * Enabling p2p on a target PHB reserves an outbound (as seen
4980 	 * from the CPU) store queue for p2p
4981 	 */
4982 	PHBDBG(p, "%s peer-to-peer\n", (enable ? "Enabling" : "Disabling"));
4983 	xscom_read(p->chip_id,
4984 		p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, &val);
4985 	if (enable)
4986 		val |= XPEC_NEST_STK_PBCQ_MODE_P2P;
4987 	else
4988 		val &= ~XPEC_NEST_STK_PBCQ_MODE_P2P;
4989 	xscom_write(p->chip_id,
4990 		p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, val);
4991 }
4992 
4993 static void phb4_set_p2p(struct phb *phb, uint64_t mode, uint64_t flags,
4994 			uint16_t pe_number)
4995 {
4996 	struct phb4 *p = phb_to_phb4(phb);
4997 
4998 	switch (mode) {
4999 	case OPAL_PCI_P2P_INITIATOR:
5000 		if (flags & OPAL_PCI_P2P_ENABLE)
5001 			phb4_p2p_set_initiator(p, pe_number);
5002 		/*
5003 		 * When disabling p2p on the initiator, we should
5004 		 * reset the TVE to its default bypass setting, but it
5005 		 * is more easily done from the OS, as it knows the
5006 		 * the start and end address and there's already an
5007 		 * start and end address and there's already an
5008 		 */
5009 		break;
5010 	case OPAL_PCI_P2P_TARGET:
5011 		phb4_p2p_set_target(p, !!(flags & OPAL_PCI_P2P_ENABLE));
5012 		break;
5013 	default:
5014 		assert(0);
5015 	}
5016 }
5017 
5018 static int64_t phb4_set_capp_recovery(struct phb *phb)
5019 {
5020 	struct phb4 *p = phb_to_phb4(phb);
5021 
5022 	if (p->flags & PHB4_CAPP_RECOVERY)
5023 		return 0;
5024 
5025 	/* set opal event flag to indicate eeh condition */
5026 	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
5027 				OPAL_EVENT_PCI_ERROR);
5028 
5029 	p->flags |= PHB4_CAPP_RECOVERY;
5030 
5031 	return 0;
5032 }
5033 
5034 /*
5035  * Return the address out of a PBCQ Tunnel Bar register.
5036  */
5037 static void phb4_get_tunnel_bar(struct phb *phb, uint64_t *addr)
5038 {
5039 	struct phb4 *p = phb_to_phb4(phb);
5040 	uint64_t val;
5041 
5042 	xscom_read(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
5043 		   &val);
5044 	*addr = val >> 8;
5045 }
5046 
5047 /*
5048  * Set PBCQ Tunnel Bar register.
5049  * Store addr bits [8:50] in PBCQ Tunnel Bar register bits [0:42].
5050  * Note that addr bits [8:50] must also match PSL_TNR_ADDR[8:50].
5051  * Reset register if val == 0.
5052  *
5053  * This interface is required to let device drivers set the Tunnel Bar
5054  * value of their choice.
5055  *
5056  * Compatibility with older versions of linux, which do not set the
5057  * Tunnel Bar with phb4_set_tunnel_bar(), is ensured by enable_capi_mode(),
5058  * which will set the default value that used to be assumed.
5059  */
5060 static int64_t phb4_set_tunnel_bar(struct phb *phb, uint64_t addr)
5061 {
5062 	struct phb4 *p = phb_to_phb4(phb);
5063 	uint64_t mask = 0x00FFFFFFFFFFE000ULL;
5064 
5065 	if (!addr) {
5066 		/* Reset register */
5067 		xscom_write(p->chip_id,
5068 			    p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR, addr);
5069 		return OPAL_SUCCESS;
5070 	}
5071 	if ((addr & ~mask))
5072 		return OPAL_PARAMETER;
5073 	if (!(addr & mask))
5074 		return OPAL_PARAMETER;
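	/*
	 * Worked example (address value made up for illustration):
	 * addr = 0x000812345678a000 passes both checks above, and
	 * (addr & mask) << 8 = 0x0812345678a00000 is what lands in the
	 * register; phb4_get_tunnel_bar() recovers the original address
	 * with its matching >> 8.
	 */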
5075 
5076 	xscom_write(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
5077 		    (addr & mask) << 8);
5078 	return OPAL_SUCCESS;
5079 }
5080 
5081 static const struct phb_ops phb4_ops = {
5082 	.cfg_read8		= phb4_pcicfg_read8,
5083 	.cfg_read16		= phb4_pcicfg_read16,
5084 	.cfg_read32		= phb4_pcicfg_read32,
5085 	.cfg_write8		= phb4_pcicfg_write8,
5086 	.cfg_write16		= phb4_pcicfg_write16,
5087 	.cfg_write32		= phb4_pcicfg_write32,
5088 	.get_reserved_pe_number	= phb4_get_reserved_pe_number,
5089 	.device_init		= phb4_device_init,
5090 	.device_remove		= NULL,
5091 	.ioda_reset		= phb4_ioda_reset,
5092 	.papr_errinjct_reset	= phb4_papr_errinjct_reset,
5093 	.pci_reinit		= phb4_pci_reinit,
5094 	.set_phb_mem_window	= phb4_set_phb_mem_window,
5095 	.phb_mmio_enable	= phb4_phb_mmio_enable,
5096 	.map_pe_mmio_window	= phb4_map_pe_mmio_window,
5097 	.map_pe_dma_window	= phb4_map_pe_dma_window,
5098 	.map_pe_dma_window_real = phb4_map_pe_dma_window_real,
5099 	.set_option		= phb4_set_option,
5100 	.get_option		= phb4_get_option,
5101 	.set_xive_pe		= phb4_set_ive_pe,
5102 	.get_msi_32		= phb4_get_msi_32,
5103 	.get_msi_64		= phb4_get_msi_64,
5104 	.set_pe			= phb4_set_pe,
5105 	.set_peltv		= phb4_set_peltv,
5106 	.eeh_freeze_status	= phb4_eeh_freeze_status,
5107 	.eeh_freeze_clear	= phb4_eeh_freeze_clear,
5108 	.eeh_freeze_set		= phb4_eeh_freeze_set,
5109 	.next_error		= phb4_eeh_next_error,
5110 	.err_inject		= phb4_err_inject,
5111 	.get_diag_data2		= phb4_get_diag_data,
5112 	.tce_kill		= phb4_tce_kill,
5113 	.set_capi_mode		= phb4_set_capi_mode,
5114 	.set_p2p		= phb4_set_p2p,
5115 	.set_capp_recovery	= phb4_set_capp_recovery,
5116 	.get_tunnel_bar         = phb4_get_tunnel_bar,
5117 	.set_tunnel_bar         = phb4_set_tunnel_bar,
5118 };
5119 
5120 static void phb4_init_ioda3(struct phb4 *p)
5121 {
5122 	if (is_phb5()) {
5123 		/*
5124 		 * When ABT is on, the MSIs on the PHB use the PQ state bits
5125 		 * of the IC and MSI triggers from the PHB are forwarded
5126 		 * directly to the IC ESB page. However, the LSIs are still
5127 		 * controlled locally on the PHB and LSI triggers use a
5128 		 * special offset for trigger injection.
5129 		 */
5130 		if (phb_abt_mode(p)) {
5131 			uint64_t mmio_base = xive2_get_esb_base(p->base_msi);
5132 
5133 			PHBDBG(p, "Using ABT mode. ESB: 0x%016llx\n", mmio_base);
5134 
5135 			/* Init_18 - Interrupt Notify Base Address */
5136 			out_be64(p->regs + PHB_INT_NOTIFY_ADDR,
5137 				 PHB_INT_NOTIFY_ADDR_64K | mmio_base);
5138 
5139 			/* Interrupt Notify Base Index is unused */
5140 		} else {
5141 			p->irq_port = xive2_get_notify_port(p->chip_id,
5142 						XIVE_HW_SRC_PHBn(p->index));
5143 
5144 			PHBDBG(p, "Using IC notif page at 0x%016llx\n",
5145 						p->irq_port);
5146 
5147 			/* Init_18 - Interrupt Notify Base Address */
5148 			out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);
5149 
5150 			/* Init_19 - Interrupt Notify Base Index */
5151 			out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
5152 				 xive2_get_notify_base(p->base_msi));
5153 		}
5154 
5155 	} else { /* p9 */
5156 		p->irq_port = xive_get_notify_port(p->chip_id,
5157 						   XIVE_HW_SRC_PHBn(p->index));
5158 		/* Init_18 - Interrupt Notify Base Address */
5159 		out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);
5160 
5161 		/* Init_19 - Interrupt Notify Base Index */
5162 		out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
5163 			 xive_get_notify_base(p->base_msi));
5164 	}
5165 
5166 	/* Init_19x - Not in spec: Initialize source ID */
5167 	PHBDBG(p, "Reset state SRC_ID: %016llx\n",
5168 	       in_be64(p->regs + PHB_LSI_SOURCE_ID));
5169 	out_be64(p->regs + PHB_LSI_SOURCE_ID,
5170 		 SETFIELD(PHB_LSI_SRC_ID, 0ull, (p->num_irqs - 1) >> 3));
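	/*
	 * The 8 LSIs live at the very top of the PHB's interrupt range,
	 * so (num_irqs - 1) >> 3 is the index of that last block of 8
	 * sources.
	 */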
5171 
5172 	/* Init_20 - RTT BAR */
5173 	out_be64(p->regs + PHB_RTT_BAR, (u64) p->tbl_rtt | PHB_RTT_BAR_ENABLE);
5174 
5175 	/* Init_21 - PELT-V BAR */
5176 	out_be64(p->regs + PHB_PELTV_BAR,
5177 		 (u64) p->tbl_peltv | PHB_PELTV_BAR_ENABLE);
5178 
5179 	/* Init_22 - Setup M32 starting address */
5180 	out_be64(p->regs + PHB_M32_START_ADDR, M32_PCI_START);
5181 
5182 	/* Init_23 - Setup PEST BAR */
5183 	out_be64(p->regs + PHB_PEST_BAR,
5184 		 p->tbl_pest | PHB_PEST_BAR_ENABLE);
5185 
5186 	/* Init_24 - CRW Base Address Reg */
5187 	/* See enable_capi_mode() */
5188 
5189 	if (is_phb4()) {
5190 		/* Init_25 - ASN Compare/Mask - P9 only */
5191 		out_be64(p->regs + PHB_ASN_CMPM, ((u64)ASNIND << 48) |
5192 			 ((u64)ASNMASK << 32) | PHB_ASN_CMPM_ENABLE);
5193 	}
5194 
5195 	/* Init_26 - CAPI Compare/Mask */
5196 	/* See enable_capi_mode() */
5197 	/* if CAPP being disabled then reset CAPI Compare/Mask Register */
5198 	if (p->flags & PHB4_CAPP_DISABLE)
5199 		out_be64(p->regs + PHB_CAPI_CMPM, 0);
5200 
5201 	/* Init_27 - PCIE Outbound upper address */
5202 	out_be64(p->regs + PHB_M64_UPPER_BITS, 0);
5203 
5204 	/* Init_28 - PHB4 Configuration */
5205 	out_be64(p->regs + PHB_PHB4_CONFIG,
5206 		 PHB_PHB4C_32BIT_MSI_EN |
5207 		 PHB_PHB4C_64BIT_MSI_EN);
5208 
5209 	/* Init_29 - At least 256ns delay according to spec. Do a dummy
5210 	 * read first to flush posted writes
5211 	 */
5212 	in_be64(p->regs + PHB_PHB4_CONFIG);
5213 	time_wait_us(2);
5214 
5215 	/* Init_30..41 - On-chip IODA tables init */
5216 	phb4_ioda_reset(&p->phb, false);
5217 }
5218 
5219 /* phb4_init_rc_cfg - Initialize the Root Complex config space
5220  */
5221 static bool phb4_init_rc_cfg(struct phb4 *p)
5222 {
5223 	int64_t ecap, aercap;
5224 
5225 	/* XXX Handle errors ? */
5226 
5227 	/* Init_46:
5228 	 *
5229 	 * Set primary bus to 0, secondary to 1 and subordinate to 0xff
5230 	 */
5231 	phb4_pcicfg_write32(&p->phb, 0, PCI_CFG_PRIMARY_BUS, 0x00ff0100);
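	/*
	 * 0x00ff0100 decodes, byte by byte from the low end, to primary
	 * bus 0x00, secondary bus 0x01, subordinate bus 0xff and latency
	 * timer 0x00, matching the comment above.
	 */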
5232 
5233 	/* Init_47 - Clear errors */
5234 	/* see phb4_rc_err_clear() called below */
5235 
5236 	/* Init_48
5237 	 *
5238 	 * PCIE Device control/status, enable error reporting, disable relaxed
5239 	 * ordering, set MPS to 128 (see note), clear errors.
5240 	 *
5241 	 * Note: The doc recommends to set MPS to 512. This has proved to have
5242 	 * some issues as it requires specific clamping of MRSS on devices and
5243 	 * we've found devices in the field that misbehave when doing that.
5244 	 *
5245 	 * We currently leave it all to 128 bytes (minimum setting) at init
5246 	 * time. The generic PCIe probing later on might apply a different
5247 	 * value, or the kernel will, but we play it safe at early init
5248 	 */
5249 	if (p->ecap <= 0) {
5250 		ecap = pci_find_cap(&p->phb, 0, PCI_CFG_CAP_ID_EXP);
5251 		if (ecap < 0) {
5252 			PHBERR(p, "Can't locate PCI-E capability\n");
5253 			return false;
5254 		}
5255 		p->ecap = ecap;
5256 	} else {
5257 		ecap = p->ecap;
5258 	}
5259 
5260 	phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DEVCTL,
5261 			     PCICAP_EXP_DEVCTL_CE_REPORT	|
5262 			     PCICAP_EXP_DEVCTL_NFE_REPORT	|
5263 			     PCICAP_EXP_DEVCTL_FE_REPORT	|
5264 			     PCICAP_EXP_DEVCTL_UR_REPORT	|
5265 			     SETFIELD(PCICAP_EXP_DEVCTL_MPS, 0, PCIE_MPS_128B));
5266 
5267 	/* Init_49 - Device Control/Status 2 */
5268 	phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DCTL2,
5269 			     SETFIELD(PCICAP_EXP_DCTL2_CMPTOUT, 0, 0x5) |
5270 			     PCICAP_EXP_DCTL2_ARI_FWD);
5271 
5272 	/* Init_50..54
5273 	 *
5274 	 * AER inits
5275 	 */
5276 	if (p->aercap <= 0) {
5277 		aercap = pci_find_ecap(&p->phb, 0, PCIECAP_ID_AER, NULL);
5278 		if (aercap < 0) {
5279 			PHBERR(p, "Can't locate AER capability\n");
5280 			return false;
5281 		}
5282 		p->aercap = aercap;
5283 	} else {
5284 		aercap = p->aercap;
5285 	}
5286 
5287 	/* Disable some error reporting as per the PHB4 spec */
5288 	phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_UE_MASK,
5289 			     PCIECAP_AER_UE_POISON_TLP		|
5290 			     PCIECAP_AER_UE_COMPL_TIMEOUT	|
5291 			     PCIECAP_AER_UE_COMPL_ABORT);
5292 
5293 	/* Enable ECRC generation & checking */
5294 	phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_CAPCTL,
5295 			     PCIECAP_AER_CAPCTL_ECRCG_EN	|
5296 			     PCIECAP_AER_CAPCTL_ECRCC_EN);
5297 
5298 	phb4_rc_err_clear(p);
5299 
5300 	return true;
5301 }
5302 
5303 static void phb4_init_errors(struct phb4 *p)
5304 {
5305 	/* Init_55..63 - PBL errors */
5306 	out_be64(p->regs + 0x1900,	0xffffffffffffffffull);
5307 	out_be64(p->regs + 0x1908,	0x0000000000000000ull);
5308 	out_be64(p->regs + 0x1920,	0x000000004d1780f8ull);
5309 	out_be64(p->regs + 0x1928,	0x0000000000000000ull);
5310 	out_be64(p->regs + 0x1930,	0xffffffffb2f87f07ull);
5311 	out_be64(p->regs + 0x1940,	0x0000000000000000ull);
5312 	out_be64(p->regs + 0x1948,	0x0000000000000000ull);
5313 	out_be64(p->regs + 0x1950,	0x0000000000000000ull);
5314 	out_be64(p->regs + 0x1958,	0x0000000000000000ull);
5315 
5316 	/* Init_64..72 - REGB errors */
5317 	out_be64(p->regs + 0x1c00,	0xffffffffffffffffull);
5318 	out_be64(p->regs + 0x1c08,	0x0000000000000000ull);
5319 	/* Enable/disable error status indicators that trigger irqs */
5320 	if (p->has_link) {
5321 		out_be64(p->regs + 0x1c20,	0x2130006efca8bc00ull);
5322 		out_be64(p->regs + 0x1c30,	0xde1fff91035743ffull);
5323 	} else {
5324 		out_be64(p->regs + 0x1c20,	0x0000000000000000ull);
5325 		out_be64(p->regs + 0x1c30,	0x0000000000000000ull);
5326 	}
5327 	out_be64(p->regs + 0x1c28,	0x0080000000000000ull);
5328 	out_be64(p->regs + 0x1c40,	0x0000000000000000ull);
5329 	out_be64(p->regs + 0x1c48,	0x0000000000000000ull);
5330 	out_be64(p->regs + 0x1c50,	0x0000000000000000ull);
5331 	out_be64(p->regs + 0x1c58,	0x0040000000000000ull);
5332 
5333 	/* Init_73..81 - TXE errors */
5334 	out_be64(p->regs + 0x0d08,	0x0000000000000000ull);
5335 
5336 	/* Errata: Clear bit 17, otherwise a CFG write UR/CA will incorrectly
5337 	 * freeze a "random" PE (whatever last PE did an MMIO)
5338 	 */
5339 	if (is_phb5()) {
5340 		out_be64(p->regs + 0x0d28,	0x0000500a00000000ull);
5341 		out_be64(p->regs + 0x0d00,	0xffffffffffffffffull);
5342 		out_be64(p->regs + 0x0d18,	0xffffff0fffffffffull);
5343 		out_be64(p->regs + 0x0d30,	0xdff7af41f7ddffdfull);
5344 	} else {
5345 		out_be64(p->regs + 0x0d28,	0x0000000a00000000ull);
5346 		if (phb4_is_dd20(p)) {
5347 			out_be64(p->regs + 0x0d00,	0xf3acff0ff7ddfff0ull);
5348 			out_be64(p->regs + 0x0d18,	0xf3acff0ff7ddfff0ull);
5349 			out_be64(p->regs + 0x0d30,	0xdfffbd05f7ddfff0ull); /* XXX CAPI has diff. value */
5350 		} else  {
5351 			out_be64(p->regs + 0x0d00,	0xffffffffffffffffull);
5352 			out_be64(p->regs + 0x0d18,	0xffffff0fffffffffull);
5353 			out_be64(p->regs + 0x0d30,	0xdff7bd05f7ddfff0ull);
5354 		}
5355 	}
5356 
5357 	out_be64(p->regs + 0x0d40,	0x0000000000000000ull);
5358 	out_be64(p->regs + 0x0d48,	0x0000000000000000ull);
5359 	out_be64(p->regs + 0x0d50,	0x0000000000000000ull);
5360 	out_be64(p->regs + 0x0d58,	0x0000000000000000ull);
5361 
5362 	/* Init_82..90 - RXE_ARB errors */
5363 	out_be64(p->regs + 0x0d80,	0xffffffffffffffffull);
5364 	out_be64(p->regs + 0x0d88,	0x0000000000000000ull);
5365 	out_be64(p->regs + 0x0d98,	0xfffffffffbffffffull);
5366 	out_be64(p->regs + 0x0da8,	0xc00018b801000060ull);
5367 	/*
5368 	 * Errata ER20161123 says we should set the top two bits in
5369 	 * 0x0db0 but this causes config space accesses which don't
5370 	 * get a response to fence the PHB. This breaks probing,
5371 	 * hence we don't set them here.
5372 	 */
5373 	out_be64(p->regs + 0x0db0,	0x3bffd703fa7fbf8full); /* XXX CAPI has diff. value */
5374 	out_be64(p->regs + 0x0dc0,	0x0000000000000000ull);
5375 	out_be64(p->regs + 0x0dc8,	0x0000000000000000ull);
5376 	out_be64(p->regs + 0x0dd0,	0x0000000000000000ull);
5377 	out_be64(p->regs + 0x0dd8,	0x0000000004000000ull);
5378 
5379 	/* Init_91..99 - RXE_MRG errors */
5380 	out_be64(p->regs + 0x0e00,	0xffffffffffffffffull);
5381 	out_be64(p->regs + 0x0e08,	0x0000000000000000ull);
5382 	out_be64(p->regs + 0x0e18,	0xffffffffffffffffull);
5383 	out_be64(p->regs + 0x0e28,	0x0000600000000000ull);
5384 	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
5385 	out_be64(p->regs + 0x0e40,	0x0000000000000000ull);
5386 	out_be64(p->regs + 0x0e48,	0x0000000000000000ull);
5387 	out_be64(p->regs + 0x0e50,	0x0000000000000000ull);
5388 	out_be64(p->regs + 0x0e58,	0x0000000000000000ull);
5389 
5390 	/* Init_100..108 - RXE_TCE errors */
5391 	out_be64(p->regs + 0x0e80,	0xffffffffffffffffull);
5392 	out_be64(p->regs + 0x0e88,	0x0000000000000000ull);
5393 	out_be64(p->regs + 0x0e98,	0xffffffffffffffffull);
5394 	out_be64(p->regs + 0x0ea8,	0x60000000c0000000ull);
5395 	out_be64(p->regs + 0x0eb0,	0x9faeffaf3fffffffull); /* XXX CAPI has diff. value */
5396 	out_be64(p->regs + 0x0ec0,	0x0000000000000000ull);
5397 	out_be64(p->regs + 0x0ec8,	0x0000000000000000ull);
5398 	out_be64(p->regs + 0x0ed0,	0x0000000000000000ull);
5399 	out_be64(p->regs + 0x0ed8,	0x0000000000000000ull);
5400 
5401 	/* Init_109..117 - RXPHB errors */
5402 	out_be64(p->regs + 0x0c80,	0xffffffffffffffffull);
5403 	out_be64(p->regs + 0x0c88,	0x0000000000000000ull);
5404 	out_be64(p->regs + 0x0c98,	0xffffffffffffffffull);
5405 	out_be64(p->regs + 0x0ca8,	0x0000004000000000ull);
5406 	out_be64(p->regs + 0x0cb0,	0x35777033ff000000ull); /* XXX CAPI has diff. value */
5407 	out_be64(p->regs + 0x0cc0,	0x0000000000000000ull);
5408 	out_be64(p->regs + 0x0cc8,	0x0000000000000000ull);
5409 	out_be64(p->regs + 0x0cd0,	0x0000000000000000ull);
5410 	out_be64(p->regs + 0x0cd8,	0x0000000000000000ull);
5411 
5412 	/* Init_118..121 - LEM */
5413 	out_be64(p->regs + 0x0c00,	0x0000000000000000ull);
5414 	if (phb4_is_dd20(p)) {
5415 		out_be64(p->regs + 0x0c30,	0xf3ffffffffffffffull);
5416 		out_be64(p->regs + 0x0c38,	0xf3ffffffffffffffull);
5417 	} else {
5418 		out_be64(p->regs + 0x0c30,	0xffffffffffffffffull);
5419 		out_be64(p->regs + 0x0c38,	0xffffffffffffffffull);
5420 	}
5421 	out_be64(p->regs + 0x0c40,	0x0000000000000000ull);
5422 }
5423 
5424 
5425 static bool phb4_wait_dlp_reset(struct phb4 *p)
5426 {
5427 	unsigned int i;
5428 	uint64_t val;
5429 
5430 	/*
5431 	 * Firmware cannot access the UTL core regs or PCI config space
5432 	 * until the cores are out of DL_PGRESET.
5433 	 * DL_PGRESET should be polled until it is inactive with a value
5434 	 * of '0'. The recommended polling frequency is once every 1ms.
5435 	 * Firmware should poll at least 200 attempts before giving up.
5436 	 * MMIO Stores to the link are silently dropped by the UTL core if
5437 	 * the link is down.
5438 	 * MMIO Loads to the link will be dropped by the UTL core and will
5439 	 * eventually time-out and will return an all ones response if the
5440 	 * link is down.
5441 	 */
5442 #define DLP_RESET_ATTEMPTS	200
5443 
5444 	PHBDBG(p, "Waiting for DLP PG reset to complete...\n");
5445 	for (i = 0; i < DLP_RESET_ATTEMPTS; i++) {
5446 		val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
5447 		if (!(val & PHB_PCIE_DLP_DL_PGRESET))
5448 			break;
5449 		time_wait_ms(1);
5450 	}
5451 	if (val & PHB_PCIE_DLP_DL_PGRESET) {
5452 		PHBERR(p, "Timeout waiting for DLP PG reset !\n");
5453 		return false;
5454 	}
5455 	return true;
5456 }
5457 static void phb4_init_hw(struct phb4 *p)
5458 {
5459 	uint64_t val, creset;
5460 
5461 	PHBDBG(p, "Initializing PHB...\n");
5462 
5463 	/* Init_1 - Sync reset
5464 	 *
5465 	 * At this point we assume the PHB has already been reset.
5466 	 */
5467 
5468 	/* Init_2 - Mask FIRs */
5469 	out_be64(p->regs + PHB_LEM_ERROR_MASK,			0xffffffffffffffffull);
5470 
5471 	/* Init_3 - TCE tag enable */
5472 	out_be64(p->regs + PHB_TCE_TAG_ENABLE,			0xffffffffffffffffull);
5473 
5474 	/* Init_4 - PCIE System Configuration Register
5475 	 *
5476 	 * Adjust max speed based on system config
5477 	 */
5478 	val = in_be64(p->regs + PHB_PCIE_SCR);
5479 	PHBDBG(p, "Default system config: 0x%016llx\n", val);
5480 	val = SETFIELD(PHB_PCIE_SCR_MAXLINKSPEED, val, p->max_link_speed);
5481 	out_be64(p->regs + PHB_PCIE_SCR, val);
5482 	PHBDBG(p, "New system config    : 0x%016llx\n",
5483 	       in_be64(p->regs + PHB_PCIE_SCR));
5484 
5485 	/* Init_5 - deassert CFG reset */
5486 	creset = in_be64(p->regs + PHB_PCIE_CRESET);
5487 	PHBDBG(p, "Initial PHB CRESET is 0x%016llx\n", creset);
5488 	creset &= ~PHB_PCIE_CRESET_CFG_CORE;
5489 	out_be64(p->regs + PHB_PCIE_CRESET,			creset);
5490 
5491 	/* Init_6..13 - PCIE DLP Lane EQ control */
5492 	if (p->lane_eq) {
5493 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL0, be64_to_cpu(p->lane_eq[0]));
5494 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL1, be64_to_cpu(p->lane_eq[1]));
5495 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL2, be64_to_cpu(p->lane_eq[2]));
5496 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL3, be64_to_cpu(p->lane_eq[3]));
5497 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL40, be64_to_cpu(p->lane_eq[4]));
5498 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL41, be64_to_cpu(p->lane_eq[5]));
5499 		if (is_phb5()) {
5500 			out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL50, be64_to_cpu(p->lane_eq[6]));
5501 			out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL51, be64_to_cpu(p->lane_eq[7]));
5502 		}
5503 	}
5504 	if (!p->lane_eq_en) {
5505 		/* Read-modify-write to set the two bypass bits */
5506 		PHBDBG(p, "LINK: Disabling Lane EQ\n");
5507 		val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
5508 		val |= PHB_PCIE_DLP_CTL_BYPASS_PH2 | PHB_PCIE_DLP_CTL_BYPASS_PH3;
5509 		out_be64(p->regs + PHB_PCIE_DLP_CTL, val);
5510 	}
5511 
5512 	if (is_phb5()) {
5513 		/* disable scaled flow control for now. SW527785 */
5514 		PHBDBG(p, "LINK: Disabling scaled flow control\n");
5515 		val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
5516 		val |= PHB_PCIE_DLP_CTL_SFC_DISABLE;
5517 		out_be64(p->regs + PHB_PCIE_DLP_CTL, val);
5518 
5519 		/* lane equalization settings need to be tuned on P10 */
5520 		out_be64(p->regs + PHB_PCIE_PDL_PHY_EQ_CNTL,
5521 			 0x80F4FFFFFF0F9C00);
5522 	}
5523 
5524 	/* Init_14 - Clear link training */
5525 	phb4_pcicfg_write32(&p->phb, 0, 0x78,
5526 			    0x07FE0000 | p->max_link_speed);
5527 
5528 	/* Init_15 - deassert cores reset */
5529 	/*
5530 	 * Lift the PHB resets but not PERST, this will be lifted
5531 	 * later by the initial PERST state machine
5532 	 */
5533 	creset &= ~(PHB_PCIE_CRESET_TLDLP | PHB_PCIE_CRESET_PBL);
5534 	creset |= PHB_PCIE_CRESET_PIPE_N;
5535 	out_be64(p->regs + PHB_PCIE_CRESET,			   creset);
5536 
5537 	/* Init_16 - Wait for DLP PGRESET to clear */
5538 	if (!phb4_wait_dlp_reset(p))
5539 		goto failed;
5540 
5541 	/* Init_17 - PHB Control */
5542 	val = PHB_CTRLR_IRQ_PGSZ_64K;
5543 	val |= PHB_CTRLR_TCE_CLB_DISABLE; // HW557787 circumvention
5544 	val |= SETFIELD(PHB_CTRLR_TVT_ADDR_SEL, 0ull, TVT_2_PER_PE);
5545 	if (phb_pq_disable(p))
5546 		val |= PHB_CTRLR_IRQ_PQ_DISABLE;
5547 	if (phb_abt_mode(p))
5548 		val |= PHB_CTRLR_IRQ_ABT_MODE;
5549 	if (phb_can_store_eoi(p)) {
5550 		val |= PHB_CTRLR_IRQ_STORE_EOI;
5551 		PHBDBG(p, "store EOI is enabled\n");
5552 	}
5553 
5554 	if (!pci_eeh_mmio)
5555 		val |= PHB_CTRLR_MMIO_EEH_DISABLE;
5556 
5557 	out_be64(p->regs + PHB_CTRLR, val);
5558 
5559 	/* Init_18..41 - Architected IODA3 inits */
5560 	phb4_init_ioda3(p);
5561 
5562 	/* Init_42..45 - Clear DLP error logs */
5563 	out_be64(p->regs + 0x1aa0,			0xffffffffffffffffull);
5564 	out_be64(p->regs + 0x1aa8,			0xffffffffffffffffull);
5565 	out_be64(p->regs + 0x1ab0,			0xffffffffffffffffull);
5566 	out_be64(p->regs + 0x1ab8,			0x0);
5567 
5568 
5569 	/* Init_46..54 : Init root complex config space */
5570 	if (!phb4_init_rc_cfg(p))
5571 		goto failed;
5572 
5573 	/* Init_55..121  : Setup error registers */
5574 	phb4_init_errors(p);
5575 
5576 	/* Init_122..123 : Wait for link
5577 	 * NOTE: At this point the spec waits for the link to come up. We
5578 	 * don't bother as we are doing a PERST soon.
5579 	 */
5580 
5581 	/* Init_124 :  NBW. XXX TODO */
5582 	/* See enable_capi_mode() */
5583 
5584 	/* Init_125 : Setup PCI command/status on root complex
5585 	 * I don't know why the spec does this now and not earlier, so
5586 	 * to be sure to get it right we might want to move it to the freset
5587 	 * state machine, though the generic PCI layer will probably do
5588 	 * this anyway (ie, enable MEM, etc... in the RC)
5590 	 */
5591 	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_CMD,
5592 			    PCI_CFG_CMD_MEM_EN |
5593 			    PCI_CFG_CMD_BUS_MASTER_EN);
5594 
5595 	/* Clear errors */
5596 	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_STAT,
5597 			    PCI_CFG_STAT_SENT_TABORT |
5598 			    PCI_CFG_STAT_RECV_TABORT |
5599 			    PCI_CFG_STAT_RECV_MABORT |
5600 			    PCI_CFG_STAT_SENT_SERR |
5601 			    PCI_CFG_STAT_RECV_PERR);
5602 
5603 	/* Init_126..130 - Re-enable error interrupts */
5604 	phb4_int_unmask_all(p);
5605 
5606 	/* Init_131 - Re-enable LEM error mask */
5607 	out_be64(p->regs + PHB_LEM_ERROR_MASK,			0x0000000000000000ull);
5608 
5609 
5610 	/* Init_132 - Enable DMA address speculation */
5611 	out_be64(p->regs + PHB_TCE_SPEC_CTL,			0x0000000000000000ull);
5612 
5613 	/* Init_133 - Timeout Control Register 1 */
5614 	out_be64(p->regs + PHB_TIMEOUT_CTRL1,			0x0015150000150000ull);
5615 
5616 	/* Init_134 - Timeout Control Register 2 */
5617 	out_be64(p->regs + PHB_TIMEOUT_CTRL2,			0x0000151500000000ull);
5618 
5619 	/* Init_135 - PBL Timeout Control Register */
5620 	out_be64(p->regs + PHB_PBL_TIMEOUT_CTRL,		0x2013000000000000ull);
5621 
5622 	/* Mark the PHB as functional which enables all the various sequences */
5623 	p->broken = false;
5624 
5625 	PHBDBG(p, "Initialization complete\n");
5626 
5627 	return;
5628 
5629  failed:
5630 	PHBERR(p, "Initialization failed\n");
5631 	p->broken = true;
5632 }
5633 
5634 /* FIXME: Use scoms rather than MMIO incase we are fenced */
5635 static bool phb4_read_capabilities(struct phb4 *p)
5636 {
5637 	uint64_t val;
5638 
5639 	/* XXX Should make sure ETU is out of reset ! */
5640 
5641 	/* Grab version and fit it in an int */
5642 	val = phb4_read_reg_asb(p, PHB_VERSION);
5643 	if (val == 0 || val == 0xffffffffffffffffUL) {
5644 		PHBERR(p, "Failed to read version, PHB appears broken\n");
5645 		return false;
5646 	}
5647 
5648 	p->rev = ((val >> 16) & 0x00ff0000) | (val & 0xffff);
5649 	PHBDBG(p, "Core revision 0x%x\n", p->rev);
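	/*
	 * Illustration (assumed register layout): a raw PHB_VERSION of
	 * 0x000000a400000002 packs to rev 0xa40002 under the expression
	 * above; the DD-level helpers (e.g. phb4_is_dd20()) compare
	 * against packed values of this form.
	 */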
5650 
5651 	/* Read EEH capabilities */
5652 	val = in_be64(p->regs + PHB_PHB4_EEH_CAP);
5653 	if (val == 0xffffffffffffffffUL) {
5654 		PHBERR(p, "Failed to read EEH cap, PHB appears broken\n");
5655 		return false;
5656 	}
5657 	p->max_num_pes = val >> 52;
5658 	if (p->max_num_pes >= 512) {
5659 		p->mrt_size = 16;
5660 		p->mbt_size = 32;
5661 		p->tvt_size = 1024;
5662 	} else {
5663 		p->mrt_size = 8;
5664 		p->mbt_size = 16;
5665 		p->tvt_size = 512;
5666 	}
5667 
5668 	val = in_be64(p->regs + PHB_PHB4_IRQ_CAP);
5669 	if (val == 0xffffffffffffffffUL) {
5670 		PHBERR(p, "Failed to read IRQ cap, PHB appears broken\n");
5671 		return false;
5672 	}
5673 	p->num_irqs = val & 0xffff;
5674 
5675 	/* This works for 512 PEs.  FIXME calculate for any hardware
5676 	 * size returned above
5677 	 */
5678 	p->tbl_peltv_size = PELTV_TABLE_SIZE_MAX;
5679 
5680 	p->tbl_pest_size = p->max_num_pes*16;
5681 
5682 	PHBDBG(p, "Found %d max PEs and %d IRQs\n",
5683 	       p->max_num_pes, p->num_irqs);
5684 
5685 	return true;
5686 }
5687 
5688 static void phb4_allocate_tables(struct phb4 *p)
5689 {
5690 	uint32_t i;
5691 
5692 	/* XXX Our current memalign implementation sucks,
5693 	 *
5694 	 * It will do the job, however it doesn't support freeing
5695 	 * the memory and wastes space by always allocating twice
5696 	 * as much as requested (size + alignment)
5697 	 */
5698 	p->tbl_rtt = local_alloc(p->chip_id, RTT_TABLE_SIZE, RTT_TABLE_SIZE);
5699 	assert(p->tbl_rtt);
5700 	for (i = 0; i < RTT_TABLE_ENTRIES; i++)
5701 		p->tbl_rtt[i] = cpu_to_be16(PHB4_RESERVED_PE_NUM(p));
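	/*
	 * Every RID starts out mapped to the reserved PE; real PE
	 * assignments are made later via the set_pe() callback.
	 */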
5702 
5703 	p->tbl_peltv = local_alloc(p->chip_id, p->tbl_peltv_size, p->tbl_peltv_size);
5704 	assert(p->tbl_peltv);
5705 	memset(p->tbl_peltv, 0, p->tbl_peltv_size);
5706 
5707 	p->tbl_pest = (uint64_t)local_alloc(p->chip_id, p->tbl_pest_size, p->tbl_pest_size);
5708 	assert(p->tbl_pest);
5709 	memset((void *)p->tbl_pest, 0, p->tbl_pest_size);
5710 }
5711 
5712 static void phb4_add_properties(struct phb4 *p)
5713 {
5714 	struct dt_node *np = p->phb.dt_node;
5715 	uint32_t lsibase, icsp = get_ics_phandle();
5716 	uint64_t m32b, m64b, m64s;
5717 
5718 	/* Add various properties that HB doesn't have to
5719 	 * add, some of them simply because they result from
5720 	 * policy decisions made in skiboot rather than in HB
5721 	 * such as the MMIO windows going to PCI, interrupts,
5722 	 * etc...
5723 	 */
5724 	dt_add_property_cells(np, "#address-cells", 3);
5725 	dt_add_property_cells(np, "#size-cells", 2);
5726 	dt_add_property_cells(np, "#interrupt-cells", 1);
5727 	dt_add_property_cells(np, "bus-range", 0, 0xff);
5728 	dt_add_property_cells(np, "clock-frequency", 0x200, 0); /* ??? */
5729 
5730 	dt_add_property_cells(np, "interrupt-parent", icsp);
5731 
5732 	/* XXX FIXME: add slot-name */
5733 	//dt_property_cell("bus-width", 8); /* Figure it out from VPD ? */
5734 
5735 	/* "ranges", we only expose M32 (PHB4 doesn't do IO)
5736 	 *
5737 	 * Note: The kernel expects us to have chopped off 64k from the
5738 	 * M32 size (for the 32-bit MSIs). If we don't do that, it will
5739 	 * get confused (OPAL does it)
5740 	 */
5741 	m32b = cleanup_addr(p->mm1_base);
5742 	m64b = cleanup_addr(p->mm0_base);
5743 	m64s = p->mm0_size;
5744 	dt_add_property_cells(np, "ranges",
5745 			      /* M32 space */
5746 			      0x02000000, 0x00000000, M32_PCI_START,
5747 			      hi32(m32b), lo32(m32b), 0, M32_PCI_SIZE - 0x10000);
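	/* The seven cells above follow the standard OF PCI "ranges"
	 * encoding: 3 PCI address cells (space code 0x02000000 = 32-bit
	 * memory, then the 64-bit PCI address), 2 parent address cells
	 * and 2 size cells.
	 */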
5748 
5749 	/* XXX FIXME: add opal-memwin32, dmawins, etc... */
5750 	dt_add_property_u64s(np, "ibm,opal-m64-window", m64b, m64b, m64s);
5751 	dt_add_property(np, "ibm,opal-single-pe", NULL, 0);
5752 	dt_add_property_cells(np, "ibm,opal-num-pes", p->num_pes);
5753 	dt_add_property_cells(np, "ibm,opal-reserved-pe",
5754 			      PHB4_RESERVED_PE_NUM(p));
5755 	dt_add_property_cells(np, "ibm,opal-msi-ranges",
5756 			      p->base_msi, p->num_irqs - 8);
5757 	/* M64 ranges start at 1 as MBT0 is used for M32 */
5758 	dt_add_property_cells(np, "ibm,opal-available-m64-ranges",
5759 			      1, p->mbt_size - 1);
5760 	dt_add_property_cells(np, "ibm,supported-tce-sizes",
5761 			      12, // 4K
5762 			      16, // 64K
5763 			      21, // 2M
5764 			      30); // 1G
5765 
5766 	/* Tell Linux about alignment limits for segment splits.
5767 	 *
5768 	 * XXX We currently only expose splits of 1 and "num PEs",
5769 	 */
5770 	dt_add_property_cells(np, "ibm,opal-m64-segment-splits",
5771 			      /* Full split, number of segments: */
5772 			      p->num_pes,
5773 			      /* Encoding passed to the enable call */
5774 			      OPAL_ENABLE_M64_SPLIT,
5775 			      /* Alignment/size restriction in #bits */
5776 			      /* XXX VERIFY VALUE */
5777 			      12,
5778 			      /* Unused */
5779 			      0,
5780 			      /* single PE, number of segments: */
5781 			      1,
5782 			      /* Encoding passed to the enable call */
5783 			      OPAL_ENABLE_M64_NON_SPLIT,
5784 			      /* Alignment/size restriction in #bits */
5785 			      /* XXX VERIFY VALUE */
5786 			      12,
5787 			      /* Unused */
5788 			      0);
5789 
5790 	/* The interrupt maps will be generated in the RC node by the
5791 	 * PCI code based on the content of this structure:
5792 	 */
5793 	lsibase = p->base_lsi;
5794 	p->phb.lstate.int_size = 2;
5795 	p->phb.lstate.int_val[0][0] = lsibase + PHB4_LSI_PCIE_INTA;
5796 	p->phb.lstate.int_val[0][1] = 1;
5797 	p->phb.lstate.int_val[1][0] = lsibase + PHB4_LSI_PCIE_INTB;
5798 	p->phb.lstate.int_val[1][1] = 1;
5799 	p->phb.lstate.int_val[2][0] = lsibase + PHB4_LSI_PCIE_INTC;
5800 	p->phb.lstate.int_val[2][1] = 1;
5801 	p->phb.lstate.int_val[3][0] = lsibase + PHB4_LSI_PCIE_INTD;
5802 	p->phb.lstate.int_val[3][1] = 1;
5803 	p->phb.lstate.int_parent[0] = icsp;
5804 	p->phb.lstate.int_parent[1] = icsp;
5805 	p->phb.lstate.int_parent[2] = icsp;
5806 	p->phb.lstate.int_parent[3] = icsp;
5807 
5808 	/* Indicators for variable tables */
5809 	dt_add_property_cells(np, "ibm,opal-rtt-table",
5810 		hi32((u64) p->tbl_rtt), lo32((u64) p->tbl_rtt), RTT_TABLE_SIZE);
5811 
5812 	dt_add_property_cells(np, "ibm,opal-peltv-table",
5813 		hi32((u64) p->tbl_peltv), lo32((u64) p->tbl_peltv),
5814 		p->tbl_peltv_size);
5815 
5816 	dt_add_property_cells(np, "ibm,opal-pest-table",
5817 		hi32(p->tbl_pest), lo32(p->tbl_pest), p->tbl_pest_size);
5818 
5819 	dt_add_property_cells(np, "ibm,phb-diag-data-size",
5820 			      sizeof(struct OpalIoPhb4ErrorData));
5821 
5822 	/* Indicate to Linux that CAPP timebase sync is supported */
5823 	dt_add_property_string(np, "ibm,capp-timebase-sync", NULL);
5824 
5825 	/* Tell Linux Compare/Mask indication values */
5826 	dt_add_property_cells(np, "ibm,phb-indications", CAPIIND, ASNIND,
5827 			      NBWIND);
5828 }
5829 
5830 static bool phb4_calculate_windows(struct phb4 *p)
5831 {
5832 	const struct dt_property *prop;
5833 
5834 	/* Get PBCQ MMIO windows from device-tree */
5835 	prop = dt_require_property(p->phb.dt_node,
5836 				   "ibm,mmio-windows", -1);
5837 	assert(prop->len >= (2 * sizeof(uint64_t)));
5838 
5839 	p->mm0_base = dt_property_get_u64(prop, 0);
5840 	p->mm0_size = dt_property_get_u64(prop, 1);
5841 	if (prop->len > 16) {
5842 		p->mm1_base = dt_property_get_u64(prop, 2);
5843 		p->mm1_size = dt_property_get_u64(prop, 3);
5844 	}
5845 
5846 	/* Sort them so that 0 is big and 1 is small */
5847 	if (p->mm1_size && p->mm1_size > p->mm0_size) {
5848 		uint64_t b = p->mm0_base;
5849 		uint64_t s = p->mm0_size;
5850 		p->mm0_base = p->mm1_base;
5851 		p->mm0_size = p->mm1_size;
5852 		p->mm1_base = b;
5853 		p->mm1_size = s;
5854 	}
5855 
5856 	/* If 1 is too small, ditch it */
5857 	if (p->mm1_size < M32_PCI_SIZE)
5858 		p->mm1_size = 0;
5859 
5860 	/* If 1 doesn't exist, carve it out of 0 */
5861 	if (p->mm1_size == 0) {
5862 		p->mm0_size /= 2;
5863 		p->mm1_base = p->mm0_base + p->mm0_size;
5864 		p->mm1_size = p->mm0_size;
5865 	}
5866 
5867 	/* Crop mm1 to our desired size */
5868 	if (p->mm1_size > M32_PCI_SIZE)
5869 		p->mm1_size = M32_PCI_SIZE;
5870 
5871 	return true;
5872 }
5873 
5874 static void phb4_err_interrupt(struct irq_source *is, uint32_t isn)
5875 {
5876 	struct phb4 *p = is->data;
5877 
5878 	PHBDBG(p, "Got interrupt 0x%08x\n", isn);
5879 
5880 	/* mask the interrupt conditions to prevent them from re-firing */
5881 	phb4_int_mask_active(p);
5882 
5883 	/* Update pending event */
5884 	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
5885 				OPAL_EVENT_PCI_ERROR);
5886 
5887 	/* If the PHB is broken, go away */
5888 	if (p->broken)
5889 		return;
5890 
5891 	/*
5892 	 * Mark the PHB as having a pending error so that the OS
5893 	 * can handle it at a later point.
5894 	 */
5895 	phb4_set_err_pending(p, true);
5896 }
5897 
5898 static uint64_t phb4_lsi_attributes(struct irq_source *is __unused,
5899 				uint32_t isn __unused)
5900 {
5901 #ifndef DISABLE_ERR_INTS
5902 	struct phb4 *p = is->data;
5903 	uint32_t idx = isn - p->base_lsi;
5904 
5905 	if (idx == PHB4_LSI_PCIE_INF || idx == PHB4_LSI_PCIE_ER)
5906 		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_LSI;
5907 #endif
5908 	return IRQ_ATTR_TARGET_LINUX;
5909 }
5910 
5911 static char *phb4_lsi_name(struct irq_source *is, uint32_t isn)
5912 {
5913 	struct phb4 *p = is->data;
5914 	uint32_t idx = isn - p->base_lsi;
5915 	char buf[32];
5916 
5917 	if (idx == PHB4_LSI_PCIE_INF)
5918 		snprintf(buf, 32, "phb#%04x-inf", p->phb.opal_id);
5919 	else if (idx == PHB4_LSI_PCIE_ER)
5920 		snprintf(buf, 32, "phb#%04x-err", p->phb.opal_id);
5921 	else
5922 		assert(0); /* PCIe LSIs should never be directed to OPAL */
5923 
5924 	return strdup(buf);
5925 }
5926 
5927 static const struct irq_source_ops phb4_lsi_ops = {
5928 	.interrupt = phb4_err_interrupt,
5929 	.attributes = phb4_lsi_attributes,
5930 	.name = phb4_lsi_name,
5931 };
5932 
5933 static __be64 lane_eq_default[8] = {
5934 	CPU_TO_BE64(0x5454545454545454UL), CPU_TO_BE64(0x5454545454545454UL),
5935 	CPU_TO_BE64(0x5454545454545454UL), CPU_TO_BE64(0x5454545454545454UL),
5936 	CPU_TO_BE64(0x7777777777777777UL), CPU_TO_BE64(0x7777777777777777UL),
5937 	CPU_TO_BE64(0x7777777777777777UL), CPU_TO_BE64(0x7777777777777777UL),
5938 };
5939 
5940 static __be64 lane_eq_phb5_default[8] = {
5941 	CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
5942 	CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
5943 	CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
5944 	CPU_TO_BE64(0x9999999999999999UL), CPU_TO_BE64(0x9999999999999999UL),
5945 };
5946 
5947 static void phb4_create(struct dt_node *np)
5948 {
5949 	const struct dt_property *prop;
5950 	struct phb4 *p;
5951 	struct pci_slot *slot;
5952 	size_t lane_eq_len, lane_eq_len_req;
5953 	struct dt_node *iplp;
5954 	char *path;
5955 	uint32_t irq_base, irq_flags;
5956 	int i, eq_reg_count;
5957 	int chip_id;
5958 
5959 	chip_id = dt_prop_get_u32(np, "ibm,chip-id");
5960 	p = local_alloc(chip_id, sizeof(struct phb4), 8);
5961 	assert(p);
5962 	memset(p, 0x0, sizeof(struct phb4));
5963 
5964 	/* Populate base stuff */
5965 	p->index = dt_prop_get_u32(np, "ibm,phb-index");
5966 	p->chip_id = chip_id;
5967 	p->pec = dt_prop_get_u32(np, "ibm,phb-pec-index");
5968 	p->regs = (void *)dt_get_address(np, 0, NULL);
5969 	p->int_mmio = (void *)dt_get_address(np, 1, NULL);
5970 	p->phb.dt_node = np;
5971 	p->phb.ops = &phb4_ops;
5972 	p->phb.phb_type = phb_type_pcie_v4;
5973 	p->phb.scan_map = 0x1; /* Only device 0 to scan */
5974 
5975 	if (!phb4_calculate_windows(p))
5976 		return;
5977 
5978 	/* Get the various XSCOM register bases from the device-tree */
5979 	prop = dt_require_property(np, "ibm,xscom-bases", 5 * sizeof(uint32_t));
5980 	p->pe_xscom = dt_property_get_cell(prop, 0);
5981 	p->pe_stk_xscom = dt_property_get_cell(prop, 1);
5982 	p->pci_xscom = dt_property_get_cell(prop, 2);
5983 	p->pci_stk_xscom = dt_property_get_cell(prop, 3);
5984 	p->etu_xscom = dt_property_get_cell(prop, 4);
5985 
5986 	/*
5987 	 * We skip the initial PERST assertion requested by the generic code
5988 	 * when doing a cold boot because we are coming out of cold boot already
5989 	 * so we save boot time that way. The PERST state machine will still
5990 	 * handle waiting for the link to come up, it will just avoid actually
5991 	 * asserting & deasserting the PERST output
5992 	 *
5993 	 * For a hot IPL, we still do a PERST
5994 	 *
5995 	 * Note: In absence of property (ie, FSP-less), we stick to the old
5996 	 * behaviour and set skip_perst to true
5997 	 */
5998 	p->skip_perst = true; /* Default */
5999 
6000 	iplp = dt_find_by_path(dt_root, "ipl-params/ipl-params");
6001 	if (iplp) {
6002 		const char *ipl_type = dt_prop_get_def(iplp, "cec-major-type", NULL);
6003 		if (ipl_type && (!strcmp(ipl_type, "hot")))
6004 			p->skip_perst = false;
6005 	}
6006 
6007 	/* By default link is assumed down */
6008 	p->has_link = false;
6009 
6010 	/* We register the PHB before we initialize it so we
6011 	 * get a useful OPAL ID for it
6012 	 */
6013 	pci_register_phb(&p->phb, phb4_get_opal_id(p->chip_id, p->index));
6014 
6015 	/* Create slot structure */
6016 	slot = phb4_slot_create(&p->phb);
6017 	if (!slot)
6018 		PHBERR(p, "Cannot create PHB slot\n");
6019 
6020 	/* Hello ! */
6021 	path = dt_get_path(np);
6022 	PHBINF(p, "Found %s @%p\n", path, p->regs);
6023 	PHBINF(p, "  M32 [0x%016llx..0x%016llx]\n",
6024 	       p->mm1_base, p->mm1_base + p->mm1_size - 1);
6025 	PHBINF(p, "  M64 [0x%016llx..0x%016llx]\n",
6026 	       p->mm0_base, p->mm0_base + p->mm0_size - 1);
6027 	free(path);
6028 
6029 	/* Find base location code from root node */
6030 	p->phb.base_loc_code = dt_prop_get_def(dt_root,
6031 					       "ibm,io-base-loc-code", NULL);
6032 	if (!p->phb.base_loc_code)
6033 		PHBDBG(p, "Base location code not found !\n");
6034 
6035 	/*
6036 	 * Grab CEC IO VPD load info from the root of the device-tree,
6037 	 * on P8 there's a single such VPD for the whole machine
6038 	 */
6039 	prop = dt_find_property(dt_root, "ibm,io-vpd");
6040 	if (!prop) {
6041 		/* LX VPD Lid not already loaded */
6042 		if (platform.vpd_iohub_load)
6043 			platform.vpd_iohub_load(dt_root);
6044 	}
6045 
6046 	/* Obtain information about the PHB from the hardware directly */
6047 	if (!phb4_read_capabilities(p))
6048 		goto failed;
6049 
6050 	p->max_link_speed = phb4_get_max_link_speed(p, np);
6051 	p->max_link_width = phb4_get_max_link_width(p);
6052 	PHBINF(p, "Max link speed: GEN%i, max link width %i\n",
6053 	       p->max_link_speed, p->max_link_width);
6054 
6055 	/* Check for lane equalization values from HB or HDAT */
6056 	p->lane_eq_en = true;
6057 	p->lane_eq = dt_prop_get_def_size(np, "ibm,lane-eq", NULL, &lane_eq_len);
6058 	if (is_phb5())
6059 		eq_reg_count = 8;
6060 	else
6061 		eq_reg_count = 6;
6062 	lane_eq_len_req = eq_reg_count * 8;
6063 	if (p->lane_eq) {
6064 		if (lane_eq_len < lane_eq_len_req) {
6065 			PHBERR(p, "Device-tree has ibm,lane-eq too short: %ld"
6066 			       " (want %ld)\n", lane_eq_len, lane_eq_len_req);
6067 			p->lane_eq = NULL;
6068 		}
6069 	} else {
6070 		PHBDBG(p, "Using default lane equalization settings\n");
6071 		if (is_phb5())
6072 			p->lane_eq = lane_eq_phb5_default;
6073 		else
6074 			p->lane_eq = lane_eq_default;
6075 	}
6076 	if (p->lane_eq) {
6077 		PHBDBG(p, "Override lane equalization settings:\n");
6078 		for (i = 0 ; i < lane_eq_len_req/(8 * 2) ; i++)
6079 			PHBDBG(p, "  0x%016llx 0x%016llx\n",
6080 			       be64_to_cpu(p->lane_eq[2 * i]),
6081 			       be64_to_cpu(p->lane_eq[2 * i + 1]));
6082 	}
6083 
6084 	/* Allocate a block of interrupts. We need to know if it needs
6085 	 * 2K or 4K interrupts ... for now we just use 4K but that
6086 	 * needs to be fixed
6087 	 */
6088 	if (is_phb5())
6089 		irq_base = xive2_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
6090 	else
6091 		irq_base = xive_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
6092 	if (irq_base == XIVE_IRQ_ERROR) {
6093 		PHBERR(p, "Failed to allocate %d interrupt sources\n",
6094 		       p->num_irqs);
6095 		goto failed;
6096 	}
6097 	p->base_msi = irq_base;
6098 	p->base_lsi = irq_base + p->num_irqs - 8;
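	/* The last 8 interrupts of the block are used as the PHB's LSIs
	 * (hence the "num_irqs - 8" when registering MSI sources below).
	 */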
6099 	p->num_pes = p->max_num_pes;
6100 
6101 	/* Allocate the SkiBoot internal in-memory tables for the PHB */
6102 	phb4_allocate_tables(p);
6103 
6104 	phb4_add_properties(p);
6105 
6106 	/* Clear IODA3 cache */
6107 	phb4_init_ioda_cache(p);
6108 
6109 	/* Get the HW up and running */
6110 	phb4_init_hw(p);
6111 
6112 	/* init capp that might get attached to the phb */
6113 	if (is_phb4())
6114 		phb4_init_capp(p);
6115 
6116 	/* Compute XIVE source flags depending on PHB revision */
6117 	irq_flags = 0;
6118 	if (phb_can_store_eoi(p))
6119 		irq_flags |= XIVE_SRC_STORE_EOI;
6120 	else
6121 		irq_flags |= XIVE_SRC_TRIGGER_PAGE;
6122 
6123 	if (is_phb5()) {
6124 		/*
6125 		 * Register sources with XIVE. If offloading is on, use the
6126 		 * ESB pages of the XIVE IC for the MSI sources instead of the
6127 		 * ESB pages of the PHB.
6128 		 */
6129 		if (phb_pq_disable(p) || phb_abt_mode(p)) {
6130 			xive2_register_esb_source(p->base_msi, p->num_irqs - 8);
6131 		} else {
6132 			xive2_register_hw_source(p->base_msi,
6133 						 p->num_irqs - 8, 16,
6134 						 p->int_mmio, irq_flags,
6135 						 NULL, NULL);
6136 		}
6137 
6138 		/*
6139 		 * LSI sources always use the ESB pages of the PHB.
6140 		 */
6141 		xive2_register_hw_source(p->base_lsi, 8, 16,
6142 					 p->int_mmio + ((p->num_irqs - 8) << 16),
6143 					 XIVE_SRC_LSI | irq_flags, p, &phb4_lsi_ops);
6144 	} else {
6145 		/* Register all interrupt sources with XIVE */
6146 		xive_register_hw_source(p->base_msi, p->num_irqs - 8, 16,
6147 					p->int_mmio, irq_flags, NULL, NULL);
6148 
6149 		xive_register_hw_source(p->base_lsi, 8, 16,
6150 					p->int_mmio + ((p->num_irqs - 8) << 16),
6151 					XIVE_SRC_LSI, p, &phb4_lsi_ops);
6152 	}
6153 
6154 	/* Platform additional setup */
6155 	if (platform.pci_setup_phb)
6156 		platform.pci_setup_phb(&p->phb, p->index);
6157 
6158 	dt_add_property_string(np, "status", "okay");
6159 
6160 	return;
6161 
6162  failed:
6163 	p->broken = true;
6164 
6165 	/* Tell Linux it's broken */
6166 	dt_add_property_string(np, "status", "error");
6167 }
6168 
6169 static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
6170 			     uint32_t nest_base, uint32_t pci_base)
6171 {
6172 	enum phys_map_type phys_mmio64, phys_mmio32, phys_xive_esb, phys_reg_spc;
6173 	uint32_t pci_stack, nest_stack, etu_base, gcid, phb_num, stk_index;
6174 	uint64_t val, phb_bar = 0, irq_bar = 0, bar_en;
6175 	uint64_t mmio0_bar = 0, mmio0_bmask, mmio0_sz;
6176 	uint64_t mmio1_bar = 0, mmio1_bmask, mmio1_sz;
6177 	void *foo;
6178 	__be64 mmio_win[4];
6179 	unsigned int mmio_win_sz;
6180 	struct dt_node *np;
6181 	char *path;
6182 	uint64_t capp_ucode_base;
6183 	unsigned int max_link_speed;
6184 	int rc;
6185 
6186 	assert(is_phb5() || is_phb4()); /* Sanity check */
6187 
6188 	gcid = dt_get_chip_id(stk_node);
6189 	stk_index = dt_prop_get_u32(stk_node, "reg");
6190 	phb_num = dt_prop_get_u32(stk_node, "ibm,phb-index");
6191 	path = dt_get_path(stk_node);
6192 	if (is_phb5()) {
6193 		phys_mmio64 = PHB5_64BIT_MMIO;
6194 		phys_mmio32 = PHB5_32BIT_MMIO;
6195 		phys_xive_esb = PHB5_XIVE_ESB;
6196 		phys_reg_spc = PHB5_REG_SPC;
6197 		prlog(PR_INFO, "PHB: Chip %d Found PHB5 PBCQ%d Stack %d at %s\n",
6198 		      gcid, pec_index, stk_index, path);
6199 	} else {
6200 		phys_mmio64 = PHB4_64BIT_MMIO;
6201 		phys_mmio32 = PHB4_32BIT_MMIO;
6202 		phys_xive_esb = PHB4_XIVE_ESB;
6203 		phys_reg_spc = PHB4_REG_SPC;
6204 		prlog(PR_INFO, "PHB: Chip %d Found PHB4 PBCQ%d Stack %d at %s\n",
6205 		      gcid, pec_index, stk_index, path);
6206 	}
6207 	free(path);
6208 
6209 	pci_stack = pci_base + 0x40 * (stk_index + 1);
6210 	nest_stack = nest_base + 0x40 * (stk_index + 1);
6211 	etu_base = pci_base + 0x100 + 0x40 * stk_index;
6212 
6213 	prlog(PR_DEBUG, "PHB[%d:%d] X[PE]=0x%08x/0x%08x X[PCI]=0x%08x/0x%08x X[ETU]=0x%08x\n",
6214 	      gcid, phb_num, nest_base, nest_stack, pci_base, pci_stack, etu_base);
6215 
6216 	/* Default BAR enables */
6217 	bar_en = 0;
6218 
6219 	/* Initialize PHB register BAR */
6220 	phys_map_get(gcid, phys_reg_spc, phb_num, &phb_bar, NULL);
6221 	rc = xscom_write(gcid, nest_stack + XPEC_NEST_STK_PHB_REG_BAR,
6222 			 phb_bar << 8);
6223 
6224 	/* A scom error here probably indicates a defective/garded PHB */
6225 	if (rc != OPAL_SUCCESS) {
6226 		prerror("PHB[%d:%d] Unable to set PHB BAR. Error=%d\n",
6227 		      gcid, phb_num, rc);
6228 		return;
6229 	}
6230 
6231 	bar_en |= XPEC_NEST_STK_BAR_EN_PHB;
6232 
6233 	/* Same with INT BAR (ESB) */
6234 	phys_map_get(gcid, phys_xive_esb, phb_num, &irq_bar, NULL);
6235 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_IRQ_BAR, irq_bar << 8);
6236 	bar_en |= XPEC_NEST_STK_BAR_EN_INT;
6237 
6238 
6239 	/* Same with MMIO windows */
6240 	phys_map_get(gcid, phys_mmio64, phb_num, &mmio0_bar, &mmio0_sz);
6241 	mmio0_bmask =  (~(mmio0_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
6242 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0, mmio0_bar << 8);
6243 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0_MASK, mmio0_bmask << 8);
6244 
6245 	phys_map_get(gcid, phys_mmio32, phb_num, &mmio1_bar, &mmio1_sz);
6246 	mmio1_bmask =  (~(mmio1_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
6247 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1, mmio1_bar << 8);
6248 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1_MASK, mmio1_bmask << 8);
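	/*
	 * Illustration (window size made up): a 64GB 64-bit window,
	 * mmio0_sz = 0x0000001000000000, yields a mask of
	 * 0x00FFFFF000000000 before the << 8 applied on the write.
	 */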
6249 
6250 	/* Build MMIO windows list */
6251 	mmio_win_sz = 0;
6252 	if (mmio0_bar) {
6253 		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio0_bar);
6254 		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio0_sz);
6255 		bar_en |= XPEC_NEST_STK_BAR_EN_MMIO0;
6256 	}
6257 	if (mmio1_bar) {
6258 		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio1_bar);
6259 		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio1_sz);
6260 		bar_en |= XPEC_NEST_STK_BAR_EN_MMIO1;
6261 	}
6262 
6263 	/* Set the appropriate enables */
6264 	xscom_read(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, &val);
6265 	val |= bar_en;
6266 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, val);
6267 
6268 	/* No MMIO windows ? Barf ! */
6269 	if (mmio_win_sz == 0) {
6270 		prerror("PHB[%d:%d] No MMIO windows enabled !\n", gcid, phb_num);
6271 		return;
6272 	}
6273 
6274 	/* Clear errors in PFIR and NFIR */
6275 	xscom_write(gcid, pci_stack + XPEC_PCI_STK_PCI_FIR, 0);
6276 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_PCI_NFIR, 0);
6277 
6278 	/* Check ETU reset */
6279 	xscom_read(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, &val);
6280 	prlog_once(PR_DEBUG, "ETU reset: %llx\n", val);
6281 	xscom_write(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, 0);
6282 	time_wait_ms(1);
6283 
6284 	// show we can read phb mmio space
6285 	foo = (void *)(phb_bar + 0x800); // phb version register
6286 	prlog_once(PR_DEBUG, "Version reg: 0x%016llx\n", in_be64(foo));
6287 
6288 	/* Create PHB node */
6289 	np = dt_new_addr(dt_root, "pciex", phb_bar);
6290 	if (!np)
6291 		return;
6292 
6293 	if (is_phb5())
6294 		dt_add_property_strings(np, "compatible", "ibm,power10-pciex", "ibm,ioda3-phb");
6295 	else
6296 		dt_add_property_strings(np, "compatible", "ibm,power9-pciex", "ibm,ioda3-phb");
6297 	dt_add_property_strings(np, "device_type", "pciex");
6298 	dt_add_property_u64s(np, "reg",
6299 				phb_bar, 0x1000,
6300 				irq_bar, 0x10000000);
6301 
6302 	/* Everything else is handled later by skiboot, we just
6303 	 * stick a few hints here
6304 	 */
6305 	dt_add_property_cells(np, "ibm,xscom-bases",
6306 			      nest_base, nest_stack, pci_base, pci_stack, etu_base);
6307 	dt_add_property(np, "ibm,mmio-windows", mmio_win, 8 * mmio_win_sz);
6308 	dt_add_property_cells(np, "ibm,phb-index", phb_num);
6309 	dt_add_property_cells(np, "ibm,phb-pec-index", pec_index);
6310 	dt_add_property_cells(np, "ibm,phb-stack", stk_node->phandle);
6311 	dt_add_property_cells(np, "ibm,phb-stack-index", stk_index);
6312 	dt_add_property_cells(np, "ibm,chip-id", gcid);
6313 
6314 	/* read the hub-id out of the pbcq node */
6315 	if (dt_has_node_property(stk_node->parent, "ibm,hub-id", NULL)) {
6316 		uint32_t hub_id;
6317 
6318 		hub_id = dt_prop_get_u32(stk_node->parent, "ibm,hub-id");
6319 		dt_add_property_cells(np, "ibm,hub-id", hub_id);
6320 	}
6321 
6322 	if (dt_has_node_property(stk_node->parent, "ibm,loc-code", NULL)) {
6323 		const char *lc = dt_prop_get(stk_node->parent, "ibm,loc-code");
6324 		dt_add_property_string(np, "ibm,loc-code", lc);
6325 	}
6326 	if (dt_has_node_property(stk_node, "ibm,lane-eq", NULL)) {
6327 		size_t leq_size;
6328 		const void *leq = dt_prop_get_def_size(stk_node, "ibm,lane-eq",
6329 						       NULL, &leq_size);
6330 		if (leq != NULL && leq_size >= 6 * 8)
6331 			dt_add_property(np, "ibm,lane-eq", leq, leq_size);
6332 	}
6333 	if (dt_has_node_property(stk_node, "ibm,capp-ucode", NULL)) {
6334 		capp_ucode_base = dt_prop_get_u32(stk_node, "ibm,capp-ucode");
6335 		dt_add_property_cells(np, "ibm,capp-ucode", capp_ucode_base);
6336 	}
6337 	if (dt_has_node_property(stk_node, "ibm,max-link-speed", NULL)) {
6338 		max_link_speed = dt_prop_get_u32(stk_node, "ibm,max-link-speed");
6339 		dt_add_property_cells(np, "ibm,max-link-speed", max_link_speed);
6340 	}
6341 	dt_add_property_cells(np, "ibm,capi-flags",
6342 			      OPAL_PHB_CAPI_FLAG_SNOOP_CONTROL);
6343 
6344 	add_chip_dev_associativity(np);
6345 }
6346 
6347 static void phb4_probe_pbcq(struct dt_node *pbcq)
6348 {
6349 	uint32_t nest_base, pci_base, pec_index;
6350 	struct dt_node *stk;
6351 
6352 	/* REMOVEME: force this for now until we stabilise PCIe */
6353 	verbose_eeh = 1;
6354 
6355 	nest_base = dt_get_address(pbcq, 0, NULL);
6356 	pci_base = dt_get_address(pbcq, 1, NULL);
6357 	pec_index = dt_prop_get_u32(pbcq, "ibm,pec-index");
6358 
6359 	dt_for_each_child(pbcq, stk) {
6360 		if (dt_node_is_enabled(stk))
6361 			phb4_probe_stack(stk, pec_index, nest_base, pci_base);
6362 	}
6363 }
6364 
6365 void probe_phb4(void)
6366 {
6367 	struct dt_node *np;
6368 	const char *s;
6369 
6370 	pci_eeh_mmio = !nvram_query_eq_dangerous("pci-eeh-mmio", "disabled");
6371 	pci_retry_all = nvram_query_eq_dangerous("pci-retry-all", "true");
6372 	s = nvram_query_dangerous("phb-rx-err-max");
6373 	if (s) {
6374 		rx_err_max = atoi(s);
6375 
6376 		/* Clip to uint8_t used by hardware */
6377 		rx_err_max = MAX(rx_err_max, 0);
6378 		rx_err_max = MIN(rx_err_max, 255);
6379 	}
6380 
6381 	if (is_phb5()) {
6382 		prlog(PR_DEBUG, "PHB5: Maximum RX errors during training: %d\n", rx_err_max);
6383 		/* Look for PBCQ XSCOM nodes */
6384 		dt_for_each_compatible(dt_root, np, "ibm,power10-pbcq")
6385 			phb4_probe_pbcq(np);
6386 
6387 		/* Look for newly created PHB nodes */
6388 		dt_for_each_compatible(dt_root, np, "ibm,power10-pciex")
6389 			phb4_create(np);
6390 	} else {
6391 		prlog(PR_DEBUG, "PHB4: Maximum RX errors during training: %d\n", rx_err_max);
6392 		/* Look for PBCQ XSCOM nodes */
6393 		dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
6394 			phb4_probe_pbcq(np);
6395 
6396 		/* Look for newly created PHB nodes */
6397 		dt_for_each_compatible(dt_root, np, "ibm,power9-pciex")
6398 			phb4_create(np);
6399 	}
6400 }
6401