1 /* Copyright 2013-2016 IBM Corp.
2  * Copyright 2018 Raptor Engineering, LLC
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  * 	http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
13  * implied.
14  * See the License for the specific language governing permissions and
15  * limitations under the License.
16  */
17 /*
18  * PHB4 support
19  *
20  */
21 
22 /*
23  *
24  * FIXME:
25  *   More stuff for EEH support:
26  *      - PBCQ error reporting interrupt
27  *	- I2C-based power management (replacing SHPC)
28  *	- Directly detect fenced PHB through one dedicated HW reg
29  */
30 
31 /*
32  * This is a simplified view of the PHB4 reset and link training steps
33  *
34  * Step 1:
35  * - Check for hotplug status:
36  *  o PHB_PCIE_HOTPLUG_STATUS bit PHB_PCIE_HPSTAT_PRESENCE
37  *  o If not set -> Bail out (Slot is empty)
38  *
39  * Step 2:
40  * - Do complete PHB reset:
41  *   o PHB/ETU reset procedure
42  *
43  * Step 3:
44  * - Drive PERST active (skip if already asserted. ie. after cold reboot)
45  * - Wait 250ms (for cards to reset)
46  *   o powervm have used 250ms for a long time without any problems
47  *
48  * Step 4:
49  * - Drive PERST inactive
50  *
51  * Step 5:
52  * - Look for inband presence:
53  *   o From PERST we have two stages to get inband presence detected
54  *     1) Devices must enter Detect state within 20 ms of the end of
55  *          Fundamental Reset
56  *     2) Receiver detect pulse are every 12ms
57  *      - Hence minimum wait time 20 + 12 = 32ms
58  *   o Unfortunatey, we've seen cards take 440ms
59  *   o Hence we are conservative and poll here for 1000ms (> 440ms)
60  * - If no inband presence after 100ms -> Bail out (Slot is broken)
61  *   o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_INBAND_PRESENCE
62  *
63  * Step 6:
64  * - Look for link training done:
65  *   o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_TL_LINKACT
66  * - If not set after 2000ms, Retry (3 times) -> Goto Step 2
67  *   o phy lockup could link training failure, hence going back to a
68  *     complete PHB reset on retry
69  *   o not expect to happen very often
70  *
71  * Step 7:
72  * - Wait for 1 sec (before touching device config space):
73  * -  From PCIe spec:
74  *     Root Complex and/or system software must allow at least 1.0 s after
75  *     a Conventional Reset of a device, before it may determine that a
76  *     device which fails to return a Successful Completion status for a
77  *     valid Configuration Request is a broken device.
78  *
79  * Step 8:
80  * - Sanity check for fence and link still up:
81  *   o If fenced or link down, Retry (3 times) -> Goto Step 2
82  *   o This is not nessary but takes no time and can be useful
83  *   o Once we leave here, much harder to recover from errors
84  *
85  * Step 9:
86  * - Check for optimised link for directly attached devices:
87  *   o Wait for CRS (so we can read device config space)
88  *   o Check chip and device are in whitelist. if not, Goto Step 10
89  *   o If trained link speed is degraded, retry ->  Goto Step 2
90  *   o If trained link width is degraded, retry -> Goto Step 2
91  *   o If still degraded after 3 retries. Give up, Goto Step 10.
92  *
93  * Step 10:
94  *  - PHB good, start probing config space.
95  *    o core/pci.c: pci_reset_phb() -> pci_scan_phb()
96  */
97 
98 
99 #undef NO_ASB
100 #undef LOG_CFG
101 
102 #include <skiboot.h>
103 #include <io.h>
104 #include <timebase.h>
105 #include <pci.h>
106 #include <pci-cfg.h>
107 #include <pci-slot.h>
108 #include <vpd.h>
109 #include <interrupts.h>
110 #include <opal.h>
111 #include <cpu.h>
112 #include <device.h>
113 #include <ccan/str/str.h>
114 #include <ccan/array_size/array_size.h>
115 #include <xscom.h>
116 #include <affinity.h>
117 #include <phb4.h>
118 #include <phb4-regs.h>
119 #include <phb4-capp.h>
120 #include <capp.h>
121 #include <fsp.h>
122 #include <chip.h>
123 #include <chiptod.h>
124 #include <xive.h>
125 #include <xscom-p9-regs.h>
126 #include <phys-map.h>
127 #include <nvram.h>
128 
129 /* Enable this to disable error interrupts for debug purposes */
130 #define DISABLE_ERR_INTS
131 
132 static void phb4_init_hw(struct phb4 *p);
133 
134 #define PHBDBG(p, fmt, a...)	prlog(PR_DEBUG, "PHB#%04x[%d:%d]: " fmt, \
135 				      (p)->phb.opal_id, (p)->chip_id, \
136 				      (p)->index,  ## a)
137 #define PHBINF(p, fmt, a...)	prlog(PR_INFO, "PHB#%04x[%d:%d]: " fmt, \
138 				      (p)->phb.opal_id, (p)->chip_id, \
139 				      (p)->index,  ## a)
140 #define PHBNOTICE(p, fmt, a...)	prlog(PR_NOTICE, "PHB#%04x[%d:%d]: " fmt, \
141 				      (p)->phb.opal_id, (p)->chip_id, \
142 				      (p)->index,  ## a)
143 #define PHBERR(p, fmt, a...)	prlog(PR_ERR, "PHB#%04x[%d:%d]: " fmt, \
144 				      (p)->phb.opal_id, (p)->chip_id, \
145 				      (p)->index,  ## a)
146 #ifdef LOG_CFG
147 #define PHBLOGCFG(p, fmt, a...)	PHBDBG(p, fmt, ## a)
148 #else
149 #define PHBLOGCFG(p, fmt, a...) do {} while (0)
150 #endif
151 
152 #define PHB4_CAN_STORE_EOI(p) XIVE_STORE_EOI_ENABLED
153 
154 static bool pci_tracing;
155 static bool pci_eeh_mmio;
156 static bool pci_retry_all;
157 static int rx_err_max = PHB4_RX_ERR_MAX;
158 
159 /* Note: The "ASB" name is historical, practically this means access via
160  * the XSCOM backdoor
161  */
phb4_read_reg_asb(struct phb4 * p,uint32_t offset)162 static inline uint64_t phb4_read_reg_asb(struct phb4 *p, uint32_t offset)
163 {
164 #ifdef NO_ASB
165 	return in_be64(p->regs + offset);
166 #else
167 	int64_t rc;
168 	uint64_t addr, val;
169 
170 	/* Address register: must use 4 bytes for built-in config space.
171 	 *
172 	 * This path isn't usable for outbound configuration space
173 	 */
174 	if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
175 		PHBERR(p, "XSCOM unaligned access to CONFIG_DATA unsupported\n");
176 		return -1ull;
177 	}
178 	addr = XETU_HV_IND_ADDR_VALID | offset;
179 	if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
180 		addr |= XETU_HV_IND_ADDR_4B;
181  	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
182 	if (rc != 0) {
183 		PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
184 		return -1ull;
185 	}
186  	rc = xscom_read(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, &val);
187 	if (rc != 0) {
188 		PHBERR(p, "XSCOM error reading register 0x%x\n", offset);
189 		return -1ull;
190 	}
191 	return val;
192 #endif
193 }
194 
phb4_write_reg_asb(struct phb4 * p,uint32_t offset,uint64_t val)195 static inline void phb4_write_reg_asb(struct phb4 *p,
196 				      uint32_t offset, uint64_t val)
197 {
198 #ifdef NO_ASB
199 	out_be64(p->regs + offset, val);
200 #else
201 	int64_t rc;
202 	uint64_t addr;
203 
204 	/* Address register: must use 4 bytes for built-in config space.
205 	 *
206 	 * This path isn't usable for outbound configuration space
207 	 */
208 	if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
209 		PHBERR(p, "XSCOM access to CONFIG_DATA unsupported\n");
210 		return;
211 	}
212 	addr = XETU_HV_IND_ADDR_VALID | offset;
213 	if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
214 		addr |= XETU_HV_IND_ADDR_4B;
215  	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
216 	if (rc != 0) {
217 		PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
218 		return;
219 	}
220  	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, val);
221 	if (rc != 0) {
222 		PHBERR(p, "XSCOM error writing register 0x%x\n", offset);
223 		return;
224 	}
225 #endif
226 }
227 
phb4_read_reg(struct phb4 * p,uint32_t offset)228 static uint64_t phb4_read_reg(struct phb4 *p, uint32_t offset)
229 {
230 	if (p->flags & PHB4_CFG_USE_ASB)
231 		return phb4_read_reg_asb(p, offset);
232 	else
233 		return in_be64(p->regs + offset);
234 }
235 
phb4_write_reg(struct phb4 * p,uint32_t offset,uint64_t val)236 static void phb4_write_reg(struct phb4 *p, uint32_t offset, uint64_t val)
237 {
238 	if (p->flags & PHB4_CFG_USE_ASB)
239 		phb4_write_reg_asb(p, offset, val);
240 	else
241 		return out_be64(p->regs + offset, val);
242 }
243 
244 /* Helper to select an IODA table entry */
phb4_ioda_sel(struct phb4 * p,uint32_t table,uint32_t addr,bool autoinc)245 static inline void phb4_ioda_sel(struct phb4 *p, uint32_t table,
246 				 uint32_t addr, bool autoinc)
247 {
248 	phb4_write_reg(p, PHB_IODA_ADDR,
249 		       (autoinc ? PHB_IODA_AD_AUTOINC : 0)	|
250 		       SETFIELD(PHB_IODA_AD_TSEL, 0ul, table)	|
251 		       SETFIELD(PHB_IODA_AD_TADR, 0ul, addr));
252 }
253 
254 /*
255  * Configuration space access
256  *
257  * The PHB lock is assumed to be already held
258  */
phb4_pcicfg_check(struct phb4 * p,uint32_t bdfn,uint32_t offset,uint32_t size,uint16_t * pe)259 static int64_t phb4_pcicfg_check(struct phb4 *p, uint32_t bdfn,
260 				 uint32_t offset, uint32_t size,
261 				 uint16_t *pe)
262 {
263 	uint32_t sm = size - 1;
264 
265 	if (offset > 0xfff || bdfn > 0xffff)
266 		return OPAL_PARAMETER;
267 	if (offset & sm)
268 		return OPAL_PARAMETER;
269 
270 	/* The root bus only has a device at 0 and we get into an
271 	 * error state if we try to probe beyond that, so let's
272 	 * avoid that and just return an error to Linux
273 	 */
274 	if ((bdfn >> 8) == 0 && (bdfn & 0xff))
275 		return OPAL_HARDWARE;
276 
277 	/* Check PHB state */
278 	if (p->broken)
279 		return OPAL_HARDWARE;
280 
281 	/* Fetch the PE# from cache */
282 	*pe = p->tbl_rtt[bdfn];
283 
284 	return OPAL_SUCCESS;
285 }
286 
phb4_rc_read(struct phb4 * p,uint32_t offset,uint8_t sz,void * data,bool use_asb)287 static int64_t phb4_rc_read(struct phb4 *p, uint32_t offset, uint8_t sz,
288 			    void *data, bool use_asb)
289 {
290 	uint32_t reg = offset & ~3;
291 	uint32_t oval;
292 
293 	/* Some registers are handled locally */
294 	switch (reg) {
295 		/* Bridge base/limit registers are cached here as HW
296 		 * doesn't implement them (it hard codes values that
297 		 * will confuse a proper PCI implementation).
298 		 */
299 	case PCI_CFG_MEM_BASE:		/* Includes PCI_CFG_MEM_LIMIT */
300 		oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
301 		break;
302 	case PCI_CFG_PREF_MEM_BASE:	/* Includes PCI_CFG_PREF_MEM_LIMIT */
303 		oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
304 		oval |= 0x00010001;
305 		break;
306 	case PCI_CFG_IO_BASE_U16:	/* Includes PCI_CFG_IO_LIMIT_U16 */
307 		oval = 0;
308 		break;
309 	case PCI_CFG_PREF_MEM_BASE_U32:
310 	case PCI_CFG_PREF_MEM_LIMIT_U32:
311 		oval = p->rc_cache[(reg - 0x20) >> 2];
312 		break;
313 	default:
314 		oval = 0xffffffff; /* default if offset too big */
315 		if (reg < PHB_RC_CONFIG_SIZE) {
316 			if (use_asb)
317 				oval = bswap_32(phb4_read_reg_asb(p, PHB_RC_CONFIG_BASE
318 								  + reg));
319 			else
320 				oval = in_le32(p->regs + PHB_RC_CONFIG_BASE + reg);
321 		}
322 	}
323 
324 	/* Apply any post-read fixups */
325 	switch (reg) {
326 	case PCI_CFG_IO_BASE:
327 		oval |= 0x01f1; /* Set IO base < limit to disable the window */
328 		break;
329 	}
330 
331 	switch (sz) {
332 	case 1:
333 		offset &= 3;
334 		*((uint8_t *)data) = (oval >> (offset << 3)) & 0xff;
335 		PHBLOGCFG(p, "000 CFG08 Rd %02x=%02x\n",
336 			  offset, *((uint8_t *)data));
337 		break;
338 	case 2:
339 		offset &= 2;
340 		*((uint16_t *)data) = (oval >> (offset << 3)) & 0xffff;
341 		PHBLOGCFG(p, "000 CFG16 Rd %02x=%04x\n",
342 			  offset, *((uint16_t *)data));
343 		break;
344 	case 4:
345 		*((uint32_t *)data) = oval;
346 		PHBLOGCFG(p, "000 CFG32 Rd %02x=%08x\n",
347 			  offset, *((uint32_t *)data));
348 		break;
349 	default:
350 		assert(false);
351 	}
352 	return OPAL_SUCCESS;
353 }
354 
phb4_rc_write(struct phb4 * p,uint32_t offset,uint8_t sz,uint32_t val,bool use_asb)355 static int64_t phb4_rc_write(struct phb4 *p, uint32_t offset, uint8_t sz,
356 			     uint32_t val, bool use_asb)
357 {
358 	uint32_t reg = offset & ~3;
359 	uint32_t old, mask, shift, oldold;
360 	int64_t rc;
361 
362 	if (reg > PHB_RC_CONFIG_SIZE)
363 		return OPAL_SUCCESS;
364 
365 	/* If size isn't 4-bytes, do a RMW cycle */
366 	if (sz < 4) {
367 		rc = phb4_rc_read(p, reg, 4, &old, use_asb);
368 		if (rc != OPAL_SUCCESS)
369 			return rc;
370 
371 		/*
372 		 * Since we have to Read-Modify-Write here, we need to filter
373 		 * out registers that have write-1-to-clear bits to prevent
374 		 * clearing stuff we shouldn't be.  So for any register this
375 		 * applies to, mask out those bits.
376 		 */
377 		oldold = old;
378 		switch(reg) {
379 		case 0x1C: /* Secondary status */
380 			old &= 0x00ffffff; /* mask out 24-31 */
381 			break;
382 		case 0x50: /* EC - Device status */
383 			old &= 0xfff0ffff; /* mask out 16-19 */
384 			break;
385 		case 0x58: /* EC - Link status */
386 			old &= 0x3fffffff; /* mask out 30-31 */
387 			break;
388 		case 0x78: /* EC - Link status 2 */
389 			old &= 0xf000ffff; /* mask out 16-27 */
390 			break;
391 		/* These registers *only* have write-1-to-clear bits */
392 		case 0x104: /* AER - Uncorr. error status */
393 		case 0x110: /* AER - Corr. error status */
394 		case 0x130: /* AER - Root error status */
395 		case 0x180: /* P16 - status */
396 		case 0x184: /* P16 - LDPM status */
397 		case 0x188: /* P16 - FRDPM status */
398 		case 0x18C: /* P16 - SRDPM status */
399 			old &= 0x00000000;
400 			break;
401 		}
402 
403 		if (old != oldold) {
404 			PHBLOGCFG(p, "Rewrote %x to %x for reg %x for W1C\n",
405 				  oldold, old, reg);
406 		}
407 
408 		if (sz == 1) {
409 			shift = (offset & 3) << 3;
410 			mask = 0xff << shift;
411 			val = (old & ~mask) | ((val & 0xff) << shift);
412 		} else {
413 			shift = (offset & 2) << 3;
414 			mask = 0xffff << shift;
415 			val = (old & ~mask) | ((val & 0xffff) << shift);
416 		}
417 	}
418 
419 	/* Some registers are handled locally */
420 	switch (reg) {
421 		/* See comment in phb4_rc_read() */
422 	case PCI_CFG_MEM_BASE:		/* Includes PCI_CFG_MEM_LIMIT */
423 	case PCI_CFG_PREF_MEM_BASE:	/* Includes PCI_CFG_PREF_MEM_LIMIT */
424 	case PCI_CFG_PREF_MEM_BASE_U32:
425 	case PCI_CFG_PREF_MEM_LIMIT_U32:
426 		p->rc_cache[(reg - 0x20) >> 2] = val;
427 		break;
428 	case PCI_CFG_IO_BASE_U16:	/* Includes PCI_CFG_IO_LIMIT_U16 */
429 		break;
430 	default:
431 		/* Workaround PHB config space enable */
432 		PHBLOGCFG(p, "000 CFG%02d Wr %02x=%08x\n", 8 * sz, reg, val);
433 		if (use_asb)
434 			phb4_write_reg_asb(p, PHB_RC_CONFIG_BASE + reg, val);
435 		else
436 			out_le32(p->regs + PHB_RC_CONFIG_BASE + reg, val);
437 	}
438 	return OPAL_SUCCESS;
439 }
440 
phb4_pcicfg_read(struct phb4 * p,uint32_t bdfn,uint32_t offset,uint32_t size,void * data)441 static int64_t phb4_pcicfg_read(struct phb4 *p, uint32_t bdfn,
442 				uint32_t offset, uint32_t size,
443 				void *data)
444 {
445 	uint64_t addr, val64;
446 	int64_t rc;
447 	uint16_t pe;
448 	bool use_asb = false;
449 
450 	rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
451 	if (rc)
452 		return rc;
453 
454 	if (p->flags & PHB4_AIB_FENCED) {
455 		if (!(p->flags & PHB4_CFG_USE_ASB))
456 			return OPAL_HARDWARE;
457 		if (bdfn != 0)
458 			return OPAL_HARDWARE;
459 		use_asb = true;
460 	} else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
461 		return OPAL_HARDWARE;
462 	}
463 
464 	/* Handle per-device filters */
465 	rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
466 				    (uint32_t *)data, false);
467 	if (rc != OPAL_PARTIAL)
468 		return rc;
469 
470 	/* Handle root complex MMIO based config space */
471 	if (bdfn == 0)
472 		return phb4_rc_read(p, offset, size, data, use_asb);
473 
474 	addr = PHB_CA_ENABLE;
475 	addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
476 	addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
477 	addr = SETFIELD(PHB_CA_PE, addr, pe);
478 	if (use_asb) {
479 		phb4_write_reg_asb(p, PHB_CONFIG_ADDRESS, addr);
480 		sync();
481 		val64 = bswap_64(phb4_read_reg_asb(p, PHB_CONFIG_DATA));
482 		switch(size) {
483 		case 1:
484 			*((uint8_t *)data) = val64 >> (8 * (offset & 3));
485 			break;
486 		case 2:
487 			*((uint16_t *)data) = val64 >> (8 * (offset & 2));
488 			break;
489 		case 4:
490 			*((uint32_t *)data) = val64;
491 			break;
492 		default:
493 			return OPAL_PARAMETER;
494 		}
495 	} else {
496 		out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
497 		switch(size) {
498 		case 1:
499 			*((uint8_t *)data) =
500 				in_8(p->regs + PHB_CONFIG_DATA + (offset & 3));
501 			PHBLOGCFG(p, "%03x CFG08 Rd %02x=%02x\n",
502 				  bdfn, offset, *((uint8_t *)data));
503 			break;
504 		case 2:
505 			*((uint16_t *)data) =
506 				in_le16(p->regs + PHB_CONFIG_DATA + (offset & 2));
507 			PHBLOGCFG(p, "%03x CFG16 Rd %02x=%04x\n",
508 				  bdfn, offset, *((uint16_t *)data));
509 			break;
510 		case 4:
511 			*((uint32_t *)data) = in_le32(p->regs + PHB_CONFIG_DATA);
512 			PHBLOGCFG(p, "%03x CFG32 Rd %02x=%08x\n",
513 				  bdfn, offset, *((uint32_t *)data));
514 			break;
515 		default:
516 			return OPAL_PARAMETER;
517 		}
518 	}
519 	return OPAL_SUCCESS;
520 }
521 
522 
523 #define PHB4_PCI_CFG_READ(size, type)					\
524 static int64_t phb4_pcicfg_read##size(struct phb *phb, uint32_t bdfn,	\
525 				      uint32_t offset, type *data)	\
526 {									\
527 	struct phb4 *p = phb_to_phb4(phb);				\
528 									\
529 	/* Initialize data in case of error */				\
530 	*data = (type)0xffffffff;					\
531 	return phb4_pcicfg_read(p, bdfn, offset, sizeof(type), data);	\
532 }
533 
phb4_pcicfg_write(struct phb4 * p,uint32_t bdfn,uint32_t offset,uint32_t size,uint32_t data)534 static int64_t phb4_pcicfg_write(struct phb4 *p, uint32_t bdfn,
535 				 uint32_t offset, uint32_t size,
536 				 uint32_t data)
537 {
538 	uint64_t addr;
539 	int64_t rc;
540 	uint16_t pe;
541 	bool use_asb = false;
542 
543 	rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
544 	if (rc)
545 		return rc;
546 
547 	if (p->flags & PHB4_AIB_FENCED) {
548 		if (!(p->flags & PHB4_CFG_USE_ASB))
549 			return OPAL_HARDWARE;
550 		if (bdfn != 0)
551 			return OPAL_HARDWARE;
552 		use_asb = true;
553 	} else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
554 		return OPAL_HARDWARE;
555 	}
556 
557 	/* Handle per-device filters */
558 	rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
559 				    (uint32_t *)&data, true);
560 	if (rc != OPAL_PARTIAL)
561 		return rc;
562 
563 	/* Handle root complex MMIO based config space */
564 	if (bdfn == 0)
565 		return phb4_rc_write(p, offset, size, data, use_asb);
566 
567 	addr = PHB_CA_ENABLE;
568 	addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
569 	addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
570 	addr = SETFIELD(PHB_CA_PE, addr, pe);
571 	if (use_asb) {
572 		/* We don't support ASB config space writes */
573 		return OPAL_UNSUPPORTED;
574 	} else {
575 		out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
576 		switch(size) {
577 		case 1:
578 			out_8(p->regs + PHB_CONFIG_DATA + (offset & 3), data);
579 			break;
580 		case 2:
581 			out_le16(p->regs + PHB_CONFIG_DATA + (offset & 2), data);
582 			break;
583 		case 4:
584 			out_le32(p->regs + PHB_CONFIG_DATA, data);
585 			break;
586 		default:
587 			return OPAL_PARAMETER;
588 		}
589 	}
590 	PHBLOGCFG(p, "%03x CFG%d Wr %02x=%08x\n", bdfn, 8 * size, offset, data);
591 	return OPAL_SUCCESS;
592 }
593 
594 #define PHB4_PCI_CFG_WRITE(size, type)					\
595 static int64_t phb4_pcicfg_write##size(struct phb *phb, uint32_t bdfn,	\
596 				       uint32_t offset, type data)	\
597 {									\
598 	struct phb4 *p = phb_to_phb4(phb);				\
599 									\
600 	return phb4_pcicfg_write(p, bdfn, offset, sizeof(type), data);	\
601 }
602 
603 PHB4_PCI_CFG_READ(8, u8)
604 PHB4_PCI_CFG_READ(16, u16)
605 PHB4_PCI_CFG_READ(32, u32)
606 PHB4_PCI_CFG_WRITE(8, u8)
607 PHB4_PCI_CFG_WRITE(16, u16)
608 PHB4_PCI_CFG_WRITE(32, u32)
609 
phb4_choose_bus(struct phb * phb __unused,struct pci_device * bridge __unused,uint8_t candidate,uint8_t * max_bus __unused,bool * use_max)610 static uint8_t phb4_choose_bus(struct phb *phb __unused,
611 			       struct pci_device *bridge __unused,
612 			       uint8_t candidate, uint8_t *max_bus __unused,
613 			       bool *use_max)
614 {
615 	/* Use standard bus number selection */
616 	*use_max = false;
617 	return candidate;
618 }
619 
phb4_get_reserved_pe_number(struct phb * phb)620 static int64_t phb4_get_reserved_pe_number(struct phb *phb)
621 {
622 	struct phb4 *p = phb_to_phb4(phb);
623 
624 	return PHB4_RESERVED_PE_NUM(p);
625 }
626 
627 
phb4_root_port_init(struct phb * phb,struct pci_device * dev,int ecap,int aercap)628 static void phb4_root_port_init(struct phb *phb, struct pci_device *dev,
629 				int ecap, int aercap)
630 {
631 	struct phb4 *p = phb_to_phb4(phb);
632 	struct pci_slot *slot = dev->slot;
633 	uint16_t bdfn = dev->bdfn;
634 	uint16_t val16;
635 	uint32_t val32;
636 
637 	/*
638 	 * Use the PHB's callback so that UTL events will be masked or
639 	 * unmasked when the link is down or up.
640 	 */
641 	if (dev->slot && dev->slot->ops.prepare_link_change &&
642 	    phb->slot && phb->slot->ops.prepare_link_change)
643 		dev->slot->ops.prepare_link_change =
644 			phb->slot->ops.prepare_link_change;
645 
646 	// FIXME: check recommended init values for phb4
647 
648 	/*
649 	 * Enable the bridge slot capability in the root port's config
650 	 * space. This should probably be done *before* we start
651 	 * scanning config space, but we need a pci_device struct to
652 	 * exist before we do a slot lookup so *faaaaaaaaaaaaaart*
653 	 */
654 	if (slot && slot->pluggable && slot->power_limit) {
655 		uint64_t val;
656 
657 		val = in_be64(p->regs + PHB_PCIE_SCR);
658 		val |= PHB_PCIE_SCR_SLOT_CAP;
659 		out_be64(p->regs + PHB_PCIE_SCR, val);
660 
661 		/* update the cached slotcap */
662 		pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_SLOTCAP,
663 				&slot->slot_cap);
664 	}
665 
666 	/* Enable SERR and parity checking */
667 	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
668 	val16 |= (PCI_CFG_CMD_SERR_EN | PCI_CFG_CMD_PERR_RESP |
669 		  PCI_CFG_CMD_MEM_EN);
670 	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
671 
672 	/* Enable reporting various errors */
673 	if (!ecap) return;
674 	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
675 	val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
676 		  PCICAP_EXP_DEVCTL_NFE_REPORT |
677 		  PCICAP_EXP_DEVCTL_FE_REPORT |
678 		  PCICAP_EXP_DEVCTL_UR_REPORT);
679 	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
680 
681 	if (!aercap) return;
682 
683 	/* Mask various unrecoverable errors */
684 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, &val32);
685 	val32 |= (PCIECAP_AER_UE_MASK_POISON_TLP |
686 		  PCIECAP_AER_UE_MASK_COMPL_TIMEOUT |
687 		  PCIECAP_AER_UE_MASK_COMPL_ABORT |
688 		  PCIECAP_AER_UE_MASK_ECRC);
689 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, val32);
690 
691 	/* Report various unrecoverable errors as fatal errors */
692 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, &val32);
693 	val32 |= (PCIECAP_AER_UE_SEVERITY_DLLP |
694 		  PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
695 		  PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
696 		  PCIECAP_AER_UE_SEVERITY_UNEXP_COMPL |
697 		  PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
698 		  PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP);
699 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);
700 
701 	/* Mask various recoverable errors */
702 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, &val32);
703 	val32 |= PCIECAP_AER_CE_MASK_ADV_NONFATAL;
704 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
705 
706 	/* Enable ECRC check */
707 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
708 	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
709 		  PCIECAP_AER_CAPCTL_ECRCC_EN);
710 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
711 
712 	/* Enable all error reporting */
713 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, &val32);
714 	val32 |= (PCIECAP_AER_RERR_CMD_FE |
715 		  PCIECAP_AER_RERR_CMD_NFE |
716 		  PCIECAP_AER_RERR_CMD_CE);
717 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, val32);
718 }
719 
phb4_switch_port_init(struct phb * phb,struct pci_device * dev,int ecap,int aercap)720 static void phb4_switch_port_init(struct phb *phb,
721 				  struct pci_device *dev,
722 				  int ecap, int aercap)
723 {
724 	uint16_t bdfn = dev->bdfn;
725 	uint16_t val16;
726 	uint32_t val32;
727 
728 	// FIXME: update AER settings for phb4
729 
730 	/* Enable SERR and parity checking and disable INTx */
731 	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
732 	val16 |= (PCI_CFG_CMD_PERR_RESP |
733 		  PCI_CFG_CMD_SERR_EN |
734 		  PCI_CFG_CMD_INTx_DIS);
735 	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
736 
737 	/* Disable partity error and enable system error */
738 	pci_cfg_read16(phb, bdfn, PCI_CFG_BRCTL, &val16);
739 	val16 &= ~PCI_CFG_BRCTL_PERR_RESP_EN;
740 	val16 |= PCI_CFG_BRCTL_SERR_EN;
741 	pci_cfg_write16(phb, bdfn, PCI_CFG_BRCTL, val16);
742 
743 	/* Enable reporting various errors */
744 	if (!ecap) return;
745 	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
746 	val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
747 		  PCICAP_EXP_DEVCTL_NFE_REPORT |
748 		  PCICAP_EXP_DEVCTL_FE_REPORT);
749 	/* HW279570 - Disable reporting of correctable errors */
750 	val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
751 	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);
752 
753 	/* Unmask all unrecoverable errors */
754 	if (!aercap) return;
755 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, 0x0);
756 
757 	/* Severity of unrecoverable errors */
758 	if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT)
759 		val32 = (PCIECAP_AER_UE_SEVERITY_DLLP |
760 			 PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
761 			 PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
762 			 PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
763 			 PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP |
764 			 PCIECAP_AER_UE_SEVERITY_INTERNAL);
765 	else
766 		val32 = (PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
767 			 PCIECAP_AER_UE_SEVERITY_INTERNAL);
768 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);
769 
770 	/*
771 	 * Mask various correctable errors
772 	 */
773 	val32 = PCIECAP_AER_CE_MASK_ADV_NONFATAL;
774 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);
775 
776 	/* Enable ECRC generation and disable ECRC check */
777 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
778 	val32 |= PCIECAP_AER_CAPCTL_ECRCG_EN;
779 	val32 &= ~PCIECAP_AER_CAPCTL_ECRCC_EN;
780 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
781 }
782 
phb4_endpoint_init(struct phb * phb,struct pci_device * dev,int ecap,int aercap)783 static void phb4_endpoint_init(struct phb *phb,
784 			       struct pci_device *dev,
785 			       int ecap, int aercap)
786 {
787 	uint16_t bdfn = dev->bdfn;
788 	uint16_t val16;
789 	uint32_t val32;
790 
791 	/* Enable SERR and parity checking */
792 	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
793 	val16 |= (PCI_CFG_CMD_PERR_RESP |
794 		  PCI_CFG_CMD_SERR_EN);
795 	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);
796 
797 	/* Enable reporting various errors */
798 	if (!ecap) return;
799 	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
800 	val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
801 	val16 |= (PCICAP_EXP_DEVCTL_NFE_REPORT |
802 		  PCICAP_EXP_DEVCTL_FE_REPORT |
803 		  PCICAP_EXP_DEVCTL_UR_REPORT);
804 
805 	/* Enable ECRC generation and check */
806 	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
807 	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
808 		  PCIECAP_AER_CAPCTL_ECRCC_EN);
809 	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
810 }
811 
phb4_pcicfg_no_dstate(void * dev __unused,struct pci_cfg_reg_filter * pcrf,uint32_t offset,uint32_t len __unused,uint32_t * data __unused,bool write)812 static int64_t phb4_pcicfg_no_dstate(void *dev __unused,
813 				     struct pci_cfg_reg_filter *pcrf,
814 				     uint32_t offset, uint32_t len __unused,
815 				     uint32_t *data __unused,  bool write)
816 {
817 	uint32_t loff = offset - pcrf->start;
818 
819 	/* Disable D-state change on children of the PHB. For now we
820 	 * simply block all writes to the PM control/status
821 	 */
822 	if (write && loff >= 4 && loff < 6)
823 		return OPAL_SUCCESS;
824 
825 	return OPAL_PARTIAL;
826 }
827 
phb4_check_device_quirks(struct pci_device * dev)828 static void phb4_check_device_quirks(struct pci_device *dev)
829 {
830 	/* Some special adapter tweaks for devices directly under the PHB */
831 	if (dev->primary_bus != 1)
832 		return;
833 
834 	/* PM quirk */
835 	if (!pci_has_cap(dev, PCI_CFG_CAP_ID_PM, false))
836 		return;
837 
838 	pci_add_cfg_reg_filter(dev,
839 			       pci_cap(dev, PCI_CFG_CAP_ID_PM, false), 8,
840 			       PCI_REG_FLAG_WRITE,
841 			       phb4_pcicfg_no_dstate);
842 }
843 
phb4_device_init(struct phb * phb,struct pci_device * dev,void * data __unused)844 static int phb4_device_init(struct phb *phb, struct pci_device *dev,
845 			    void *data __unused)
846 {
847 	int ecap, aercap;
848 
849 	/* Setup special device quirks */
850 	phb4_check_device_quirks(dev);
851 
852 	/* Common initialization for the device */
853 	pci_device_init(phb, dev);
854 
855 	ecap = pci_cap(dev, PCI_CFG_CAP_ID_EXP, false);
856 	aercap = pci_cap(dev, PCIECAP_ID_AER, true);
857 	if (dev->dev_type == PCIE_TYPE_ROOT_PORT)
858 		phb4_root_port_init(phb, dev, ecap, aercap);
859 	else if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT ||
860 		 dev->dev_type == PCIE_TYPE_SWITCH_DNPORT)
861 		phb4_switch_port_init(phb, dev, ecap, aercap);
862 	else
863 		phb4_endpoint_init(phb, dev, ecap, aercap);
864 
865 	return 0;
866 }
867 
phb4_pci_reinit(struct phb * phb,uint64_t scope,uint64_t data)868 static int64_t phb4_pci_reinit(struct phb *phb, uint64_t scope, uint64_t data)
869 {
870 	struct pci_device *pd;
871 	uint16_t bdfn = data;
872 	int ret;
873 
874 	if (scope != OPAL_REINIT_PCI_DEV)
875 		return OPAL_PARAMETER;
876 
877 	pd = pci_find_dev(phb, bdfn);
878 	if (!pd)
879 		return OPAL_PARAMETER;
880 
881 	ret = phb4_device_init(phb, pd, NULL);
882 	if (ret)
883 		return OPAL_HARDWARE;
884 
885 	return OPAL_SUCCESS;
886 }
887 
888 /* Default value for MBT0, see comments in init_ioda_cache() */
phb4_default_mbt0(struct phb4 * p,unsigned int bar_idx)889 static uint64_t phb4_default_mbt0(struct phb4 *p, unsigned int bar_idx)
890 {
891 	uint64_t mbt0;
892 
893 	switch (p->mbt_size - bar_idx - 1) {
894 	case 0:
895 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
896 		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 3);
897 		break;
898 	case 1:
899 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
900 		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 2);
901 		break;
902 	case 2:
903 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
904 		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 1);
905 		break;
906 	default:
907 		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_PE_SEG);
908 	}
909 	return mbt0;
910 }
911 
912 /*
913  * Clear the saved (cached) IODA state.
914  *
915  * The caches here are used to save the configuration of the IODA tables
916  * done by the OS. When the PHB is reset it loses all of its internal state
917  * so we need to keep a copy to restore from. This function re-initialises
918  * the saved state to sane defaults.
919  */
phb4_init_ioda_cache(struct phb4 * p)920 static void phb4_init_ioda_cache(struct phb4 *p)
921 {
922 	uint32_t i;
923 
924 	/*
925 	 * The RTT entries (RTE) are supposed to be initialised to
926 	 * 0xFF which indicates an invalid PE# for that RTT index
927 	 * (the bdfn). However, we set them to 0x00 since Linux
928 	 * needs to find the devices first by scanning config space
929 	 * and this occurs before PEs have been assigned.
930 	 */
931 	for (i = 0; i < RTT_TABLE_ENTRIES; i++)
932 		p->tbl_rtt[i] = PHB4_RESERVED_PE_NUM(p);
933 	memset(p->tbl_peltv, 0x0, p->tbl_peltv_size);
934 	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
935 
936 	/* XXX Should we mask them ? */
937 	memset(p->mist_cache, 0x0, sizeof(p->mist_cache));
938 
939 	/* Configure MBT entries 1...N */
940 
941 	/* Column 0 is left 0 and will be used fo M32 and configured
942 	 * by the OS. We use MDT column 1..3 for the last 3 BARs, thus
943 	 * allowing Linux to remap those, and setup all the other ones
944 	 * for now in mode 00 (segment# == PE#). By default those
945 	 * columns are set to map the same way.
946 	 */
947 	for (i = 0; i < p->max_num_pes; i++) {
948 		p->mdt_cache[i]  = SETFIELD(IODA3_MDT_PE_B, 0ull, i);
949 		p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_C, 0ull, i);
950 		p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_D, 0ull, i);
951 	}
952 
953 	/* Initialize MBT entries for BARs 1...N */
954 	for (i = 1; i < p->mbt_size; i++) {
955 		p->mbt_cache[i][0] = phb4_default_mbt0(p, i);
956 		p->mbt_cache[i][1] = 0;
957 	}
958 
959 	/* Initialize M32 bar using MBT entry 0, MDT colunm A */
960 	p->mbt_cache[0][0] = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
961 	p->mbt_cache[0][0] |= SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0);
962 	p->mbt_cache[0][0] |= IODA3_MBT0_TYPE_M32 | (p->mm1_base & IODA3_MBT0_BASE_ADDR);
963 	p->mbt_cache[0][1] = IODA3_MBT1_ENABLE | ((~(M32_PCI_SIZE - 1)) & IODA3_MBT1_MASK);
964 }
965 
phb4_wait_bit(struct phb4 * p,uint32_t reg,uint64_t mask,uint64_t want_val)966 static int64_t phb4_wait_bit(struct phb4 *p, uint32_t reg,
967 			     uint64_t mask, uint64_t want_val)
968 {
969 	uint64_t val;
970 
971 	/* Wait for all pending TCE kills to complete
972 	 *
973 	 * XXX Add timeout...
974 	 */
975 	/* XXX SIMICS is nasty... */
976 	if ((reg == PHB_TCE_KILL || reg == PHB_DMARD_SYNC) &&
977 	    chip_quirk(QUIRK_SIMICS))
978 		return OPAL_SUCCESS;
979 
980 	for (;;) {
981 		val = in_be64(p->regs + reg);
982 		if (val == 0xffffffffffffffffull) {
983 			/* XXX Fenced ? */
984 			return OPAL_HARDWARE;
985 		}
986 		if ((val & mask) == want_val)
987 			break;
988 
989 	}
990 	return OPAL_SUCCESS;
991 }
992 
phb4_tce_kill(struct phb * phb,uint32_t kill_type,uint64_t pe_number,uint32_t tce_size,uint64_t dma_addr,uint32_t npages)993 static int64_t phb4_tce_kill(struct phb *phb, uint32_t kill_type,
994 			     uint64_t pe_number, uint32_t tce_size,
995 			     uint64_t dma_addr, uint32_t npages)
996 {
997 	struct phb4 *p = phb_to_phb4(phb);
998 	uint64_t val;
999 	int64_t rc;
1000 
1001 	sync();
1002 	switch(kill_type) {
1003 	case OPAL_PCI_TCE_KILL_PAGES:
1004 		while (npages--) {
1005 			/* Wait for a slot in the HW kill queue */
1006 			rc = phb4_wait_bit(p, PHB_TCE_KILL,
1007 					   PHB_TCE_KILL_ALL |
1008 					   PHB_TCE_KILL_PE |
1009 					   PHB_TCE_KILL_ONE, 0);
1010 			if (rc)
1011 				return rc;
1012 			val = SETFIELD(PHB_TCE_KILL_PENUM, dma_addr, pe_number);
1013 
1014 			/* Set appropriate page size */
1015 			switch(tce_size) {
1016 			case 0x1000:
1017 				if (dma_addr & 0xf000000000000fffull)
1018 					return OPAL_PARAMETER;
1019 				break;
1020 			case 0x10000:
1021 				if (dma_addr & 0xf00000000000ffffull)
1022 					return OPAL_PARAMETER;
1023 				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_64K;
1024 				break;
1025 			case 0x200000:
1026 				if (dma_addr & 0xf0000000001fffffull)
1027 					return OPAL_PARAMETER;
1028 				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_2M;
1029 				break;
1030 			case 0x40000000:
1031 				if (dma_addr & 0xf00000003fffffffull)
1032 					return OPAL_PARAMETER;
1033 				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_1G;
1034 				break;
1035 			default:
1036 				return OPAL_PARAMETER;
1037 			}
1038 			/* Perform kill */
1039 			out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ONE | val);
1040 			/* Next page */
1041 			dma_addr += tce_size;
1042 		}
1043 		break;
1044 	case OPAL_PCI_TCE_KILL_PE:
1045 		/* Wait for a slot in the HW kill queue */
1046 		rc = phb4_wait_bit(p, PHB_TCE_KILL,
1047 				   PHB_TCE_KILL_ALL |
1048 				   PHB_TCE_KILL_PE |
1049 				   PHB_TCE_KILL_ONE, 0);
1050 		if (rc)
1051 			return rc;
1052 		/* Perform kill */
1053 		out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_PE |
1054 			 SETFIELD(PHB_TCE_KILL_PENUM, 0ull, pe_number));
1055 		break;
1056 	case OPAL_PCI_TCE_KILL_ALL:
1057 		/* Wait for a slot in the HW kill queue */
1058 		rc = phb4_wait_bit(p, PHB_TCE_KILL,
1059 				   PHB_TCE_KILL_ALL |
1060 				   PHB_TCE_KILL_PE |
1061 				   PHB_TCE_KILL_ONE, 0);
1062 		if (rc)
1063 			return rc;
1064 		/* Perform kill */
1065 		out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ALL);
1066 		break;
1067 	default:
1068 		return OPAL_PARAMETER;
1069 	}
1070 
1071 	/* Start DMA sync process */
1072 	out_be64(p->regs + PHB_DMARD_SYNC, PHB_DMARD_SYNC_START);
1073 
1074 	/* Wait for kill to complete */
1075 	rc = phb4_wait_bit(p, PHB_Q_DMA_R, PHB_Q_DMA_R_TCE_KILL_STATUS, 0);
1076 	if (rc)
1077 		return rc;
1078 
1079 	/* Wait for DMA sync to complete */
1080 	return phb4_wait_bit(p, PHB_DMARD_SYNC,
1081 			     PHB_DMARD_SYNC_COMPLETE,
1082 			     PHB_DMARD_SYNC_COMPLETE);
1083 }
1084 
1085 /* phb4_ioda_reset - Reset the IODA tables
1086  *
1087  * @purge: If true, the cache is cleared and the cleared values
1088  *         are applied to HW. If false, the cached values are
1089  *         applied to HW
1090  *
1091  * This reset the IODA tables in the PHB. It is called at
1092  * initialization time, on PHB reset, and can be called
1093  * explicitly from OPAL
1094  */
phb4_ioda_reset(struct phb * phb,bool purge)1095 static int64_t phb4_ioda_reset(struct phb *phb, bool purge)
1096 {
1097 	struct phb4 *p = phb_to_phb4(phb);
1098 	uint32_t i;
1099 	uint64_t val;
1100 
1101 	if (purge) {
1102 		PHBDBG(p, "Purging all IODA tables...\n");
1103 		if (phb->slot)
1104 			phb->slot->link_retries = PHB4_LINK_LINK_RETRIES;
1105 		phb4_init_ioda_cache(p);
1106 	}
1107 
1108 	/* Init_30..31 - Errata workaround, clear PESTA entry 0 */
1109 	phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, false);
1110 	out_be64(p->regs + PHB_IODA_DATA0, 0);
1111 
1112 	/* Init_32..33 - MIST  */
1113 	phb4_ioda_sel(p, IODA3_TBL_MIST, 0, true);
1114 	val = in_be64(p->regs + PHB_IODA_ADDR);
1115 	val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 0xf);
1116 	out_be64(p->regs + PHB_IODA_ADDR, val);
1117 	for (i = 0; i < (p->num_irqs/4); i++)
1118 		out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[i]);
1119 
1120 	/* Init_34..35 - MRT */
1121 	phb4_ioda_sel(p, IODA3_TBL_MRT, 0, true);
1122 	for (i = 0; i < p->mrt_size; i++)
1123 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1124 
1125 	/* Init_36..37 - TVT */
1126 	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
1127 	for (i = 0; i < p->tvt_size; i++)
1128 		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
1129 
1130 	/* Init_38..39 - MBT */
1131 	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
1132 	for (i = 0; i < p->mbt_size; i++) {
1133 		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
1134 		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
1135 	}
1136 
1137 	/* Init_40..41 - MDT */
1138 	phb4_ioda_sel(p, IODA3_TBL_MDT, 0, true);
1139 	for (i = 0; i < p->max_num_pes; i++)
1140 		out_be64(p->regs + PHB_IODA_DATA0, p->mdt_cache[i]);
1141 
1142 	/* Additional OPAL specific inits */
1143 
1144 	/* Clear PEST & PEEV */
1145 	for (i = 0; i < p->max_num_pes; i++) {
1146 		phb4_ioda_sel(p, IODA3_TBL_PESTA, i, false);
1147 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1148 		phb4_ioda_sel(p, IODA3_TBL_PESTB, i, false);
1149 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1150 	}
1151 
1152 	phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
1153 	for (i = 0; i < p->max_num_pes/64; i++)
1154 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1155 
1156 	/* Invalidate RTE, TCE cache */
1157 	out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);
1158 
1159 	return phb4_tce_kill(&p->phb, OPAL_PCI_TCE_KILL_ALL, 0, 0, 0, 0);
1160 }
1161 
1162 /*
1163  * Clear anything we have in PAPR Error Injection registers. Though
1164  * the spec says the PAPR error injection should be one-shot without
1165  * the "sticky" bit. However, that's false according to the experiments
1166  * I had. So we have to clear it at appropriate point in kernel to
1167  * avoid endless frozen PE.
1168  */
phb4_papr_errinjct_reset(struct phb * phb)1169 static int64_t phb4_papr_errinjct_reset(struct phb *phb)
1170 {
1171 	struct phb4 *p = phb_to_phb4(phb);
1172 
1173 	out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
1174 	out_be64(p->regs + PHB_PAPR_ERR_INJ_ADDR, 0x0ul);
1175 	out_be64(p->regs + PHB_PAPR_ERR_INJ_MASK, 0x0ul);
1176 
1177 	return OPAL_SUCCESS;
1178 }
1179 
phb4_set_phb_mem_window(struct phb * phb,uint16_t window_type,uint16_t window_num,uint64_t addr,uint64_t pci_addr __unused,uint64_t size)1180 static int64_t phb4_set_phb_mem_window(struct phb *phb,
1181 				       uint16_t window_type,
1182 				       uint16_t window_num,
1183 				       uint64_t addr,
1184 				       uint64_t pci_addr __unused,
1185 				       uint64_t size)
1186 {
1187 	struct phb4 *p = phb_to_phb4(phb);
1188 	uint64_t mbt0, mbt1;
1189 
1190 	/*
1191 	 * We have a unified MBT for all BARs on PHB4.
1192 	 *
1193 	 * So we use it as follow:
1194 	 *
1195 	 *  - M32 is hard wired to be MBT[0] and uses MDT column 0
1196 	 *    for remapping.
1197 	 *
1198 	 *  - MBT[1..n] are available to the OS, currently only as
1199 	 *    fully segmented or single PE (we don't yet expose the
1200 	 *    new segmentation modes).
1201 	 *
1202 	 *  - We configure the 3 last BARs to columnt 1..3 initially
1203 	 *    set to segment# == PE#. We will need to provide some
1204 	 *    extensions to the existing APIs to enable remapping of
1205 	 *    segments on those BARs (and only those) as the current
1206 	 *    API forces single segment mode.
1207 	 */
1208 	switch (window_type) {
1209 	case OPAL_IO_WINDOW_TYPE:
1210 	case OPAL_M32_WINDOW_TYPE:
1211 		return OPAL_UNSUPPORTED;
1212 	case OPAL_M64_WINDOW_TYPE:
1213 		if (window_num == 0 || window_num >= p->mbt_size) {
1214 			PHBERR(p, "%s: Invalid window %d\n",
1215 			       __func__, window_num);
1216 			return OPAL_PARAMETER;
1217 		}
1218 
1219 		mbt0 = p->mbt_cache[window_num][0];
1220 		mbt1 = p->mbt_cache[window_num][1];
1221 
1222 		/* XXX For now we assume the 4K minimum alignment,
1223 		 * todo: check with the HW folks what the exact limits
1224 		 * are based on the segmentation model.
1225 		 */
1226 		if ((addr & 0xFFFul) || (size & 0xFFFul)) {
1227 			PHBERR(p, "%s: Bad addr/size alignment %llx/%llx\n",
1228 			       __func__, addr, size);
1229 			return OPAL_PARAMETER;
1230 		}
1231 
1232 		/* size should be 2^N */
1233 		if (!size || size & (size-1)) {
1234 			PHBERR(p, "%s: size not a power of 2: %llx\n",
1235 			       __func__,  size);
1236 			return OPAL_PARAMETER;
1237 		}
1238 
1239 		/* address should be size aligned */
1240 		if (addr & (size - 1)) {
1241 			PHBERR(p, "%s: addr not size aligned %llx/%llx\n",
1242 			       __func__, addr, size);
1243 			return OPAL_PARAMETER;
1244 		}
1245 
1246 		break;
1247 	default:
1248 		return OPAL_PARAMETER;
1249 	}
1250 
1251 	/* The BAR shouldn't be enabled yet */
1252 	if (mbt0 & IODA3_MBT0_ENABLE)
1253 		return OPAL_PARTIAL;
1254 
1255 	/* Apply the settings */
1256 	mbt0 = SETFIELD(IODA3_MBT0_BASE_ADDR, mbt0, addr >> 12);
1257 	mbt1 = SETFIELD(IODA3_MBT1_MASK, mbt1, ~((size >> 12) -1));
1258 	p->mbt_cache[window_num][0] = mbt0;
1259 	p->mbt_cache[window_num][1] = mbt1;
1260 
1261 	return OPAL_SUCCESS;
1262 }
1263 
1264 /*
1265  * For one specific M64 BAR, it can be shared by all PEs,
1266  * or owned by single PE exclusively.
1267  */
phb4_phb_mmio_enable(struct phb __unused * phb,uint16_t window_type,uint16_t window_num,uint16_t enable)1268 static int64_t phb4_phb_mmio_enable(struct phb __unused *phb,
1269 				    uint16_t window_type,
1270 				    uint16_t window_num,
1271 				    uint16_t enable)
1272 {
1273 	struct phb4 *p = phb_to_phb4(phb);
1274 	uint64_t mbt0, mbt1, base, mask;
1275 
1276 	/*
1277 	 * By design, PHB4 doesn't support IODT any more.
1278 	 * Besides, we can't enable M32 BAR as well. So
1279 	 * the function is used to do M64 mapping and each
1280 	 * BAR is supposed to be shared by all PEs.
1281 	 *
1282 	 * TODO: Add support for some of the new PHB4 split modes
1283 	 */
1284 	switch (window_type) {
1285 	case OPAL_IO_WINDOW_TYPE:
1286 	case OPAL_M32_WINDOW_TYPE:
1287 		return OPAL_UNSUPPORTED;
1288 	case OPAL_M64_WINDOW_TYPE:
1289 		/* Window 0 is reserved for M32 */
1290 		if (window_num == 0 || window_num >= p->mbt_size ||
1291 		    enable > OPAL_ENABLE_M64_NON_SPLIT) {
1292 			PHBDBG(p,
1293 			       "phb4_phb_mmio_enable wrong args (window %d enable %d)\n",
1294 			       window_num, enable);
1295 			return OPAL_PARAMETER;
1296 		}
1297 		break;
1298 	default:
1299 		return OPAL_PARAMETER;
1300 	}
1301 
1302 	/*
1303 	 * We need check the base/mask while enabling
1304 	 * the M64 BAR. Otherwise, invalid base/mask
1305 	 * might cause fenced AIB unintentionally
1306 	 */
1307 	mbt0 = p->mbt_cache[window_num][0];
1308 	mbt1 = p->mbt_cache[window_num][1];
1309 
1310 	if (enable == OPAL_DISABLE_M64) {
1311 		/* Reset the window to disabled & default mode */
1312 		mbt0 = phb4_default_mbt0(p, window_num);
1313 		mbt1 = 0;
1314 	} else {
1315 		/* Verify that the mode is valid and consistent */
1316 		if (enable == OPAL_ENABLE_M64_SPLIT) {
1317 			uint64_t mode = GETFIELD(IODA3_MBT0_MODE, mbt0);
1318 			if (mode != IODA3_MBT0_MODE_PE_SEG &&
1319 			    mode != IODA3_MBT0_MODE_MDT)
1320 				return OPAL_PARAMETER;
1321 		} else if (enable == OPAL_ENABLE_M64_NON_SPLIT) {
1322 			if (GETFIELD(IODA3_MBT0_MODE, mbt0) !=
1323 			    IODA3_MBT0_MODE_SINGLE_PE)
1324 				return OPAL_PARAMETER;
1325 		} else
1326 			return OPAL_PARAMETER;
1327 
1328 		base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbt0);
1329 		base = (base << 12);
1330 		mask = GETFIELD(IODA3_MBT1_MASK, mbt1);
1331 		if (base < p->mm0_base || !mask)
1332 			return OPAL_PARTIAL;
1333 
1334 		mbt0 |= IODA3_MBT0_ENABLE;
1335 		mbt1 |= IODA3_MBT1_ENABLE;
1336 	}
1337 
1338 	/* Update HW and cache */
1339 	p->mbt_cache[window_num][0] = mbt0;
1340 	p->mbt_cache[window_num][1] = mbt1;
1341 	phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
1342 	out_be64(p->regs + PHB_IODA_DATA0, mbt0);
1343 	out_be64(p->regs + PHB_IODA_DATA0, mbt1);
1344 
1345 	return OPAL_SUCCESS;
1346 }
1347 
phb4_map_pe_mmio_window(struct phb * phb,uint64_t pe_number,uint16_t window_type,uint16_t window_num,uint16_t segment_num)1348 static int64_t phb4_map_pe_mmio_window(struct phb *phb,
1349 				       uint64_t pe_number,
1350 				       uint16_t window_type,
1351 				       uint16_t window_num,
1352 				       uint16_t segment_num)
1353 {
1354 	struct phb4 *p = phb_to_phb4(phb);
1355 	uint64_t mbt0, mbt1, mdt0;
1356 
1357 	if (pe_number >= p->num_pes)
1358 		return OPAL_PARAMETER;
1359 
1360 	/*
1361 	 * We support a combined MDT that has 4 columns. We let the OS
1362 	 * use kernel 0 for M32.
1363 	 *
1364 	 * We configure the 3 last BARs to map column 3..1 which by default
1365 	 * are set to map segment# == pe#, but can be remapped here if we
1366 	 * extend this function.
1367 	 *
1368 	 * The problem is that the current API was "hijacked" so that an
1369 	 * attempt at remapping any segment of an M64 has the effect of
1370 	 * turning it into a single-PE mode BAR. So if we want to support
1371 	 * remapping we'll have to play around this for example by creating
1372 	 * a new API or a new window type...
1373 	 */
1374 	switch(window_type) {
1375 	case OPAL_IO_WINDOW_TYPE:
1376 		return OPAL_UNSUPPORTED;
1377 	case OPAL_M32_WINDOW_TYPE:
1378 		if (window_num != 0 || segment_num >= p->num_pes)
1379 			return OPAL_PARAMETER;
1380 
1381 		mdt0 = p->mdt_cache[segment_num];
1382 		mdt0 = SETFIELD(IODA3_MDT_PE_A, mdt0, pe_number);
1383 		phb4_ioda_sel(p, IODA3_TBL_MDT, segment_num, false);
1384 		out_be64(p->regs + PHB_IODA_DATA0, mdt0);
1385 		break;
1386 	case OPAL_M64_WINDOW_TYPE:
1387 		if (window_num == 0 || window_num >= p->mbt_size)
1388 			return OPAL_PARAMETER;
1389 
1390 		mbt0 = p->mbt_cache[window_num][0];
1391 		mbt1 = p->mbt_cache[window_num][1];
1392 
1393 		/* The BAR shouldn't be enabled yet */
1394 		if (mbt0 & IODA3_MBT0_ENABLE)
1395 			return OPAL_PARTIAL;
1396 
1397 		/* Set to single PE mode and configure the PE */
1398 		mbt0 = SETFIELD(IODA3_MBT0_MODE, mbt0,
1399 				IODA3_MBT0_MODE_SINGLE_PE);
1400 		mbt1 = SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, mbt1, pe_number);
1401 		p->mbt_cache[window_num][0] = mbt0;
1402 		p->mbt_cache[window_num][1] = mbt1;
1403 		break;
1404 	default:
1405 		return OPAL_PARAMETER;
1406 	}
1407 
1408 	return OPAL_SUCCESS;
1409 }
1410 
phb4_map_pe_dma_window(struct phb * phb,uint64_t pe_number,uint16_t window_id,uint16_t tce_levels,uint64_t tce_table_addr,uint64_t tce_table_size,uint64_t tce_page_size)1411 static int64_t phb4_map_pe_dma_window(struct phb *phb,
1412 				      uint64_t pe_number,
1413 				      uint16_t window_id,
1414 				      uint16_t tce_levels,
1415 				      uint64_t tce_table_addr,
1416 				      uint64_t tce_table_size,
1417 				      uint64_t tce_page_size)
1418 {
1419 	struct phb4 *p = phb_to_phb4(phb);
1420 	uint64_t tts_encoded;
1421 	uint64_t data64 = 0;
1422 
1423 	/*
1424 	 * We configure the PHB in 2 TVE per PE mode to match phb3.
1425 	 * Current Linux implementation *requires* the two windows per
1426 	 * PE.
1427 	 *
1428 	 * Note: On DD2.0 this is the normal mode of operation.
1429 	 */
1430 
1431 	/*
1432 	 * Sanity check. We currently only support "2 window per PE" mode
1433 	 * ie, only bit 59 of the PCI address is used to select the window
1434 	 */
1435 	if (pe_number >= p->num_pes || (window_id >> 1) != pe_number)
1436 		return OPAL_PARAMETER;
1437 
1438 	/*
1439 	 * tce_table_size == 0 is used to disable an entry, in this case
1440 	 * we ignore other arguments
1441 	 */
1442 	if (tce_table_size == 0) {
1443 		phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
1444 		out_be64(p->regs + PHB_IODA_DATA0, 0);
1445 		p->tve_cache[window_id] = 0;
1446 		return OPAL_SUCCESS;
1447 	}
1448 
1449 	/* Additional arguments validation */
1450 	if (tce_levels < 1 || tce_levels > 5 ||
1451 	    !is_pow2(tce_table_size) ||
1452 	    tce_table_size < 0x1000)
1453 		return OPAL_PARAMETER;
1454 
1455 	/* Encode TCE table size */
1456 	data64 = SETFIELD(IODA3_TVT_TABLE_ADDR, 0ul, tce_table_addr >> 12);
1457 	tts_encoded = ilog2(tce_table_size) - 11;
1458 	if (tts_encoded > 31)
1459 		return OPAL_PARAMETER;
1460 	data64 = SETFIELD(IODA3_TVT_TCE_TABLE_SIZE, data64, tts_encoded);
1461 
1462 	/* Encode TCE page size */
1463 	switch (tce_page_size) {
1464 	case 0x1000:	/* 4K */
1465 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 1);
1466 		break;
1467 	case 0x10000:	/* 64K */
1468 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 5);
1469 		break;
1470 	case 0x200000:	/* 2M */
1471 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 10);
1472 		break;
1473 	case 0x40000000: /* 1G */
1474 		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 19);
1475 		break;
1476 	default:
1477 		return OPAL_PARAMETER;
1478 	}
1479 
1480 	/* Encode number of levels */
1481 	data64 = SETFIELD(IODA3_TVT_NUM_LEVELS, data64, tce_levels - 1);
1482 
1483 	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
1484 	out_be64(p->regs + PHB_IODA_DATA0, data64);
1485 	p->tve_cache[window_id] = data64;
1486 
1487 	return OPAL_SUCCESS;
1488 }
1489 
phb4_map_pe_dma_window_real(struct phb * phb,uint64_t pe_number,uint16_t window_id,uint64_t pci_start_addr,uint64_t pci_mem_size)1490 static int64_t phb4_map_pe_dma_window_real(struct phb *phb,
1491 					   uint64_t pe_number,
1492 					   uint16_t window_id,
1493 					   uint64_t pci_start_addr,
1494 					   uint64_t pci_mem_size)
1495 {
1496 	struct phb4 *p = phb_to_phb4(phb);
1497 	uint64_t end = pci_start_addr + pci_mem_size;
1498 	uint64_t tve;
1499 
1500 	if (pe_number >= p->num_pes ||
1501 	    (window_id >> 1) != pe_number)
1502 		return OPAL_PARAMETER;
1503 
1504 	if (pci_mem_size) {
1505 		/* Enable */
1506 
1507 		/*
1508 		 * Check that the start address has the right TVE index,
1509 		 * we only support the 1 bit mode where each PE has 2
1510 		 * TVEs
1511 		 */
1512 		if ((pci_start_addr >> 59) != (window_id & 1))
1513 			return OPAL_PARAMETER;
1514 		pci_start_addr &= ((1ull << 59) - 1);
1515 		end = pci_start_addr + pci_mem_size;
1516 
1517 		/* We have to be 16M aligned */
1518 		if ((pci_start_addr & 0x00ffffff) ||
1519 		    (pci_mem_size & 0x00ffffff))
1520 			return OPAL_PARAMETER;
1521 
1522 		/*
1523 		 * It *looks* like this is the max we can support (we need
1524 		 * to verify this. Also we are not checking for rollover,
1525 		 * but then we aren't trying too hard to protect ourselves
1526 		 * againt a completely broken OS.
1527 		 */
1528 		if (end > 0x0003ffffffffffffull)
1529 			return OPAL_PARAMETER;
1530 
1531 		/*
1532 		 * Put start address bits 49:24 into TVE[52:53]||[0:23]
1533 		 * and end address bits 49:24 into TVE[54:55]||[24:47]
1534 		 * and set TVE[51]
1535 		 */
1536 		tve  = (pci_start_addr << 16) & (0xffffffull << 40);
1537 		tve |= (pci_start_addr >> 38) & (3ull << 10);
1538 		tve |= (end >>  8) & (0xfffffful << 16);
1539 		tve |= (end >> 40) & (3ull << 8);
1540 		tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
1541 	} else {
1542 		/* Disable */
1543 		tve = 0;
1544 	}
1545 
1546 	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
1547 	out_be64(p->regs + PHB_IODA_DATA0, tve);
1548 	p->tve_cache[window_id] = tve;
1549 
1550 	return OPAL_SUCCESS;
1551 }
1552 
phb4_set_ive_pe(struct phb * phb,uint64_t pe_number,uint32_t ive_num)1553 static int64_t phb4_set_ive_pe(struct phb *phb,
1554 			       uint64_t pe_number,
1555 			       uint32_t ive_num)
1556 {
1557 	struct phb4 *p = phb_to_phb4(phb);
1558 	uint32_t mist_idx;
1559 	uint32_t mist_quad;
1560 	uint32_t mist_shift;
1561 	uint64_t val;
1562 
1563 	if (pe_number >= p->num_pes || ive_num >= (p->num_irqs - 8))
1564 		return OPAL_PARAMETER;
1565 
1566 	mist_idx = ive_num >> 2;
1567 	mist_quad = ive_num & 3;
1568 	mist_shift = (3 - mist_quad) << 4;
1569 	p->mist_cache[mist_idx] &= ~(0x0fffull << mist_shift);
1570 	p->mist_cache[mist_idx] |=  ((uint64_t)pe_number) << mist_shift;
1571 
1572 	/* Note: This has the side effect of clearing P/Q, so this
1573 	 * shouldn't be called while the interrupt is "hot"
1574 	 */
1575 
1576 	phb4_ioda_sel(p, IODA3_TBL_MIST, mist_idx, false);
1577 
1578 	/* We need to inject the appropriate MIST write enable bit
1579 	 * in the IODA table address register
1580 	 */
1581 	val = in_be64(p->regs + PHB_IODA_ADDR);
1582 	val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 8 >> mist_quad);
1583 	out_be64(p->regs + PHB_IODA_ADDR, val);
1584 
1585 	/* Write entry */
1586 	out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[mist_idx]);
1587 
1588 	return OPAL_SUCCESS;
1589 }
1590 
phb4_get_msi_32(struct phb * phb,uint64_t pe_number,uint32_t ive_num,uint8_t msi_range,uint32_t * msi_address,uint32_t * message_data)1591 static int64_t phb4_get_msi_32(struct phb *phb,
1592 			       uint64_t pe_number,
1593 			       uint32_t ive_num,
1594 			       uint8_t msi_range,
1595 			       uint32_t *msi_address,
1596 			       uint32_t *message_data)
1597 {
1598 	struct phb4 *p = phb_to_phb4(phb);
1599 
1600 	/*
1601 	 * Sanity check. We needn't check on mve_number (PE#)
1602 	 * on PHB3 since the interrupt source is purely determined
1603 	 * by its DMA address and data, but the check isn't
1604 	 * harmful.
1605 	 */
1606 	if (pe_number >= p->num_pes ||
1607 	    ive_num >= (p->num_irqs - 8) ||
1608 	    msi_range != 1 || !msi_address|| !message_data)
1609 		return OPAL_PARAMETER;
1610 
1611 	/*
1612 	 * DMA address and data will form the IVE index.
1613 	 * For more details, please refer to IODA2 spec.
1614 	 */
1615 	*msi_address = 0xFFFF0000 | ((ive_num << 4) & 0xFFFFFE0F);
1616 	*message_data = ive_num & 0x1F;
1617 
1618 	return OPAL_SUCCESS;
1619 }
1620 
phb4_get_msi_64(struct phb * phb,uint64_t pe_number,uint32_t ive_num,uint8_t msi_range,uint64_t * msi_address,uint32_t * message_data)1621 static int64_t phb4_get_msi_64(struct phb *phb,
1622 			       uint64_t pe_number,
1623 			       uint32_t ive_num,
1624 			       uint8_t msi_range,
1625 			       uint64_t *msi_address,
1626 			       uint32_t *message_data)
1627 {
1628 	struct phb4 *p = phb_to_phb4(phb);
1629 
1630 	/* Sanity check */
1631 	if (pe_number >= p->num_pes ||
1632 	    ive_num >= (p->num_irqs - 8) ||
1633 	    msi_range != 1 || !msi_address || !message_data)
1634 		return OPAL_PARAMETER;
1635 
1636 	/*
1637 	 * DMA address and data will form the IVE index.
1638 	 * For more details, please refer to IODA2 spec.
1639 	 */
1640 	*msi_address = (0x1ul << 60) | ((ive_num << 4) & 0xFFFFFFFFFFFFFE0Ful);
1641 	*message_data = ive_num & 0x1F;
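	/*
	 * Same index split as the 32-bit variant above, but the MSI window
	 * sits at bit 60 of the 64-bit DMA address space rather than at
	 * 0xFFFF0000. Illustrative example: ive_num = 0x123 yields
	 * *msi_address = 0x1000000000001200ul and *message_data = 0x03.
	 */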
1642 
1643 	return OPAL_SUCCESS;
1644 }
1645 
1646 static void phb4_rc_err_clear(struct phb4 *p)
1647 {
1648 	/* Init_47 - Clear errors */
1649 	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, 0xffff);
1650 
1651 	if (p->ecap <= 0)
1652 		return;
1653 
1654 	phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
1655 			     PCICAP_EXP_DEVSTAT_CE	|
1656 			     PCICAP_EXP_DEVSTAT_NFE	|
1657 			     PCICAP_EXP_DEVSTAT_FE	|
1658 			     PCICAP_EXP_DEVSTAT_UE);
1659 
1660 	if (p->aercap <= 0)
1661 		return;
1662 
1663 	/* Clear all UE status */
1664 	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS,
1665 			     0xffffffff);
1666 	/* Clear all CE status */
1667 	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS,
1668 			     0xffffffff);
1669 	/* Clear root error status */
1670 	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA,
1671 			     0xffffffff);
1672 }
1673 
1674 static void phb4_err_clear_regb(struct phb4 *p)
1675 {
1676 	uint64_t val64;
1677 
1678 	val64 = phb4_read_reg(p, PHB_REGB_ERR_STATUS);
1679 	phb4_write_reg(p, PHB_REGB_ERR_STATUS, val64);
1680 	phb4_write_reg(p, PHB_REGB_ERR1_STATUS, 0x0ul);
1681 	phb4_write_reg(p, PHB_REGB_ERR_LOG_0, 0x0ul);
1682 	phb4_write_reg(p, PHB_REGB_ERR_LOG_1, 0x0ul);
1683 }
1684 
1685 /*
1686  * The function can be called during error recovery for all classes of
1687  * errors.  This is new to PHB4; previous revisions had separate
1688  * sequences for INF/ER/Fatal errors.
1689  *
1690  * "Rec #" in this function refers to the "Recov_#" steps in the
1691  * PHB4 INF recovery sequence.
1692  */
1693 static void phb4_err_clear(struct phb4 *p)
1694 {
1695 	uint64_t val64;
1696 	uint64_t fir = phb4_read_reg(p, PHB_LEM_FIR_ACCUM);
1697 
1698 	/* Rec 1: Acquire the PCI config lock (we don't need to do this) */
1699 
1700 	/* Rec 2...15: Clear error status in RC config space */
1701 	phb4_rc_err_clear(p);
1702 
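	/*
	 * The blocks below all follow the same pattern: read the latched
	 * error status and write the value back (assuming write-one-to-clear
	 * semantics), then zero the first-error status and both error log
	 * registers of that unit.
	 */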
1703 	/* Rec 16...23: Clear PBL errors */
1704 	val64 = phb4_read_reg(p, PHB_PBL_ERR_STATUS);
1705 	phb4_write_reg(p, PHB_PBL_ERR_STATUS, val64);
1706 	phb4_write_reg(p, PHB_PBL_ERR1_STATUS, 0x0ul);
1707 	phb4_write_reg(p, PHB_PBL_ERR_LOG_0, 0x0ul);
1708 	phb4_write_reg(p, PHB_PBL_ERR_LOG_1, 0x0ul);
1709 
1710 	/* Rec 24...31: Clear REGB errors */
1711 	phb4_err_clear_regb(p);
1712 
1713 	/* Rec 32...59: Clear PHB error trap */
1714 	val64 = phb4_read_reg(p, PHB_TXE_ERR_STATUS);
1715 	phb4_write_reg(p, PHB_TXE_ERR_STATUS, val64);
1716 	phb4_write_reg(p, PHB_TXE_ERR1_STATUS, 0x0ul);
1717 	phb4_write_reg(p, PHB_TXE_ERR_LOG_0, 0x0ul);
1718 	phb4_write_reg(p, PHB_TXE_ERR_LOG_1, 0x0ul);
1719 
1720 	val64 = phb4_read_reg(p, PHB_RXE_ARB_ERR_STATUS);
1721 	phb4_write_reg(p, PHB_RXE_ARB_ERR_STATUS, val64);
1722 	phb4_write_reg(p, PHB_RXE_ARB_ERR1_STATUS, 0x0ul);
1723 	phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_0, 0x0ul);
1724 	phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_1, 0x0ul);
1725 
1726 	val64 = phb4_read_reg(p, PHB_RXE_MRG_ERR_STATUS);
1727 	phb4_write_reg(p, PHB_RXE_MRG_ERR_STATUS, val64);
1728 	phb4_write_reg(p, PHB_RXE_MRG_ERR1_STATUS, 0x0ul);
1729 	phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_0, 0x0ul);
1730 	phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_1, 0x0ul);
1731 
1732 	val64 = phb4_read_reg(p, PHB_RXE_TCE_ERR_STATUS);
1733 	phb4_write_reg(p, PHB_RXE_TCE_ERR_STATUS, val64);
1734 	phb4_write_reg(p, PHB_RXE_TCE_ERR1_STATUS, 0x0ul);
1735 	phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_0, 0x0ul);
1736 	phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_1, 0x0ul);
1737 
1738 	val64 = phb4_read_reg(p, PHB_ERR_STATUS);
1739 	phb4_write_reg(p, PHB_ERR_STATUS, val64);
1740 	phb4_write_reg(p, PHB_ERR1_STATUS, 0x0ul);
1741 	phb4_write_reg(p, PHB_ERR_LOG_0, 0x0ul);
1742 	phb4_write_reg(p, PHB_ERR_LOG_1, 0x0ul);
1743 
1744 	/* Rec 61/62: Clear FIR/WOF */
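	/*
	 * Writing the complement of the accumulated FIR to the AND-mask
	 * register is assumed to clear exactly the bits that were latched
	 * when we sampled it, leaving any bits set since then untouched.
	 */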
1745 	phb4_write_reg(p, PHB_LEM_FIR_AND_MASK, ~fir);
1746 	phb4_write_reg(p, PHB_LEM_WOF, 0x0ul);
1747 
1748 	/* Rec 63: Update LEM mask to its initial value */
1749 	phb4_write_reg(p, PHB_LEM_ERROR_MASK, 0x0ul);
1750 
1751 	/* Rec 64: Clear the PCI config lock (we don't need to do this) */
1752 }
1753 
1754 static void phb4_read_phb_status(struct phb4 *p,
1755 				 struct OpalIoPhb4ErrorData *stat)
1756 {
1757 	uint16_t val = 0;
1758 	uint32_t i;
1759 	uint64_t *pPEST;
1760 
1761 	memset(stat, 0, sizeof(struct OpalIoPhb4ErrorData));
1762 
1763 	/* Error data common part */
1764 	stat->common.version = OPAL_PHB_ERROR_DATA_VERSION_1;
1765 	stat->common.ioType  = OPAL_PHB_ERROR_DATA_TYPE_PHB4;
1766 	stat->common.len     = sizeof(struct OpalIoPhb4ErrorData);
1767 
1768 	/* Use ASB for config space if the PHB is fenced */
1769 	if (p->flags & PHB4_AIB_FENCED)
1770 		p->flags |= PHB4_CFG_USE_ASB;
1771 
1772 	/* Grab RC bridge control, make it 32-bit */
1773 	phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &val);
1774 	stat->brdgCtl = val;
1775 
1776 	/*
1777 	 * Grab various RC PCIe capability registers. All device, slot
1778 	 * and link status are 16-bit, so we grab the pair control+status
1779 	 * for each of them
1780 	 */
1781 	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL,
1782 			   &stat->deviceStatus);
1783 	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_SLOTCTL,
1784 			   &stat->slotStatus);
1785 	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL,
1786 			   &stat->linkStatus);
1787 
1788 	/*
1789 	 * I assume these are from the standard config space header; cmd &
1790 	 * status together make 32 bits. Secondary status is 16-bit so I'll
1791 	 * clear the top half of that one.
1792 	 */
1793 	phb4_pcicfg_read32(&p->phb, 0, PCI_CFG_CMD, &stat->devCmdStatus);
1794 	phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, &val);
1795 	stat->devSecStatus = val;
1796 
1797 	/* Grab a bunch of AER regs */
1798 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA,
1799 			   &stat->rootErrorStatus);
1800 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS,
1801 			   &stat->uncorrErrorStatus);
1802 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS,
1803 			   &stat->corrErrorStatus);
1804 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG0,
1805 			   &stat->tlpHdr1);
1806 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG1,
1807 			   &stat->tlpHdr2);
1808 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG2,
1809 			   &stat->tlpHdr3);
1810 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG3,
1811 			   &stat->tlpHdr4);
1812 	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_SRCID,
1813 			   &stat->sourceId);
1814 
1815 	/* PEC NFIR, same as P8/PHB3 */
1816 	xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &stat->nFir);
1817 	xscom_read(p->chip_id, p->pe_stk_xscom + 0x3, &stat->nFirMask);
1818 	xscom_read(p->chip_id, p->pe_stk_xscom + 0x8, &stat->nFirWOF);
1819 
1820 	/* PHB4 inbound and outbound error Regs */
1821 	stat->phbPlssr = phb4_read_reg_asb(p, PHB_CPU_LOADSTORE_STATUS);
1822 	stat->phbCsr = phb4_read_reg_asb(p, PHB_DMA_CHAN_STATUS);
1823 	stat->lemFir = phb4_read_reg_asb(p, PHB_LEM_FIR_ACCUM);
1824 	stat->lemErrorMask = phb4_read_reg_asb(p, PHB_LEM_ERROR_MASK);
1825 	stat->lemWOF = phb4_read_reg_asb(p, PHB_LEM_WOF);
1826 	stat->phbErrorStatus = phb4_read_reg_asb(p, PHB_ERR_STATUS);
1827 	stat->phbFirstErrorStatus = phb4_read_reg_asb(p, PHB_ERR1_STATUS);
1828 	stat->phbErrorLog0 = phb4_read_reg_asb(p, PHB_ERR_LOG_0);
1829 	stat->phbErrorLog1 = phb4_read_reg_asb(p, PHB_ERR_LOG_1);
1830 	stat->phbTxeErrorStatus = phb4_read_reg_asb(p, PHB_TXE_ERR_STATUS);
1831 	stat->phbTxeFirstErrorStatus = phb4_read_reg_asb(p, PHB_TXE_ERR1_STATUS);
1832 	stat->phbTxeErrorLog0 = phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_0);
1833 	stat->phbTxeErrorLog1 = phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_1);
1834 	stat->phbRxeArbErrorStatus = phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_STATUS);
1835 	stat->phbRxeArbFirstErrorStatus = phb4_read_reg_asb(p, PHB_RXE_ARB_ERR1_STATUS);
1836 	stat->phbRxeArbErrorLog0 = phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_0);
1837 	stat->phbRxeArbErrorLog1 = phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_1);
1838 	stat->phbRxeMrgErrorStatus = phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_STATUS);
1839 	stat->phbRxeMrgFirstErrorStatus = phb4_read_reg_asb(p, PHB_RXE_MRG_ERR1_STATUS);
1840 	stat->phbRxeMrgErrorLog0 = phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_0);
1841 	stat->phbRxeMrgErrorLog1 = phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_1);
1842 	stat->phbRxeTceErrorStatus = phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_STATUS);
1843 	stat->phbRxeTceFirstErrorStatus = phb4_read_reg_asb(p, PHB_RXE_TCE_ERR1_STATUS);
1844 	stat->phbRxeTceErrorLog0 = phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_0);
1845 	stat->phbRxeTceErrorLog1 = phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_1);
1846 
1847 	/* PHB4 REGB error registers */
1848 	stat->phbPblErrorStatus = phb4_read_reg_asb(p, PHB_PBL_ERR_STATUS);
1849 	stat->phbPblFirstErrorStatus = phb4_read_reg_asb(p, PHB_PBL_ERR1_STATUS);
1850 	stat->phbPblErrorLog0 = phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_0);
1851 	stat->phbPblErrorLog1 = phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_1);
1852 
1853 	stat->phbPcieDlpErrorStatus = phb4_read_reg_asb(p, PHB_PCIE_DLP_ERR_STATUS);
1854 	stat->phbPcieDlpErrorLog1 = phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG1);
1855 	stat->phbPcieDlpErrorLog2 = phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG2);
1856 
1857 	stat->phbRegbErrorStatus = phb4_read_reg_asb(p, PHB_REGB_ERR_STATUS);
1858 	stat->phbRegbFirstErrorStatus = phb4_read_reg_asb(p, PHB_REGB_ERR1_STATUS);
1859 	stat->phbRegbErrorLog0 = phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_0);
1860 	stat->phbRegbErrorLog1 = phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_1);
1861 
1862 	/*
1863 	 * Grab PESTA & B content. The error bit (bit#0) should
1864 	 * be fetched from IODA and the left content from memory
1865 	 * resident tables.
1866 	 */
1867 	 pPEST = (uint64_t *)p->tbl_pest;
1868 	 phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, true);
1869 	 for (i = 0; i < p->max_num_pes; i++) {
1870 		 stat->pestA[i] = phb4_read_reg_asb(p, PHB_IODA_DATA0);
1871 		 stat->pestA[i] |= pPEST[2 * i];
1872 	 }
1873 
1874 	 phb4_ioda_sel(p, IODA3_TBL_PESTB, 0, true);
1875 	 for (i = 0; i < p->max_num_pes; i++) {
1876 		 stat->pestB[i] = phb4_read_reg_asb(p, PHB_IODA_DATA0);
1877 		 stat->pestB[i] |= pPEST[2 * i + 1];
1878 	 }
1879 }
1880 
1881 static void __unused phb4_dump_peltv(struct phb4 *p)
1882 {
1883 	int stride = p->max_num_pes / 64;
1884 	uint64_t *tbl = (void *) p->tbl_peltv;
1885 	unsigned int pe;
1886 
1887 	PHBERR(p, "PELT-V: base addr: %p size: %llx (%d PEs, stride = %d)\n",
1888 			tbl, p->tbl_peltv_size, p->max_num_pes, stride);
1889 
1890 	for (pe = 0; pe < p->max_num_pes; pe++) {
1891 		unsigned int i, j;
1892 		uint64_t sum = 0;
1893 
1894 		i = pe * stride;
1895 
1896 		/*
1897 		 * Only print an entry if there are bits set in the PE's
1898 		 * PELT-V entry. There are a few hundred possible PEs and
1899 		 * generally only a handful will be in use.
1900 		 */
1901 
1902 		for (j = 0; j < stride; j++)
1903 			sum |= tbl[i + j];
1904 		if (!sum)
1905 			continue; /* unused PE, skip it */
1906 
1907 		if (p->max_num_pes == 512) {
1908 			PHBERR(p, "PELT-V[%03x] = "
1909 				"%016llx %016llx %016llx %016llx"
1910 				"%016llx %016llx %016llx %016llx\n", pe,
1911 				tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3],
1912 				tbl[i + 4], tbl[i + 5], tbl[i + 6], tbl[i + 7]);
1913 		} else if (p->max_num_pes == 256) {
1914 			PHBERR(p, "PELT-V[%03x] = "
1915 				"%016llx %016llx %016llx %016llx\n", pe,
1916 				tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3]);
1917 		}
1918 	}
1919 }
1920 
1921 static void __unused phb4_dump_ioda_table(struct phb4 *p, int table)
1922 {
1923 	const char *name;
1924 	int entries, i;
1925 
1926 	switch (table) {
1927 	case IODA3_TBL_LIST:
1928 		name = "LIST";
1929 		entries = 8;
1930 		break;
1931 	case IODA3_TBL_MIST:
1932 		name = "MIST";
1933 		entries = 1024;
1934 		break;
1935 	case IODA3_TBL_RCAM:
1936 		name = "RCAM";
1937 		entries = 128;
1938 		break;
1939 	case IODA3_TBL_MRT:
1940 		name = "MRT";
1941 		entries = 16;
1942 		break;
1943 	case IODA3_TBL_PESTA:
1944 		name = "PESTA";
1945 		entries = 512;
1946 		break;
1947 	case IODA3_TBL_PESTB:
1948 		name = "PESTB";
1949 		entries = 512;
1950 		break;
1951 	case IODA3_TBL_TVT:
1952 		name = "TVT";
1953 		entries = 512;
1954 		break;
1955 	case IODA3_TBL_TCAM:
1956 		name = "TCAM";
1957 		entries = 1024;
1958 		break;
1959 	case IODA3_TBL_TDR:
1960 		name = "TDR";
1961 		entries = 1024;
1962 		break;
1963 	case IODA3_TBL_MBT: /* special case, see below */
1964 		name = "MBT";
1965 		entries = 64;
1966 		break;
1967 	case IODA3_TBL_MDT:
1968 		name = "MDT";
1969 		entries = 512;
1970 		break;
1971 	case IODA3_TBL_PEEV:
1972 		name = "PEEV";
1973 		entries = 8;
1974 		break;
1975 	default:
1976 		PHBERR(p, "Invalid IODA table %d!\n", table);
1977 		return;
1978 	}
1979 
1980 	PHBERR(p, "Start %s dump (only non-zero entries are printed):\n", name);
1981 
1982 	phb4_ioda_sel(p, table, 0, true);
1983 
1984 	/*
1985 	 * Each entry in the MBT is 16 bytes. Every other table has 8-byte
1986 	 * entries, so we special-case the MBT to keep the output readable.
1987 	 */
1988 	if (table == IODA3_TBL_MBT) {
1989 		for (i = 0; i < 32; i++) {
1990 			uint64_t v1 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
1991 			uint64_t v2 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
1992 
1993 			if (!v1 && !v2)
1994 				continue;
1995 			PHBERR(p, "MBT[%03x] = %016llx %016llx\n", i, v1, v2);
1996 		}
1997 	} else {
1998 		for (i = 0; i < entries; i++) {
1999 			uint64_t v = phb4_read_reg_asb(p, PHB_IODA_DATA0);
2000 
2001 			if (!v)
2002 				continue;
2003 			PHBERR(p, "%s[%03x] = %016llx\n", name, i, v);
2004 		}
2005 	}
2006 
2007 	PHBERR(p, "End %s dump\n", name);
2008 }
2009 
2010 static void phb4_eeh_dump_regs(struct phb4 *p)
2011 {
2012 	struct OpalIoPhb4ErrorData *s;
2013 	uint16_t reg;
2014 	unsigned int i;
2015 
2016 	if (!verbose_eeh)
2017 		return;
2018 
2019 	s = zalloc(sizeof(struct OpalIoPhb4ErrorData));
2020 	if (!s) {
2021 		PHBERR(p, "Failed to allocate error info !\n");
2022 		return;
2023 	}
2024 	phb4_read_phb_status(p, s);
2025 
2026 	PHBERR(p, "                 brdgCtl = %08x\n", s->brdgCtl);
2027 
2028 	/* PHB4 cfg regs */
2029 	PHBERR(p, "            deviceStatus = %08x\n", s->deviceStatus);
2030 	PHBERR(p, "              slotStatus = %08x\n", s->slotStatus);
2031 	PHBERR(p, "              linkStatus = %08x\n", s->linkStatus);
2032 	PHBERR(p, "            devCmdStatus = %08x\n", s->devCmdStatus);
2033 	PHBERR(p, "            devSecStatus = %08x\n", s->devSecStatus);
2034 	PHBERR(p, "         rootErrorStatus = %08x\n", s->rootErrorStatus);
2035 	PHBERR(p, "         corrErrorStatus = %08x\n", s->corrErrorStatus);
2036 	PHBERR(p, "       uncorrErrorStatus = %08x\n", s->uncorrErrorStatus);
2037 
2038 	/* Two non-OPAL-API registers that are useful */
2039 	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &reg);
2040 	PHBERR(p, "                  devctl = %08x\n", reg);
2041 	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
2042 			   &reg);
2043 	PHBERR(p, "                 devStat = %08x\n", reg);
2044 
2045 	/* Byte swap TLP headers so they are the same as the PCIe spec */
2046 	PHBERR(p, "                 tlpHdr1 = %08x\n", bswap_32(s->tlpHdr1));
2047 	PHBERR(p, "                 tlpHdr2 = %08x\n", bswap_32(s->tlpHdr2));
2048 	PHBERR(p, "                 tlpHdr3 = %08x\n", bswap_32(s->tlpHdr3));
2049 	PHBERR(p, "                 tlpHdr4 = %08x\n", bswap_32(s->tlpHdr4));
2050 	PHBERR(p, "                sourceId = %08x\n", s->sourceId);
2051 	PHBERR(p, "                    nFir = %016llx\n", s->nFir);
2052 	PHBERR(p, "                nFirMask = %016llx\n", s->nFirMask);
2053 	PHBERR(p, "                 nFirWOF = %016llx\n", s->nFirWOF);
2054 	PHBERR(p, "                phbPlssr = %016llx\n", s->phbPlssr);
2055 	PHBERR(p, "                  phbCsr = %016llx\n", s->phbCsr);
2056 	PHBERR(p, "                  lemFir = %016llx\n", s->lemFir);
2057 	PHBERR(p, "            lemErrorMask = %016llx\n", s->lemErrorMask);
2058 	PHBERR(p, "                  lemWOF = %016llx\n", s->lemWOF);
2059 	PHBERR(p, "          phbErrorStatus = %016llx\n", s->phbErrorStatus);
2060 	PHBERR(p, "     phbFirstErrorStatus = %016llx\n", s->phbFirstErrorStatus);
2061 	PHBERR(p, "            phbErrorLog0 = %016llx\n", s->phbErrorLog0);
2062 	PHBERR(p, "            phbErrorLog1 = %016llx\n", s->phbErrorLog1);
2063 	PHBERR(p, "       phbTxeErrorStatus = %016llx\n", s->phbTxeErrorStatus);
2064 	PHBERR(p, "  phbTxeFirstErrorStatus = %016llx\n", s->phbTxeFirstErrorStatus);
2065 	PHBERR(p, "         phbTxeErrorLog0 = %016llx\n", s->phbTxeErrorLog0);
2066 	PHBERR(p, "         phbTxeErrorLog1 = %016llx\n", s->phbTxeErrorLog1);
2067 	PHBERR(p, "    phbRxeArbErrorStatus = %016llx\n", s->phbRxeArbErrorStatus);
2068 	PHBERR(p, "phbRxeArbFrstErrorStatus = %016llx\n", s->phbRxeArbFirstErrorStatus);
2069 	PHBERR(p, "      phbRxeArbErrorLog0 = %016llx\n", s->phbRxeArbErrorLog0);
2070 	PHBERR(p, "      phbRxeArbErrorLog1 = %016llx\n", s->phbRxeArbErrorLog1);
2071 	PHBERR(p, "    phbRxeMrgErrorStatus = %016llx\n", s->phbRxeMrgErrorStatus);
2072 	PHBERR(p, "phbRxeMrgFrstErrorStatus = %016llx\n", s->phbRxeMrgFirstErrorStatus);
2073 	PHBERR(p, "      phbRxeMrgErrorLog0 = %016llx\n", s->phbRxeMrgErrorLog0);
2074 	PHBERR(p, "      phbRxeMrgErrorLog1 = %016llx\n", s->phbRxeMrgErrorLog1);
2075 	PHBERR(p, "    phbRxeTceErrorStatus = %016llx\n", s->phbRxeTceErrorStatus);
2076 	PHBERR(p, "phbRxeTceFrstErrorStatus = %016llx\n", s->phbRxeTceFirstErrorStatus);
2077 	PHBERR(p, "      phbRxeTceErrorLog0 = %016llx\n", s->phbRxeTceErrorLog0);
2078 	PHBERR(p, "      phbRxeTceErrorLog1 = %016llx\n", s->phbRxeTceErrorLog1);
2079 	PHBERR(p, "       phbPblErrorStatus = %016llx\n", s->phbPblErrorStatus);
2080 	PHBERR(p, "  phbPblFirstErrorStatus = %016llx\n", s->phbPblFirstErrorStatus);
2081 	PHBERR(p, "         phbPblErrorLog0 = %016llx\n", s->phbPblErrorLog0);
2082 	PHBERR(p, "         phbPblErrorLog1 = %016llx\n", s->phbPblErrorLog1);
2083 	PHBERR(p, "     phbPcieDlpErrorLog1 = %016llx\n", s->phbPcieDlpErrorLog1);
2084 	PHBERR(p, "     phbPcieDlpErrorLog2 = %016llx\n", s->phbPcieDlpErrorLog2);
2085 	PHBERR(p, "   phbPcieDlpErrorStatus = %016llx\n", s->phbPcieDlpErrorStatus);
2086 
2087 	PHBERR(p, "      phbRegbErrorStatus = %016llx\n", s->phbRegbErrorStatus);
2088 	PHBERR(p, " phbRegbFirstErrorStatus = %016llx\n", s->phbRegbFirstErrorStatus);
2089 	PHBERR(p, "        phbRegbErrorLog0 = %016llx\n", s->phbRegbErrorLog0);
2090 	PHBERR(p, "        phbRegbErrorLog1 = %016llx\n", s->phbRegbErrorLog1);
2091 
2092 	for (i = 0; i < p->max_num_pes; i++) {
2093 		if (!s->pestA[i] && !s->pestB[i])
2094 			continue;
2095 		PHBERR(p, "               PEST[%03x] = %016llx %016llx\n",
2096 		       i, s->pestA[i], s->pestB[i]);
2097 	}
2098 	free(s);
2099 }
2100 
2101 static int64_t phb4_set_pe(struct phb *phb,
2102 			   uint64_t pe_number,
2103 			   uint64_t bdfn,
2104 			   uint8_t bcompare,
2105 			   uint8_t dcompare,
2106 			   uint8_t fcompare,
2107 			   uint8_t action)
2108 {
2109 	struct phb4 *p = phb_to_phb4(phb);
2110 	uint64_t mask, idx;
2111 
2112 	/* Sanity check */
2113 	if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
2114 		return OPAL_PARAMETER;
2115 	if (pe_number >= p->num_pes || bdfn > 0xffff ||
2116 	    bcompare > OpalPciBusAll ||
2117 	    dcompare > OPAL_COMPARE_RID_DEVICE_NUMBER ||
2118 	    fcompare > OPAL_COMPARE_RID_FUNCTION_NUMBER)
2119 		return OPAL_PARAMETER;
2120 
2121 	/* match everything by default */
2122 	mask = 0;
2123 
2124 	/* Figure out the RID range */
2125 	if (bcompare != OpalPciBusAny)
2126 		mask  = ((0x1 << (bcompare + 1)) - 1) << (15 - bcompare);
2127 
2128 	if (dcompare == OPAL_COMPARE_RID_DEVICE_NUMBER)
2129 		mask |= 0xf8;
2130 
2131 	if (fcompare == OPAL_COMPARE_RID_FUNCTION_NUMBER)
2132 		mask |= 0x7;
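	/*
	 * Illustrative example: bcompare = OpalPciBusAll (7) with dcompare
	 * and fcompare both set gives mask = 0xff00 | 0xf8 | 0x7 = 0xffff,
	 * i.e. only the exact BDFN maps to the PE; a bus-only compare with
	 * bcompare = 7 gives mask = 0xff00, mapping every device and
	 * function on that bus.
	 */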
2133 
2134 	if (action == OPAL_UNMAP_PE)
2135 		pe_number = PHB4_RESERVED_PE_NUM(p);
2136 
2137 	/* Map or unmap the RTT range */
2138 	for (idx = 0; idx < RTT_TABLE_ENTRIES; idx++)
2139 		if ((idx & mask) == (bdfn & mask))
2140 			p->tbl_rtt[idx] = pe_number;
2141 
2142 	/* Invalidate the RID Translation Cache (RTC) inside the PHB */
2143 	out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);
2144 
2145 	return OPAL_SUCCESS;
2146 }
2147 
2148 static int64_t phb4_set_peltv(struct phb *phb,
2149 			      uint32_t parent_pe,
2150 			      uint32_t child_pe,
2151 			      uint8_t state)
2152 {
2153 	struct phb4 *p = phb_to_phb4(phb);
2154 	uint32_t idx, mask;
2155 
2156 	/* Sanity check */
2157 	if (parent_pe >= p->num_pes || child_pe >= p->num_pes)
2158 		return OPAL_PARAMETER;
2159 
2160 	/* Find index for parent PE */
2161 	idx = parent_pe * (p->max_num_pes / 8);
2162 	idx += (child_pe / 8);
2163 	mask = 0x1 << (7 - (child_pe % 8));
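	/*
	 * Illustrative example with max_num_pes = 512: parent_pe = 4 and
	 * child_pe = 10 give idx = 4 * 64 + 1 = 257 and mask = 0x20, i.e.
	 * bit 2 (counting from the MSB of the byte) of the parent's
	 * 64-byte PELT-V row.
	 */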
2164 
2165 	if (state)
2166 		p->tbl_peltv[idx] |= mask;
2167 	else
2168 		p->tbl_peltv[idx] &= ~mask;
2169 
2170 	return OPAL_SUCCESS;
2171 }
2172 
2173 static void phb4_prepare_link_change(struct pci_slot *slot, bool is_up)
2174 {
2175 	struct phb4 *p = phb_to_phb4(slot->phb);
2176 	uint32_t reg32;
2177 
2178 	p->has_link = is_up;
2179 
2180 	if (is_up) {
2181 		/* Clear AER receiver error status */
2182 		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
2183 				    PCIECAP_AER_CE_STATUS,
2184 				    PCIECAP_AER_CE_RECVR_ERR);
2185 		/* Unmask receiver error status in AER */
2186 		phb4_pcicfg_read32(&p->phb, 0, p->aercap +
2187 				   PCIECAP_AER_CE_MASK, &reg32);
2188 		reg32 &= ~PCIECAP_AER_CE_RECVR_ERR;
2189 		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
2190 				    PCIECAP_AER_CE_MASK, reg32);
2191 
2192 		/* Don't block PCI-CFG */
2193 		p->flags &= ~PHB4_CFG_BLOCKED;
2194 
2195 		/* Re-enable link down errors */
2196 		out_be64(p->regs + PHB_PCIE_MISC_STRAP,
2197 			 0x0000060000000000ull);
2198 
2199 		/* Re-enable error status indicators that trigger irqs */
2200 		out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE,
2201 			 0x2130006efca8bc00ull);
2202 		out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE,
2203 			 0x0080000000000000ull);
2204 		out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE,
2205 			 0xde0fff91035743ffull);
2206 
2207 	} else {
2208 		/* Mask AER receiver error */
2209 		phb4_pcicfg_read32(&p->phb, 0, p->aercap +
2210 				   PCIECAP_AER_CE_MASK, &reg32);
2211 		reg32 |= PCIECAP_AER_CE_RECVR_ERR;
2212 		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
2213 				    PCIECAP_AER_CE_MASK, reg32);
2214 
2215 		/* Clear error link enable & error link down kill enable */
2216 		out_be64(p->regs + PHB_PCIE_MISC_STRAP, 0);
2217 
2218 		/* Disable all error status indicators that trigger irqs */
2219 		out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE, 0);
2220 		out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE, 0);
2221 		out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE, 0);
2222 
2223 		/* Block PCI-CFG access */
2224 		p->flags |= PHB4_CFG_BLOCKED;
2225 	}
2226 }
2227 
2228 static int64_t phb4_get_presence_state(struct pci_slot *slot, uint8_t *val)
2229 {
2230 	struct phb4 *p = phb_to_phb4(slot->phb);
2231 	uint64_t hps, dtctl;
2232 
2233 	/* Test for PHB in error state ? */
2234 	if (p->broken)
2235 		return OPAL_HARDWARE;
2236 
2237 	/* Check hotplug status */
2238 	hps = in_be64(p->regs + PHB_PCIE_HOTPLUG_STATUS);
2239 	if (!(hps & PHB_PCIE_HPSTAT_PRESENCE)) {
2240 		*val = OPAL_PCI_SLOT_PRESENT;
2241 	} else {
2242 		/*
2243 		 * If it says not present but link is up, then we assume
2244 		 * we are on a broken simulation environment and still
2245 		 * return a valid presence. Otherwise, not present.
2246 		 */
2247 		dtctl = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2248 		if (dtctl & PHB_PCIE_DLP_TL_LINKACT) {
2249 			PHBERR(p, "Presence detect 0 but link set !\n");
2250 			*val = OPAL_PCI_SLOT_PRESENT;
2251 		} else {
2252 			*val = OPAL_PCI_SLOT_EMPTY;
2253 		}
2254 	}
2255 
2256 	return OPAL_SUCCESS;
2257 }
2258 
2259 static int64_t phb4_get_link_info(struct pci_slot *slot, uint8_t *speed,
2260 				   uint8_t *width)
2261 {
2262 	struct phb4 *p = phb_to_phb4(slot->phb);
2263 	uint64_t reg;
2264 	uint16_t state;
2265 	int64_t rc;
2266 	uint8_t s;
2267 
2268 	/* Link is up, let's find the actual speed */
2269 	reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2270 	if (!(reg & PHB_PCIE_DLP_TL_LINKACT)) {
2271 		*width = 0;
2272 		if (speed)
2273 			*speed = 0;
2274 		return OPAL_SUCCESS;
2275 	}
2276 
2277 	rc = phb4_pcicfg_read16(&p->phb, 0,
2278 				p->ecap + PCICAP_EXP_LSTAT, &state);
2279 	if (rc != OPAL_SUCCESS) {
2280 		PHBERR(p, "%s: Error %lld getting link state\n", __func__, rc);
2281 		return OPAL_HARDWARE;
2282 	}
2283 
2284 	if (state & PCICAP_EXP_LSTAT_DLLL_ACT) {
2285 		*width = ((state & PCICAP_EXP_LSTAT_WIDTH) >> 4);
2286 		s =  state & PCICAP_EXP_LSTAT_SPEED;
2287 	} else {
2288 		*width = 0;
2289 		s = 0;
2290 	}
2291 
2292 	if (speed)
2293 		*speed = s;
2294 
2295 	return OPAL_SUCCESS;
2296 }
2297 
2298 static int64_t phb4_get_link_state(struct pci_slot *slot, uint8_t *val)
2299 {
2300 	return phb4_get_link_info(slot, NULL, val);
2301 }
2302 
2303 static int64_t phb4_retry_state(struct pci_slot *slot)
2304 {
2305 	struct phb4 *p = phb_to_phb4(slot->phb);
2306 
2307 	/* Mark link as down */
2308 	phb4_prepare_link_change(slot, false);
2309 
2310 	/* Last attempt to activate link */
2311 	if (slot->link_retries == 1) {
2312 		if (slot->state == PHB4_SLOT_LINK_WAIT) {
2313 			PHBERR(p, "Falling back to GEN1 training\n");
2314 			p->max_link_speed = 1;
2315 		}
2316 	}
2317 
2318 	if (!slot->link_retries--) {
2319 		switch (slot->state) {
2320 		case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
2321 			PHBERR(p, "Presence detected but no electrical link\n");
2322 			break;
2323 		case PHB4_SLOT_LINK_WAIT:
2324 			PHBERR(p, "Electrical link detected but won't train\n");
2325 			break;
2326 		case PHB4_SLOT_LINK_STABLE:
2327 			PHBERR(p, "Link trained but was degraded or unstable\n");
2328 			break;
2329 		default:
2330 			PHBERR(p, "Unknown link issue\n");
2331 		}
2332 		return OPAL_HARDWARE;
2333 	}
2334 
2335 	pci_slot_set_state(slot, PHB4_SLOT_CRESET_START);
2336 	return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
2337 }
2338 
2339 static uint64_t phb4_train_info(struct phb4 *p, uint64_t reg, unsigned long dt)
2340 {
2341 	uint64_t ltssm_state = GETFIELD(PHB_PCIE_DLP_LTSSM_TRC, reg);
2342 	char s[80];
2343 
2344 	snprintf(s, sizeof(s), "TRACE:0x%016llx % 2lims",
2345 		 reg, tb_to_msecs(dt));
2346 
2347 	if (reg & PHB_PCIE_DLP_TL_LINKACT)
2348 		snprintf(s, sizeof(s), "%s trained ", s);
2349 	else if (reg & PHB_PCIE_DLP_TRAINING)
2350 		snprintf(s, sizeof(s), "%s training", s);
2351 	else if (reg & PHB_PCIE_DLP_INBAND_PRESENCE)
2352 		snprintf(s, sizeof(s), "%s presence", s);
2353 	else
2354 		snprintf(s, sizeof(s), "%s         ", s);
2355 
2356 	snprintf(s, sizeof(s), "%s GEN%lli:x%02lli:", s,
2357 		 GETFIELD(PHB_PCIE_DLP_LINK_SPEED, reg),
2358 		 GETFIELD(PHB_PCIE_DLP_LINK_WIDTH, reg));
2359 
2360 	switch (ltssm_state) {
2361 	case PHB_PCIE_DLP_LTSSM_RESET:
2362 		snprintf(s, sizeof(s), "%sreset", s);
2363 		break;
2364 	case PHB_PCIE_DLP_LTSSM_DETECT:
2365 		snprintf(s, sizeof(s), "%sdetect", s);
2366 		break;
2367 	case PHB_PCIE_DLP_LTSSM_POLLING:
2368 		snprintf(s, sizeof(s), "%spolling", s);
2369 		break;
2370 	case PHB_PCIE_DLP_LTSSM_CONFIG:
2371 		snprintf(s, sizeof(s), "%sconfig", s);
2372 		break;
2373 	case PHB_PCIE_DLP_LTSSM_L0:
2374 		snprintf(s, sizeof(s), "%sL0", s);
2375 		break;
2376 	case PHB_PCIE_DLP_LTSSM_REC:
2377 		snprintf(s, sizeof(s), "%srecovery", s);
2378 		break;
2379 	case PHB_PCIE_DLP_LTSSM_L1:
2380 		snprintf(s, sizeof(s), "%sL1", s);
2381 		break;
2382 	case PHB_PCIE_DLP_LTSSM_L2:
2383 		snprintf(s, sizeof(s), "%sL2", s);
2384 		break;
2385 	case PHB_PCIE_DLP_LTSSM_HOTRESET:
2386 		snprintf(s, sizeof(s), "%shotreset", s);
2387 		break;
2388 	case PHB_PCIE_DLP_LTSSM_DISABLED:
2389 		snprintf(s, sizeof(s), "%sdisabled", s);
2390 		break;
2391 	case PHB_PCIE_DLP_LTSSM_LOOPBACK:
2392 		snprintf(s, sizeof(s), "%sloopback", s);
2393 		break;
2394 	default:
2395 		snprintf(s, sizeof(s), "%sinvalid", s);
2396 	}
2397 	PHBNOTICE(p, "%s\n", s);
2398 
2399 	return ltssm_state;
2400 }
2401 
2402 static void phb4_dump_pec_err_regs(struct phb4 *p)
2403 {
2404 	uint64_t nfir_p_wof, nfir_n_wof, err_aib;
2405 	uint64_t err_rpt0, err_rpt1;
2406 
2407 	/* Read the PCI and NEST FIRs and dump them. Also cache PCI/NEST FIRs */
2408 	xscom_read(p->chip_id,
2409 		   p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR,  &p->pfir_cache);
2410 	xscom_read(p->chip_id,
2411 		   p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR_WOF, &nfir_p_wof);
2412 	xscom_read(p->chip_id,
2413 		   p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
2414 	xscom_read(p->chip_id,
2415 		   p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR_WOF, &nfir_n_wof);
2416 	xscom_read(p->chip_id,
2417 		   p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT0, &err_rpt0);
2418 	xscom_read(p->chip_id,
2419 		   p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT1, &err_rpt1);
2420 	xscom_read(p->chip_id,
2421 		   p->pci_stk_xscom + XPEC_PCI_STK_PBAIB_ERR_REPORT, &err_aib);
2422 
2423 	PHBERR(p, "            PCI FIR=%016llx\n", p->pfir_cache);
2424 	PHBERR(p, "        PCI FIR WOF=%016llx\n", nfir_p_wof);
2425 	PHBERR(p, "           NEST FIR=%016llx\n", p->nfir_cache);
2426 	PHBERR(p, "       NEST FIR WOF=%016llx\n", nfir_n_wof);
2427 	PHBERR(p, "           ERR RPT0=%016llx\n", err_rpt0);
2428 	PHBERR(p, "           ERR RPT1=%016llx\n", err_rpt1);
2429 	PHBERR(p, "            AIB ERR=%016llx\n", err_aib);
2430 }
2431 
2432 static void phb4_dump_capp_err_regs(struct phb4 *p)
2433 {
2434 	uint64_t fir, apc_master_err, snoop_err, transport_err;
2435 	uint64_t tlbi_err, capp_err_status;
2436 	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
2437 
2438 	xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
2439 	xscom_read(p->chip_id, CAPP_APC_MASTER_ERR_RPT + offset,
2440 		   &apc_master_err);
2441 	xscom_read(p->chip_id, CAPP_SNOOP_ERR_RTP + offset, &snoop_err);
2442 	xscom_read(p->chip_id, CAPP_TRANSPORT_ERR_RPT + offset, &transport_err);
2443 	xscom_read(p->chip_id, CAPP_TLBI_ERR_RPT + offset, &tlbi_err);
2444 	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &capp_err_status);
2445 
2446 	PHBERR(p, "           CAPP FIR=%016llx\n", fir);
2447 	PHBERR(p, "CAPP APC MASTER ERR=%016llx\n", apc_master_err);
2448 	PHBERR(p, "     CAPP SNOOP ERR=%016llx\n", snoop_err);
2449 	PHBERR(p, " CAPP TRANSPORT ERR=%016llx\n", transport_err);
2450 	PHBERR(p, "      CAPP TLBI ERR=%016llx\n", tlbi_err);
2451 	PHBERR(p, "    CAPP ERR STATUS=%016llx\n", capp_err_status);
2452 }
2453 
2454 /* Check if AIB is fenced via PBCQ NFIR */
2455 static bool phb4_fenced(struct phb4 *p)
2456 {
2457 
2458 	/* Already fenced ? */
2459 	if (p->flags & PHB4_AIB_FENCED)
2460 		return true;
2461 
2462 	/*
2463 	 * An all 1's read from the PHB indicates a PHB freeze/fence. We
2464 	 * don't really differentiate between them at this point.
2465 	 */
2466 	if (in_be64(p->regs + PHB_CPU_LOADSTORE_STATUS) != 0xfffffffffffffffful)
2467 		return false;
2468 
2469 	/* Mark ourselves fenced */
2470 	p->flags |= PHB4_AIB_FENCED;
2471 
2472 	PHBERR(p, "PHB Freeze/Fence detected !\n");
2473 	phb4_dump_pec_err_regs(p);
2474 
2475 	/*
2476 	 * dump capp error registers in case phb was fenced due to capp.
2477 	 * Expect p->nfir_cache already updated in phb4_dump_pec_err_regs()
2478 	 */
2479 	if (p->nfir_cache & XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP)
2480 		phb4_dump_capp_err_regs(p);
2481 
2482 	phb4_eeh_dump_regs(p);
2483 
2484 	return true;
2485 }
2486 
2487 static bool phb4_check_reg(struct phb4 *p, uint64_t reg)
2488 {
2489 	if (reg == 0xffffffffffffffffUL)
2490 		return !phb4_fenced(p);
2491 	return true;
2492 }
2493 
2494 static void phb4_get_info(struct phb *phb, uint16_t bdfn, uint8_t *speed,
2495 			  uint8_t *width)
2496 {
2497 	int32_t ecap;
2498 	uint32_t cap;
2499 
2500 	ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP);
2501 	pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_LCAP, &cap);
2502 	*width = (cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4;
2503 	*speed = cap & PCICAP_EXP_LCAP_MAXSPD;
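	/*
	 * Illustrative example, assuming the standard PCIe Link
	 * Capabilities encoding: a value whose low bits are 0x104 decodes
	 * as max width x16 and max speed GEN4.
	 */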
2504 }
2505 
2506 #define PVR_POWER9_CUMULUS		0x00002000
2507 
2508 static bool phb4_chip_retry_workaround(void)
2509 {
2510 	unsigned int pvr;
2511 
2512 	if (pci_retry_all)
2513 		return true;
2514 
2515 	/* Chips that need this retry are:
2516 	 *  - CUMULUS DD1.0
2517 	 *  - NIMBUS DD2.0 (and DD1.0, but it is unsupported so no check).
2518 	 */
2519 	pvr = mfspr(SPR_PVR);
2520 	if (pvr & PVR_POWER9_CUMULUS) {
2521 		if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
2522 			return true;
2523 	} else { /* NIMBUS */
2524 		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
2525 			return true;
2526 	}
2527 	return false;
2528 }
2529 
2530 struct pci_card_id {
2531 	uint16_t vendor;
2532 	uint16_t device;
2533 };
2534 
2535 static struct pci_card_id retry_whitelist[] = {
2536 	{ 0x1000, 0x005d }, /* LSI Logic MegaRAID SAS-3 3108 */
2537 	{ 0x1000, 0x00c9 }, /* LSI MPT SAS-3 */
2538 	{ 0x104c, 0x8241 }, /* TI xHCI USB */
2539 	{ 0x1077, 0x2261 }, /* QLogic ISP2722-based 16/32Gb FC */
2540 	{ 0x10b5, 0x8725 }, /* PLX Switch: p9dsu, witherspoon */
2541 	{ 0x10b5, 0x8748 }, /* PLX Switch: ZZ */
2542 	{ 0x11f8, 0xf117 }, /* PMC-Sierra/MicroSemi NV1604 */
2543 	{ 0x15b3, 0x1013 }, /* Mellanox ConnectX-4 */
2544 	{ 0x15b3, 0x1017 }, /* Mellanox ConnectX-5 */
2545 	{ 0x15b3, 0x1019 }, /* Mellanox ConnectX-5 Ex */
2546 	{ 0x1a03, 0x1150 }, /* ASPEED AST2500 Switch */
2547 	{ 0x8086, 0x10fb }, /* Intel x520 10G Eth */
2548 	{ 0x9005, 0x028d }, /* MicroSemi PM8069 */
2549 };
2550 
2551 #define VENDOR(vdid) ((vdid) & 0xffff)
2552 #define DEVICE(vdid) (((vdid) >> 16) & 0xffff)
2553 
2554 static bool phb4_adapter_in_whitelist(uint32_t vdid)
2555 {
2556 	int i;
2557 
2558 	if (pci_retry_all)
2559 		return true;
2560 
2561 	for (i = 0; i < ARRAY_SIZE(retry_whitelist); i++)
2562 		if ((retry_whitelist[i].vendor == VENDOR(vdid)) &&
2563 		    (retry_whitelist[i].device == DEVICE(vdid)))
2564 			return true;
2565 
2566 	return false;
2567 }
2568 
2569 static struct pci_card_id lane_eq_disable[] = {
2570 	{ 0x10de, 0x17fd }, /* Nvidia GM200GL [Tesla M40] */
2571 	{ 0x10de, 0x1db4 }, /* Nvidia GV100 */
2572 };
2573 
2574 static bool phb4_lane_eq_retry_whitelist(uint32_t vdid)
2575 {
2576 	int i;
2577 
2578 	for (i = 0; i < ARRAY_SIZE(lane_eq_disable); i++)
2579 		if ((lane_eq_disable[i].vendor == VENDOR(vdid)) &&
2580 		    (lane_eq_disable[i].device == DEVICE(vdid)))
2581 			return true;
2582 	return false;
2583 }
2584 
2585 static void phb4_lane_eq_change(struct phb4 *p, uint32_t vdid)
2586 {
2587 	p->lane_eq_en = !phb4_lane_eq_retry_whitelist(vdid);
2588 }
2589 
2590 #define min(x,y) ((x) < (y) ? x : y)
2591 #define max(x,y) ((x) > (y) ? x : y)
2592 
2593 static bool phb4_link_optimal(struct pci_slot *slot, uint32_t *vdid)
2594 {
2595 	struct phb4 *p = phb_to_phb4(slot->phb);
2596 	uint64_t reg;
2597 	uint32_t id;
2598 	uint16_t bdfn, lane_errs;
2599 	uint8_t trained_speed, phb_speed, dev_speed, target_speed, rx_errs;
2600 	uint8_t trained_width, phb_width, dev_width, target_width;
2601 	bool optimal_speed, optimal_width, optimal, retry_enabled, rx_err_ok;
2602 
2603 
2604 	/* Current trained state */
2605 	phb4_get_link_info(slot, &trained_speed, &trained_width);
2606 
2607 	/* Get PHB capability */
2608 	/* NOTE: phb_speed will account for the software speed limit */
2609 	phb4_get_info(slot->phb, 0, &phb_speed, &phb_width);
2610 
2611 	/* Get device capability */
2612 	bdfn = 0x0100; /* bus=1 dev=0 fn=0 */
2613 	/* Since this is the first access, we need to wait for CRS */
2614 	if (!pci_wait_crs(slot->phb, bdfn, &id))
2615 		return true;
2616 	phb4_get_info(slot->phb, bdfn, &dev_speed, &dev_width);
2617 
2618 	/* Work out if we are optimally trained */
2619 	target_speed = min(phb_speed, dev_speed);
2620 	optimal_speed = (trained_speed >= target_speed);
2621 	target_width = min(phb_width, dev_width);
2622 	optimal_width = (trained_width >= target_width);
2623 	optimal = optimal_width && optimal_speed;
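	/*
	 * Illustrative example: a GEN4 x16 capable PHB with a GEN3 x8
	 * device gives a target of GEN3 x8, so a link trained at GEN3 x8
	 * is optimal, while GEN2 or x4 would count as degraded.
	 */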
2624 	retry_enabled = (phb4_chip_retry_workaround() &&
2625 			 phb4_adapter_in_whitelist(id)) ||
2626 		phb4_lane_eq_retry_whitelist(id);
2627 	reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_COUNTERS);
2628 	rx_errs =  GETFIELD(PHB_PCIE_DLP_RX_ERR_CNT, reg);
2629 	rx_err_ok = (rx_errs < rx_err_max);
2630 	reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_STATUS);
2631 	lane_errs = GETFIELD(PHB_PCIE_DLP_LANE_ERR, reg);
2632 
2633 	PHBDBG(p, "LINK: Card [%04x:%04x] %s Retry:%s\n", VENDOR(id),
2634 	       DEVICE(id), optimal ? "Optimal" : "Degraded",
2635 	       retry_enabled ? "enabled" : "disabled");
2636 	PHBDBG(p, "LINK: Speed Train:GEN%i PHB:GEN%i DEV:GEN%i%s\n",
2637 	       trained_speed, phb_speed, dev_speed, optimal_speed ? "" : " *");
2638 	PHBDBG(p, "LINK: Width Train:x%02i PHB:x%02i DEV:x%02i%s\n",
2639 	       trained_width, phb_width, dev_width, optimal_width ? "" : " *");
2640 	PHBDBG(p, "LINK: RX Errors Now:%i Max:%i Lane:0x%04x%s\n",
2641 	       rx_errs, rx_err_max, lane_errs, rx_err_ok ? "" : " *");
2642 
2643 	if (vdid)
2644 		*vdid = id;
2645 
2646 	/* Always do RX error retry irrespective of chip and card */
2647 	if (!rx_err_ok)
2648 		return false;
2649 
2650 	if (!retry_enabled)
2651 		return true;
2652 
2653 	return optimal;
2654 }
2655 
2656 /*
2657  * This is a trace function to watch what's happening during PCIe link
2658  * training.  If any errors are detected it simply returns so the
2659  * normal code can deal with it.
2660  */
2661 static void phb4_link_trace(struct phb4 *p, uint64_t target_state, int max_ms)
2662 {
2663 	unsigned long now, end, start = mftb(), state = 0;
2664 	uint64_t trwctl, reg, reglast = -1;
2665 	bool enabled;
2666 
2667 	/*
2668 	 * Enable the DLP trace outputs. If we don't the LTSSM state in
2669 	 * PHB_PCIE_DLP_TRAIN_CTL won't be updated and always reads zero.
2670 	 */
2671 	trwctl = phb4_read_reg(p, PHB_PCIE_DLP_TRWCTL);
2672 	enabled = !!(trwctl & PHB_PCIE_DLP_TRWCTL_EN);
2673 	if (!enabled) {
2674 		phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL,
2675 				trwctl | PHB_PCIE_DLP_TRWCTL_EN);
2676 	}
2677 
2678 	end = start + msecs_to_tb(max_ms);
2679 	now = start;
2680 
2681 	do {
2682 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2683 		if (reg != reglast)
2684 			state = phb4_train_info(p, reg, now - start);
2685 		reglast = reg;
2686 
2687 		if (!phb4_check_reg(p, reg)) {
2688 			PHBNOTICE(p, "TRACE: PHB fenced.\n");
2689 			goto out;
2690 		}
2691 
2692 		if (tb_compare(now, end) == TB_AAFTERB) {
2693 			PHBNOTICE(p, "TRACE: Timed out after %dms\n", max_ms);
2694 			goto out;
2695 		}
2696 
2697 		now = mftb();
2698 	} while (state != target_state);
2699 
2700 	PHBNOTICE(p, "TRACE: Reached target state\n");
2701 
2702 out:
2703 	/*
2704 	 * The trace enable bit is a clock gate for the tracing logic. Turn
2705 	 * it off to save power if we're not using it otherwise.
2706 	 */
2707 	if (!enabled)
2708 		phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL, trwctl);
2709 }
2710 
2711 /*
2712  * This helper is called repeatedly by the host sync notifier mechanism, which
2713  * relies on the kernel to regularly poll the OPAL_SYNC_HOST_REBOOT call as it
2714  * shuts down.
2715  */
2716 static bool phb4_host_sync_reset(void *data)
2717 {
2718 	struct phb4 *p = (struct phb4 *)data;
2719 	struct phb *phb = &p->phb;
2720 	int64_t rc = 0;
2721 
2722 	/* Make sure no-one modifies the phb flags while we are active */
2723 	phb_lock(phb);
2724 
2725 	/* Make sure CAPP is attached to the PHB */
2726 	if (p->capp)
2727 		/* Call phb ops to disable capi */
2728 		rc = phb->ops->set_capi_mode(phb, OPAL_PHB_CAPI_MODE_PCIE,
2729 				       p->capp->attached_pe);
2730 	else
2731 		rc = OPAL_SUCCESS;
2732 
2733 	/* Continue kicking state-machine if in middle of a mode transition */
2734 	if (rc == OPAL_BUSY)
2735 		rc = phb->slot->ops.run_sm(phb->slot);
2736 
2737 	phb_unlock(phb);
2738 
2739 	return rc <= OPAL_SUCCESS;
2740 }
2741 
2742 /*
2743  * Notification from the pci-core that a pci slot state machine completed.
2744  * We use this callback to mark the CAPP disabled if we were waiting for it.
2745  */
2746 static int64_t phb4_slot_sm_run_completed(struct pci_slot *slot, uint64_t err)
2747 {
2748 	struct phb4 *p = phb_to_phb4(slot->phb);
2749 
2750 	/* Check if we are disabling the capp */
2751 	if (p->flags & PHB4_CAPP_DISABLE) {
2752 
2753 		/* Unset struct capp so that we don't fall into a creset loop */
2754 		p->flags &= ~(PHB4_CAPP_DISABLE);
2755 		p->capp->phb = NULL;
2756 		p->capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
2757 
2758 		/* Remove the host sync notifier if we are done. */
2759 		opal_del_host_sync_notifier(phb4_host_sync_reset, p);
2760 		if (err) {
2761 			/* Force a CEC ipl reboot */
2762 			disable_fast_reboot("CAPP: reset failed");
2763 			PHBERR(p, "CAPP: Unable to reset. Error=%lld\n", err);
2764 		} else {
2765 			PHBINF(p, "CAPP: reset complete\n");
2766 		}
2767 	}
2768 
2769 	return OPAL_SUCCESS;
2770 }
2771 
2772 static int64_t phb4_poll_link(struct pci_slot *slot)
2773 {
2774 	struct phb4 *p = phb_to_phb4(slot->phb);
2775 	uint64_t reg;
2776 	uint32_t vdid;
2777 
2778 	switch (slot->state) {
2779 	case PHB4_SLOT_NORMAL:
2780 	case PHB4_SLOT_LINK_START:
2781 		PHBDBG(p, "LINK: Start polling\n");
2782 		slot->retries = PHB4_LINK_ELECTRICAL_RETRIES;
2783 		pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT_ELECTRICAL);
2784 		/* Polling early here has no chance of a false positive */
2785 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
2786 	case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
2787 		/*
2788 		 * Wait for the link electrical connection to be
2789 		 * established (shorter timeout). This allows us to
2790 		 * work around spurious presence detect on some machines
2791 		 * without waiting 10s each time
2792 		 *
2793 		 * Note: We *also* check for the full link up bit here
2794 		 * because simics doesn't seem to implement the electrical
2795 		 * link bit at all
2796 		 */
2797 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2798 		if (!phb4_check_reg(p, reg)) {
2799 			PHBERR(p, "PHB fence waiting for electrical link\n");
2800 			return phb4_retry_state(slot);
2801 		}
2802 
2803 		if (reg & (PHB_PCIE_DLP_INBAND_PRESENCE |
2804 			   PHB_PCIE_DLP_TL_LINKACT)) {
2805 			PHBDBG(p, "LINK: Electrical link detected\n");
2806 			pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT);
2807 			slot->retries = PHB4_LINK_WAIT_RETRIES;
2808 			/* No wait here since already have an elec link */
2809 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
2810 		}
2811 
2812 		if (slot->retries-- == 0) {
2813 			PHBDBG(p, "LINK: No in-band presence\n");
2814 			return OPAL_SUCCESS;
2815 		}
2816 		/* Retry */
2817 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
2818 	case PHB4_SLOT_LINK_WAIT:
2819 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2820 		if (!phb4_check_reg(p, reg)) {
2821 			PHBERR(p, "LINK: PHB fence waiting for link training\n");
2822 			return phb4_retry_state(slot);
2823 		}
2824 		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
2825 			PHBDBG(p, "LINK: Link is up\n");
2826 			phb4_prepare_link_change(slot, true);
2827 			pci_slot_set_state(slot, PHB4_SLOT_LINK_STABLE);
2828 			return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
2829 		}
2830 
2831 		if (slot->retries-- == 0) {
2832 			PHBERR(p, "LINK: Timeout waiting for link up\n");
2833 			PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
2834 			return phb4_retry_state(slot);
2835 		}
2836 		/* Retry */
2837 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
2838 	case PHB4_SLOT_LINK_STABLE:
2839 		/* Sanity check link */
2840 		if (phb4_fenced(p)) {
2841 			PHBERR(p, "LINK: PHB fenced waiting for stability\n");
2842 			return phb4_retry_state(slot);
2843 		}
2844 		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
2845 		if (!phb4_check_reg(p, reg)) {
2846 			PHBERR(p, "LINK: PHB fence reading training control\n");
2847 			return phb4_retry_state(slot);
2848 		}
2849 		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
2850 			PHBDBG(p, "LINK: Link is stable\n");
2851 			if (!phb4_link_optimal(slot, &vdid)) {
2852 				PHBDBG(p, "LINK: Link degraded\n");
2853 				if (slot->link_retries) {
2854 					phb4_lane_eq_change(p, vdid);
2855 					return phb4_retry_state(slot);
2856 				}
2857 				/*
2858 				 * Link is degraded but no more retries, so
2859 				 * settle for what we have :-(
2860 				 */
2861 				PHBERR(p, "LINK: Degraded but no more retries\n");
2862 			}
2863 			pci_restore_slot_bus_configs(slot);
2864 			pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
2865 			return OPAL_SUCCESS;
2866 		}
2867 		PHBERR(p, "LINK: Went down waiting for stability\n");
2868 		PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
2869 		return phb4_retry_state(slot);
2870 	default:
2871 		PHBERR(p, "LINK: Unexpected slot state %08x\n",
2872 		       slot->state);
2873 	}
2874 
2875 	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
2876 	return OPAL_HARDWARE;
2877 }
2878 
2879 static unsigned int phb4_get_max_link_speed(struct phb4 *p, struct dt_node *np)
2880 {
2881 	unsigned int max_link_speed;
2882 	struct proc_chip *chip;
2883 	chip = get_chip(p->chip_id);
2884 
2885 	/* Priority order: NVRAM -> dt -> GEN3 dd2.00 -> GEN4 */
2886 	max_link_speed = 4;
2887 	if (p->rev == PHB4_REV_NIMBUS_DD20 &&
2888 	    ((0xf & chip->ec_level) == 0) && chip->ec_rev == 0)
2889 		max_link_speed = 3;
2890 	if (np) {
2891 		if (dt_has_node_property(np, "ibm,max-link-speed", NULL)) {
2892 			max_link_speed = dt_prop_get_u32(np, "ibm,max-link-speed");
2893 			p->dt_max_link_speed = max_link_speed;
2894 		}
2895 		else {
2896 			p->dt_max_link_speed = 0;
2897 		}
2898 	}
2899 	else {
2900 		if (p->dt_max_link_speed > 0) {
2901 			max_link_speed = p->dt_max_link_speed;
2902 		}
2903 	}
2904 	if (pcie_max_link_speed)
2905 		max_link_speed = pcie_max_link_speed;
2906 	if (max_link_speed > 4) /* clamp to 4 */
2907 		max_link_speed = 4;
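	/*
	 * In other words: an explicit pcie_max_link_speed override always
	 * wins, otherwise the device-tree "ibm,max-link-speed" value (or
	 * the copy cached from it) is used, and only then does the
	 * GEN3-on-DD2.00 / GEN4 default apply; the result is clamped to
	 * GEN4 in all cases.
	 */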
2908 
2909 	return max_link_speed;
2910 }
2911 
2912 static void phb4_assert_perst(struct pci_slot *slot, bool assert)
2913 {
2914 	struct phb4 *p = phb_to_phb4(slot->phb);
2915 	uint16_t linkctl;
2916 	uint64_t reg;
2917 
2918 	/*
2919 	 * Disable the link before asserting PERST. The Cursed RAID card
2920 	 * in ozrom1 (9005:028c) has problems coming back if PERST is asserted
2921 	 * while link is active. To work around the problem we assert the link
2922 	 * disable bit before asserting PERST. Asserting the secondary reset
2923 	 * bit in the btctl register also works.
2924 	 */
2925 	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &linkctl);
2926 	reg = phb4_read_reg(p, PHB_PCIE_CRESET);
2927 
2928 	if (assert) {
2929 		linkctl |= PCICAP_EXP_LCTL_LINK_DIS;
2930 		reg &= ~PHB_PCIE_CRESET_PERST_N;
2931 	} else {
2932 		linkctl &= ~PCICAP_EXP_LCTL_LINK_DIS;
2933 		reg |= PHB_PCIE_CRESET_PERST_N;
2934 	}
2935 
2936 	phb4_write_reg(p, PHB_PCIE_CRESET, reg);
2937 	phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, linkctl);
2938 }
2939 
2940 static int64_t phb4_hreset(struct pci_slot *slot)
2941 {
2942 	struct phb4 *p = phb_to_phb4(slot->phb);
2943 	uint16_t brctl;
2944 	uint8_t presence = 1;
2945 
2946 	switch (slot->state) {
2947 	case PHB4_SLOT_NORMAL:
2948 		PHBDBG(p, "HRESET: Starts\n");
2949 		if (slot->ops.get_presence_state)
2950 			slot->ops.get_presence_state(slot, &presence);
2951 		if (!presence) {
2952 			PHBDBG(p, "HRESET: No device\n");
2953 			return OPAL_SUCCESS;
2954 		}
2955 
2956 		PHBDBG(p, "HRESET: Prepare for link down\n");
2957 		phb4_prepare_link_change(slot, false);
2958 		/* fall through */
2959 	case PHB4_SLOT_HRESET_START:
2960 		PHBDBG(p, "HRESET: Assert\n");
2961 
2962 		phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
2963 		brctl |= PCI_CFG_BRCTL_SECONDARY_RESET;
2964 		phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
2965 		pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY);
2966 
2967 		return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
2968 	case PHB4_SLOT_HRESET_DELAY:
2969 		PHBDBG(p, "HRESET: Deassert\n");
2970 
2971 		/* Clear link errors before we deassert reset */
2972 		phb4_err_clear_regb(p);
2973 
2974 		phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
2975 		brctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
2976 		phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
2977 
2978 		/*
2979 		 * Due to some oddball adapters bouncing the link
2980 		 * training a couple of times, we wait for a full second
2981 		 * before we start checking the link status, otherwise
2982 		 * we can get a spurious link down interrupt which
2983 		 * causes us to EEH immediately.
2984 		 */
2985 		pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY2);
2986 		return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
2987 	case PHB4_SLOT_HRESET_DELAY2:
2988 		pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
2989 		return slot->ops.poll_link(slot);
2990 	default:
2991 		PHBERR(p, "Unexpected slot state %08x\n", slot->state);
2992 	}
2993 
2994 	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
2995 	return OPAL_HARDWARE;
2996 }
2997 
2998 static int64_t phb4_freset(struct pci_slot *slot)
2999 {
3000 	struct phb4 *p = phb_to_phb4(slot->phb);
3001 
3002 	switch(slot->state) {
3003 	case PHB4_SLOT_NORMAL:
3004 	case PHB4_SLOT_FRESET_START:
3005 		PHBDBG(p, "FRESET: Starts\n");
3006 
3007 		/* Reset max link speed for training */
3008 		p->max_link_speed = phb4_get_max_link_speed(p, NULL);
3009 
3010 		PHBDBG(p, "FRESET: Prepare for link down\n");
3011 		phb4_prepare_link_change(slot, false);
3012 
3013 		if (!p->skip_perst) {
3014 			PHBDBG(p, "FRESET: Assert\n");
3015 			phb4_assert_perst(slot, true);
3016 			pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
3017 
3018 			/* 250ms assert time aligns with powernv */
3019 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
3020 		}
3021 
3022 		/* To skip the assert during boot time */
3023 		PHBDBG(p, "FRESET: Assert skipped\n");
3024 		pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
3025 		p->skip_perst = false;
3026 		/* fall through */
3027 	case PHB4_SLOT_FRESET_ASSERT_DELAY:
3028 		/* Clear link errors before we deassert PERST */
3029 		phb4_err_clear_regb(p);
3030 
3031 		PHBDBG(p, "FRESET: Deassert\n");
3032 		phb4_assert_perst(slot, false);
3033 
3034 		if (pci_tracing)
3035 			phb4_link_trace(p, PHB_PCIE_DLP_LTSSM_L0, 3000);
3036 
3037 		pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
3038 		return slot->ops.poll_link(slot);
3039 	default:
3040 		PHBERR(p, "Unexpected slot state %08x\n", slot->state);
3041 	}
3042 
3043 	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3044 	return OPAL_HARDWARE;
3045 }
3046 
3047 static int64_t load_capp_ucode(struct phb4 *p)
3048 {
3049 	int64_t rc;
3050 
3051 	if (p->index != CAPP0_PHB_INDEX && p->index != CAPP1_PHB_INDEX)
3052 		return OPAL_HARDWARE;
3053 
3054 	/* 0x434150504c494448 = 'CAPPLIDH' in ASCII */
3055 	rc = capp_load_ucode(p->chip_id, p->phb.opal_id, p->index,
3056 			0x434150504c494448UL, PHB4_CAPP_REG_OFFSET(p),
3057 			CAPP_APC_MASTER_ARRAY_ADDR_REG,
3058 			CAPP_APC_MASTER_ARRAY_WRITE_REG,
3059 			CAPP_SNP_ARRAY_ADDR_REG,
3060 			CAPP_SNP_ARRAY_WRITE_REG);
3061 	return rc;
3062 }
3063 
3064 static int do_capp_recovery_scoms(struct phb4 *p)
3065 {
3066 	uint64_t rc, reg, end;
3067 	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);
3068 
3069 
3070 	/* Get the status of CAPP recovery */
3071 	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
3072 
3073 	/* No recovery in progress, ignore */
3074 	if ((reg & PPC_BIT(0)) == 0) {
3075 		PHBDBG(p, "CAPP: No recovery in progress\n");
3076 		return OPAL_SUCCESS;
3077 	}
3078 
3079 	PHBDBG(p, "CAPP: Waiting for recovery to complete\n");
3080 	/* recovery timer failure period 168ms */
3081 	end = mftb() + msecs_to_tb(168);
3082 	while ((reg & (PPC_BIT(1) | PPC_BIT(5) | PPC_BIT(9))) == 0) {
3083 
3084 		time_wait_ms(5);
3085 		xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
3086 
3087 		if (tb_compare(mftb(), end) != TB_ABEFOREB) {
3088 			PHBERR(p, "CAPP: Capp recovery Timed-out.\n");
3089 			end = 0;
3090 			break;
3091 		}
3092 	}
3093 
3094 	/* Check if the recovery failed or passed */
3095 	if (reg & PPC_BIT(1)) {
3096 		uint64_t act0, act1, mask, fir;
3097 
3098 		/* Use the Action0/1 and mask to only clear the bits
3099 		 * that cause a local checkstop. Other bits need the
3100 		 * attention of the PRD daemon.
3101 		 */
3102 		xscom_read(p->chip_id, CAPP_FIR_ACTION0 + offset, &act0);
3103 		xscom_read(p->chip_id, CAPP_FIR_ACTION1 + offset, &act1);
3104 		xscom_read(p->chip_id, CAPP_FIR_MASK + offset, &mask);
3105 		xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
3106 
3107 		fir = ~(fir & ~mask & act0 & act1);
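		/*
		 * ~(fir & ~mask & act0 & act1) has 0s only at the unmasked
		 * FIR bits whose Action0 and Action1 are both set (the
		 * local checkstop ones), so the write to the clear register
		 * below is intended to reset just those bits.
		 */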
3108 		PHBDBG(p, "Doing CAPP recovery scoms\n");
3109 
3110 		/* update capp fir clearing bits causing local checkstop */
3111 		PHBDBG(p, "Resetting CAPP Fir with mask 0x%016llX\n", fir);
3112 		xscom_write(p->chip_id, CAPP_FIR_CLEAR + offset, fir);
3113 
3114 		/* disable snoops */
3115 		xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0);
3116 		load_capp_ucode(p);
3117 
3118 		/* clear err rpt reg*/
3119 		xscom_write(p->chip_id, CAPP_ERR_RPT_CLR + offset, 0);
3120 
3121 		/* clear capp fir */
3122 		xscom_write(p->chip_id, CAPP_FIR + offset, 0);
3123 
3124 		/* Just reset bits 0 and 1 and don't touch any other bit */
3125 		xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);
3126 		reg &= ~(PPC_BIT(0) | PPC_BIT(1));
3127 		xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, reg);
3128 
3129 		PHBDBG(p, "CAPP recovery complete\n");
3130 		rc = OPAL_SUCCESS;
3131 
3132 	} else {
3133 		/* We will most likely checkstop here due to the FIR ACTION
3134 		 * for a failed recovery, so this message would never be
3135 		 * logged. But if we still get here, return an error to force
3136 		 * a fence of the PHB.
3137 		 */
3138 		if (reg  & PPC_BIT(5))
3139 			PHBERR(p, "CAPP: Capp recovery Failed\n");
3140 		else if (reg  & PPC_BIT(9))
3141 			PHBERR(p, "CAPP: Capp recovery hang detected\n");
3142 		else if (end != 0)
3143 			PHBERR(p, "CAPP: Unknown recovery failure\n");
3144 
3145 		PHBDBG(p, "CAPP: Err/Status-reg=0x%016llx\n", reg);
3146 		rc = OPAL_HARDWARE;
3147 	}
3148 
3149 	return rc;
3150 }
3151 
3152 /*
3153  * Disable CAPI mode on a PHB. Must be done while PHB is fenced and
3154  * not in recovery.
3155  */
3156 static void disable_capi_mode(struct phb4 *p)
3157 {
3158 	uint64_t reg;
3159 	struct capp *capp = p->capp;
3160 
3161 	PHBINF(p, "CAPP: Deactivating\n");
3162 
3163 	/* Check if CAPP attached to the PHB and active */
3164 	if (!capp || capp->phb != &p->phb) {
3165 		PHBDBG(p, "CAPP: Not attached to this PHB!\n");
3166 		return;
3167 	}
3168 
3169 	xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
3170 	if (!(reg & PPC_BIT(0))) {
3171 		/* Not in CAPI mode, no action required */
3172 		PHBERR(p, "CAPP: Not enabled!\n");
3173 		return;
3174 	}
3175 
3176 	/* CAPP should already be out of recovery in this function */
3177 	capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
3178 	if (reg & PPC_BIT(0)) {
3179 		PHBERR(p, "CAPP: Can't disable while still in recovery!\n");
3180 		return;
3181 	}
3182 
3183 	PHBINF(p, "CAPP: Disabling CAPI mode\n");
3184 
3185 	/* First Phase Reset CAPP Registers */
3186 	/* CAPP is about to be disabled: mark TLBI_FENCED and tlbi_psl_is_dead */
3187 	capp_xscom_write(capp, CAPP_ERR_STATUS_CTRL, PPC_BIT(3) | PPC_BIT(4));
3188 
3189 	/* Flush SUE uOP1 Register */
3190 	if (p->rev != PHB4_REV_NIMBUS_DD10)
3191 		capp_xscom_write(capp, FLUSH_SUE_UOP1, 0);
3192 
3193 	/* Release DMA/STQ engines */
3194 	capp_xscom_write(capp, APC_FSM_READ_MASK, 0ull);
3195 	capp_xscom_write(capp, XPT_FSM_RMM, 0ull);
3196 
3197 	/* Disable snoop */
3198 	capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
3199 
3200 	/* Clear flush SUE state map register */
3201 	capp_xscom_write(capp, FLUSH_SUE_STATE_MAP, 0);
3202 
3203 	/* Disable epoch timer */
3204 	capp_xscom_write(capp, EPOCH_RECOVERY_TIMERS_CTRL, 0);
3205 
3206 	/* CAPP Transport Control Register */
3207 	capp_xscom_write(capp, TRANSPORT_CONTROL, PPC_BIT(15));
3208 
3209 	/* Disable snooping */
3210 	capp_xscom_write(capp, SNOOP_CONTROL, 0);
3211 	capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
3212 
3213 	/* APC Master PB Control Register - disable examining cResps */
3214 	capp_xscom_write(capp, APC_MASTER_PB_CTRL, 0);
3215 
3216 	/* APC Master Config Register - de-select PHBs */
3217 	xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
3218 			 APC_MASTER_CAPI_CTRL, 0, PPC_BITMASK(2, 3));
3219 
3220 	/* Clear all error registers */
3221 	capp_xscom_write(capp, CAPP_ERR_RPT_CLR, 0);
3222 	capp_xscom_write(capp, CAPP_FIR, 0);
3223 	capp_xscom_write(capp, CAPP_FIR_ACTION0, 0);
3224 	capp_xscom_write(capp, CAPP_FIR_ACTION1, 0);
3225 	capp_xscom_write(capp, CAPP_FIR_MASK, 0);
3226 
3227 	/* Second Phase Reset PEC/PHB Registers */
3228 
3229 	/* Reset the stack overrides if any */
3230 	xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PRDSTKOVR, 0);
3231 	xscom_write(p->chip_id, p->pe_xscom +
3232 		    XPEC_NEST_READ_STACK_OVERRIDE, 0);
3233 
3234 	/* PE Bus AIB Mode Bits. Disable Tracing. Leave HOL Blocking as it is */
3235 	if (!(p->rev == PHB4_REV_NIMBUS_DD10) && p->index == CAPP1_PHB_INDEX)
3236 		xscom_write_mask(p->chip_id,
3237 				 p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, 0,
3238 				 PPC_BIT(30));
3239 
3240 	/* Reset for PCI to PB data movement */
3241 	xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
3242 			 0, XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
3243 
3244 	/* Disable CAPP mode in PEC CAPP Control Register */
3245 	xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, 0ull);
3246 }
3247 
3248 static int64_t phb4_creset(struct pci_slot *slot)
3249 {
3250 	struct phb4 *p = phb_to_phb4(slot->phb);
3251 	struct capp *capp = p->capp;
3252 	uint64_t pbcq_status;
3253 	uint64_t creset_time, wait_time;
3254 
3255 	/* Don't even try fixing a broken PHB */
3256 	if (p->broken)
3257 		return OPAL_HARDWARE;
3258 
3259 	switch (slot->state) {
3260 	case PHB4_SLOT_NORMAL:
3261 	case PHB4_SLOT_CRESET_START:
3262 		PHBDBG(p, "CRESET: Starts\n");
3263 
3264 		p->creset_start_time = mftb();
3265 
3266 		phb4_prepare_link_change(slot, false);
3267 		/* Clear error inject register, preventing recursive errors */
3268 		xscom_write(p->chip_id, p->pe_xscom + 0x2, 0x0);
3269 
3270 		/* Prevent HMI when PHB gets fenced as we are disabling CAPP */
3271 		if (p->flags & PHB4_CAPP_DISABLE &&
3272 		    capp && capp->phb == slot->phb) {
3273 			/* Since there will be no HMI, set the recovery flag manually. */
3274 			p->flags |= PHB4_CAPP_RECOVERY;
3275 			xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
3276 					 CAPP_FIR_MASK,
3277 					 PPC_BIT(31), PPC_BIT(31));
3278 		}
3279 
3280 		/* Force fence on the PHB to work around a non-existent PE */
3281 		if (!phb4_fenced(p))
3282 			xscom_write(p->chip_id, p->pe_stk_xscom + 0x2,
3283 				    0x0000002000000000UL);
3284 
3285 		/*
3286 		 * Force use of ASB for register access until the PHB has
3287 		 * been fully reset.
3288 		 */
3289 		p->flags |= PHB4_CFG_USE_ASB | PHB4_AIB_FENCED;
3290 
3291 		/* Assert PERST before clearing errors */
3292 		phb4_assert_perst(slot, true);
3293 
3294 		/* Clear errors, following the proper sequence */
3295 		phb4_err_clear(p);
3296 
3297 		/* Actual reset */
3298 		xscom_write(p->chip_id, p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET,
3299 			    0x8000000000000000UL);
3300 
3301 		/* Read errors in PFIR and NFIR */
3302 		xscom_read(p->chip_id, p->pci_stk_xscom + 0x0, &p->pfir_cache);
3303 		xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &p->nfir_cache);
3304 
3305 		pci_slot_set_state(slot, PHB4_SLOT_CRESET_WAIT_CQ);
3306 		slot->retries = 500;
3307 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
3308 	case PHB4_SLOT_CRESET_WAIT_CQ:
3309 
3310 		// Wait until operations are complete
3311 		xscom_read(p->chip_id, p->pe_stk_xscom + 0xc, &pbcq_status);
3312 		if (!(pbcq_status & 0xC000000000000000UL)) {
3313 			PHBDBG(p, "CRESET: No pending transactions\n");
3314 
3315 			/* capp recovery */
3316 			if ((p->flags & PHB4_CAPP_RECOVERY) &&
3317 			    (do_capp_recovery_scoms(p) != OPAL_SUCCESS))
3318 				goto error;
3319 
3320 			if (p->flags & PHB4_CAPP_DISABLE)
3321 				disable_capi_mode(p);
3322 
3323 			/* Clear errors in PFIR and NFIR */
3324 			xscom_write(p->chip_id, p->pci_stk_xscom + 0x1,
3325 				    ~p->pfir_cache);
3326 			xscom_write(p->chip_id, p->pe_stk_xscom + 0x1,
3327 				    ~p->nfir_cache);
3328 
3329 			/* Re-read errors in PFIR and NFIR and reset any new
3330 			 * error reported.
3331 			 */
3332 			xscom_read(p->chip_id, p->pci_stk_xscom +
3333 				   XPEC_PCI_STK_PCI_FIR, &p->pfir_cache);
3334 			xscom_read(p->chip_id, p->pe_stk_xscom +
3335 				   XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
3336 
3337 			if (p->pfir_cache || p->nfir_cache) {
3338 				PHBERR(p, "CRESET: PHB still fenced !!\n");
3339 				phb4_dump_pec_err_regs(p);
3340 
3341 				/* Reset the PHB errors */
3342 				xscom_write(p->chip_id, p->pci_stk_xscom +
3343 					    XPEC_PCI_STK_PCI_FIR, 0);
3344 				xscom_write(p->chip_id, p->pe_stk_xscom +
3345 					    XPEC_NEST_STK_PCI_NFIR, 0);
3346 			}
3347 
3348 			/* Clear PHB from reset */
3349 			xscom_write(p->chip_id,
3350 				    p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET, 0x0);
3351 
3352 			pci_slot_set_state(slot, PHB4_SLOT_CRESET_REINIT);
3353 			/* After lifting PHB reset, wait while logic settles */
3354 			return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
3355 		}
3356 
3357 		if (slot->retries-- == 0) {
3358 			PHBERR(p, "Timeout waiting for pending transaction\n");
3359 			goto error;
3360 		}
3361 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(100));
3362 	case PHB4_SLOT_CRESET_REINIT:
3363 		PHBDBG(p, "CRESET: Reinitialization\n");
3364 		p->flags &= ~PHB4_AIB_FENCED;
3365 		p->flags &= ~PHB4_CAPP_RECOVERY;
3366 		p->flags &= ~PHB4_CFG_USE_ASB;
3367 		phb4_init_hw(p);
3368 		pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
3369 
3370 		/*
3371 		 * The PERST is sticky across resets, but LINK_DIS isn't.
3372 		 * Re-assert it here now that we've reset the PHB.
3373 		 */
3374 		phb4_assert_perst(slot, true);
3375 
3376 		/*
3377 		 * wait either 100ms (for the ETU logic) or until we've had
3378 		 * PERST asserted for 250ms.
3379 		 */
3380 		creset_time = tb_to_msecs(mftb() - p->creset_start_time);
3381 		if (creset_time < 250)
3382 			wait_time = max(100, 250 - creset_time);
3383 		else
3384 			wait_time = 100;
3385 		PHBDBG(p, "CRESET: wait_time = %lld\n", wait_time);
3386 		return pci_slot_set_sm_timeout(slot, msecs_to_tb(wait_time));
3387 
3388 	case PHB4_SLOT_CRESET_FRESET:
3389 		/*
3390 		 * We asserted PERST at the beginning of the CRESET and we
3391 		 * have waited long enough, so we can skip it in the freset
3392 		 * procedure.
3393 		 */
3394 		p->skip_perst = true;
3395 		pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3396 		return slot->ops.freset(slot);
3397 	default:
3398 		PHBERR(p, "CRESET: Unexpected slot state %08x, resetting...\n",
3399 		       slot->state);
3400 		pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3401 		return slot->ops.creset(slot);
3402 
3403 	}
3404 
3405 error:
3406 	/* Mark the PHB as dead and expect it to be removed */
3407 	p->broken = true;
3408 	return OPAL_HARDWARE;
3409 }
3410 
3411 /*
3412  * Initialize root complex slot, which is mainly used to
3413  * do fundamental reset before PCI enumeration in PCI core.
3414  * When probing root complex and building its real slot,
3415  * the operations will be copied over.
3416  */
3417 static struct pci_slot *phb4_slot_create(struct phb *phb)
3418 {
3419 	struct pci_slot *slot;
3420 
3421 	slot = pci_slot_alloc(phb, NULL);
3422 	if (!slot)
3423 		return slot;
3424 
3425 	/* Elementary functions */
3426 	slot->ops.get_presence_state  = phb4_get_presence_state;
3427 	slot->ops.get_link_state      = phb4_get_link_state;
3428 	slot->ops.get_power_state     = NULL;
3429 	slot->ops.get_attention_state = NULL;
3430 	slot->ops.get_latch_state     = NULL;
3431 	slot->ops.set_power_state     = NULL;
3432 	slot->ops.set_attention_state = NULL;
3433 
3434 	/*
3435 	 * For PHB slots, we have to split the fundamental reset
3436 	 * into 2 steps. We might not have the first step which
3437 	 * is to power off/on the slot, or it's controlled by
3438 	 * individual platforms.
3439 	 */
3440 	slot->ops.prepare_link_change	= phb4_prepare_link_change;
3441 	slot->ops.poll_link		= phb4_poll_link;
3442 	slot->ops.hreset		= phb4_hreset;
3443 	slot->ops.freset		= phb4_freset;
3444 	slot->ops.creset		= phb4_creset;
3445 	slot->ops.completed_sm_run	= phb4_slot_sm_run_completed;
3446 	slot->link_retries		= PHB4_LINK_LINK_RETRIES;
3447 
3448 	return slot;
3449 }
3450 
3451 static uint64_t phb4_get_pesta(struct phb4 *p, uint64_t pe_number)
3452 {
3453 	uint64_t pesta, *pPEST;
3454 
3455 	pPEST = (uint64_t *)p->tbl_pest;
3456 
3457 	phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3458 	pesta = phb4_read_reg(p, PHB_IODA_DATA0);
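	/*
	 * If the PE is MMIO frozen, merge in the PESTA word from the
	 * in-memory PEST table (each PE has a two-dword entry, PESTA
	 * first) so the caller sees the full frozen state.
	 */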
3459 	if (pesta & IODA3_PESTA_MMIO_FROZEN)
3460 		pesta |= pPEST[2*pe_number];
3461 
3462 	return pesta;
3463 }
3464 
3465 /* Check if the chip requires escalating a freeze to fence on MMIO loads */
3466 static bool phb4_escalation_required(void)
3467 {
3468 	uint64_t pvr = mfspr(SPR_PVR);
3469 
3470 	/*
3471 	 * Escalation is required on the following chip versions:
3472 	 * - Cumulus DD1.0
3473 	 * - Nimbus DD2.0, DD2.1 (and DD1.0, but it is unsupported so no check).
3474 	 */
3475 	if (pvr & PVR_POWER9_CUMULUS) {
3476 		if (PVR_VERS_MAJ(pvr) == 1 && PVR_VERS_MIN(pvr) == 0)
3477 			return true;
3478 	} else { /* Nimbus */
3479 		if (PVR_VERS_MAJ(pvr) == 2 && PVR_VERS_MIN(pvr) < 2)
3480 			return true;
3481 	}
3482 
3483 	return false;
3484 }
3485 
3486 static bool phb4_freeze_escalate(uint64_t pesta)
3487 {
3488 	if ((GETFIELD(IODA3_PESTA_TRANS_TYPE, pesta) ==
3489 	     IODA3_PESTA_TRANS_TYPE_MMIOLOAD) &&
3490 	    (pesta & (IODA3_PESTA_CA_CMPLT_TMT | IODA3_PESTA_UR)))
3491 		return true;
3492 	return false;
3493 }
3494 
3495 static int64_t phb4_eeh_freeze_status(struct phb *phb, uint64_t pe_number,
3496 				      uint8_t *freeze_state,
3497 				      uint16_t *pci_error_type,
3498 				      uint16_t *severity)
3499 {
3500 	struct phb4 *p = phb_to_phb4(phb);
3501 	uint64_t peev_bit = PPC_BIT(pe_number & 0x3f);
3502 	uint64_t peev, pesta, pestb;
3503 
3504 	/* Defaults: not frozen */
3505 	*freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
3506 	*pci_error_type = OPAL_EEH_NO_ERROR;
3507 
3508 	/* Check dead */
3509 	if (p->broken) {
3510 		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
3511 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3512 		if (severity)
3513 			*severity = OPAL_EEH_SEV_PHB_DEAD;
3514 		return OPAL_HARDWARE;
3515 	}
3516 
3517 	/* Check fence and CAPP recovery */
3518 	if (phb4_fenced(p) || (p->flags & PHB4_CAPP_RECOVERY)) {
3519 		*freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
3520 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3521 		if (severity)
3522 			*severity = OPAL_EEH_SEV_PHB_FENCED;
3523 		return OPAL_SUCCESS;
3524 	}
3525 
3526 	/* Check the PEEV */
3527 	phb4_ioda_sel(p, IODA3_TBL_PEEV, pe_number / 64, false);
3528 	peev = in_be64(p->regs + PHB_IODA_DATA0);
3529 	if (!(peev & peev_bit))
3530 		return OPAL_SUCCESS;
3531 
3532 	/* Indicate that we have an ER pending */
3533 	phb4_set_err_pending(p, true);
3534 	if (severity)
3535 		*severity = OPAL_EEH_SEV_PE_ER;
3536 
3537 	/* Read the full PESTA */
3538 	pesta = phb4_get_pesta(p, pe_number);
3539 	/* Check if we need to escalate to fence */
3540 	if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
3541 		PHBERR(p, "Escalating freeze to fence PESTA[%lli]=%016llx\n",
3542 		       pe_number, pesta);
3543 		*severity = OPAL_EEH_SEV_PHB_FENCED;
3544 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3545 	}
3546 
3547 	/* Read the PESTB in the PHB */
3548 	phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3549 	pestb = phb4_read_reg(p, PHB_IODA_DATA0);
3550 
3551 	/* Convert PESTA/B to freeze_state */
3552 	if (pesta & IODA3_PESTA_MMIO_FROZEN)
3553 		*freeze_state |= OPAL_EEH_STOPPED_MMIO_FREEZE;
3554 	if (pestb & IODA3_PESTB_DMA_STOPPED)
3555 		*freeze_state |= OPAL_EEH_STOPPED_DMA_FREEZE;
3556 
3557 	return OPAL_SUCCESS;
3558 }
3559 
3560 static int64_t phb4_eeh_freeze_clear(struct phb *phb, uint64_t pe_number,
3561 				     uint64_t eeh_action_token)
3562 {
3563 	struct phb4 *p = phb_to_phb4(phb);
3564 	uint64_t err, peev;
3565 	int32_t i;
3566 	bool frozen_pe = false;
3567 
3568 	if (p->broken)
3569 		return OPAL_HARDWARE;
3570 
3571 	/* Check the error summary. If nothing is set, move on to clearing
3572 	 * the PESTs, which can contain a freeze state from a previous error
3573 	 * or one set explicitly by the user
3574 	 */
3575 	err = in_be64(p->regs + PHB_ETU_ERR_SUMMARY);
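	/* An all-ones read usually means the register access itself failed
	 * because the PHB is fenced, so double-check with phb4_fenced().
	 */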
3576 	if (err == 0xffffffffffffffffUL) {
3577 		if (phb4_fenced(p)) {
3578 			PHBERR(p, "eeh_freeze_clear on fenced PHB\n");
3579 			return OPAL_HARDWARE;
3580 		}
3581 	}
3582 	if (err != 0)
3583 		phb4_err_clear(p);
3584 
3585 	/*
3586 	 * The PEEV is also in system memory; accessing it directly
3587 	 * would give better performance.
3588 	 */
3589 	if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO) {
3590 		phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3591 		out_be64(p->regs + PHB_IODA_DATA0, 0);
3592 	}
3593 	if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_DMA) {
3594 		phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3595 		out_be64(p->regs + PHB_IODA_DATA0, 0);
3596 	}
3597 
3598 
3599 	/* Update ER pending indication */
3600 	phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
3601 	for (i = 0; i < p->num_pes/64; i++) {
3602 		peev = in_be64(p->regs + PHB_IODA_DATA0);
3603 		if (peev) {
3604 			frozen_pe = true;
3605 			break;
3606 		}
3607 	}
3608 	if (frozen_pe) {
3609 		p->err.err_src	 = PHB4_ERR_SRC_PHB;
3610 		p->err.err_class = PHB4_ERR_CLASS_ER;
3611 		p->err.err_bit   = -1;
3612 		phb4_set_err_pending(p, true);
3613 	} else
3614 		phb4_set_err_pending(p, false);
3615 
3616 	return OPAL_SUCCESS;
3617 }
3618 
3619 static int64_t phb4_eeh_freeze_set(struct phb *phb, uint64_t pe_number,
3620 				   uint64_t eeh_action_token)
3621 {
3622 	struct phb4 *p = phb_to_phb4(phb);
3623 	uint64_t data;
3624 
3625 	if (p->broken)
3626 		return OPAL_HARDWARE;
3627 
3628 	if (pe_number >= p->num_pes)
3629 		return OPAL_PARAMETER;
3630 
3631 	if (eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_MMIO &&
3632 	    eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_DMA &&
3633 	    eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_ALL)
3634 		return OPAL_PARAMETER;
3635 
3636 	if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_MMIO) {
3637 		phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3638 		data = in_be64(p->regs + PHB_IODA_DATA0);
3639 		data |= IODA3_PESTA_MMIO_FROZEN;
3640 		out_be64(p->regs + PHB_IODA_DATA0, data);
3641 	}
3642 
3643 	if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_DMA) {
3644 		phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3645 		data = in_be64(p->regs + PHB_IODA_DATA0);
3646 		data |= IODA3_PESTB_DMA_STOPPED;
3647 		out_be64(p->regs + PHB_IODA_DATA0, data);
3648 	}
3649 
3650 	return OPAL_SUCCESS;
3651 }
3652 
3653 static int64_t phb4_eeh_next_error(struct phb *phb,
3654 				   uint64_t *first_frozen_pe,
3655 				   uint16_t *pci_error_type,
3656 				   uint16_t *severity)
3657 {
3658 	struct phb4 *p = phb_to_phb4(phb);
3659 	uint64_t peev, pesta;
3660 	uint32_t peev_size = p->num_pes/64;
3661 	int32_t i, j;
3662 
3663 	/* If the PHB is broken, we needn't go forward */
3664 	if (p->broken) {
3665 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3666 		*severity = OPAL_EEH_SEV_PHB_DEAD;
3667 		return OPAL_SUCCESS;
3668 	}
3669 
3670 	if ((p->flags & PHB4_CAPP_RECOVERY)) {
3671 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3672 		*severity = OPAL_EEH_SEV_PHB_FENCED;
3673 		return OPAL_SUCCESS;
3674 	}
3675 
3676 	/*
3677 	 * Check if we already have pending errors. If that's
3678 	 * the case, gather more information about them, trying
3679 	 * the PBCQ prior to the PHB.
3680 	 */
3681 	if (phb4_err_pending(p) /*&&
3682 	    !phb4_err_check_pbcq(p) &&
3683 	    !phb4_err_check_lem(p) */)
3684 		phb4_set_err_pending(p, false);
3685 
3686 	/* Clear result */
3687 	*pci_error_type  = OPAL_EEH_NO_ERROR;
3688 	*severity	 = OPAL_EEH_SEV_NO_ERROR;
3689 	*first_frozen_pe = (uint64_t)-1;
3690 
3691 	/* Check frozen PEs */
3692 	if (!phb4_err_pending(p)) {
3693 		phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
3694 		for (i = 0; i < peev_size; i++) {
3695 			peev = in_be64(p->regs + PHB_IODA_DATA0);
3696 			if (peev) {
3697 				p->err.err_src	 = PHB4_ERR_SRC_PHB;
3698 				p->err.err_class = PHB4_ERR_CLASS_ER;
3699 				p->err.err_bit	 = -1;
3700 				phb4_set_err_pending(p, true);
3701 				break;
3702 			}
3703 		}
3704 	}
3705 
3706 	if (!phb4_err_pending(p))
3707 		return OPAL_SUCCESS;
3708 	/*
3709 	 * If the frozen PE is caused by a malfunctioning TLP, we
3710 	 * need to reset the PHB, so convert the ER to a PHB-fatal
3711 	 * error in that case.
3712 	 */
3713 	if (p->err.err_class == PHB4_ERR_CLASS_ER) {
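		/*
		 * Scan the PEEV from the top down: take the lowest-numbered
		 * frozen PE in the highest non-zero doubleword (PE number =
		 * doubleword index * 64 + big-endian bit index).
		 */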
3714 		for (i = peev_size - 1; i >= 0; i--) {
3715 			phb4_ioda_sel(p, IODA3_TBL_PEEV, i, false);
3716 			peev = in_be64(p->regs + PHB_IODA_DATA0);
3717 			for (j = 0; j < 64; j++) {
3718 				if (peev & PPC_BIT(j)) {
3719 					*first_frozen_pe = i * 64 + j;
3720 					break;
3721 				}
3722 			}
3723 			if (*first_frozen_pe != (uint64_t)(-1))
3724 				break;
3725 		}
3726 	}
3727 
3728 	if (*first_frozen_pe != (uint64_t)(-1)) {
3729 		pesta = phb4_get_pesta(p, *first_frozen_pe);
3730 		if (phb4_freeze_escalate(pesta)) {
3731 			PHBINF(p, "Escalating freeze to fence. PESTA[%lli]=%016llx\n",
3732 			       *first_frozen_pe, pesta);
3733 			p->err.err_class = PHB4_ERR_CLASS_FENCED;
3734 		}
3735 	}
3736 
3737 	switch (p->err.err_class) {
3738 	case PHB4_ERR_CLASS_DEAD:
3739 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3740 		*severity = OPAL_EEH_SEV_PHB_DEAD;
3741 		break;
3742 	case PHB4_ERR_CLASS_FENCED:
3743 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3744 		*severity = OPAL_EEH_SEV_PHB_FENCED;
3745 		break;
3746 	case PHB4_ERR_CLASS_ER:
3747 		*pci_error_type = OPAL_EEH_PE_ERROR;
3748 		*severity = OPAL_EEH_SEV_PE_ER;
3749 
3750 		/* No frozen PE ? */
3751 		if (*first_frozen_pe == (uint64_t)-1) {
3752 			*pci_error_type = OPAL_EEH_NO_ERROR;
3753 			*severity = OPAL_EEH_SEV_NO_ERROR;
3754 			phb4_set_err_pending(p, false);
3755 		}
3756 
3757 		break;
3758 	case PHB4_ERR_CLASS_INF:
3759 		*pci_error_type = OPAL_EEH_PHB_ERROR;
3760 		*severity = OPAL_EEH_SEV_INF;
3761 		break;
3762 	default:
3763 		*pci_error_type = OPAL_EEH_NO_ERROR;
3764 		*severity = OPAL_EEH_SEV_NO_ERROR;
3765 		phb4_set_err_pending(p, false);
3766 	}
3767 	return OPAL_SUCCESS;
3768 }
3769 
3770 static int64_t phb4_err_inject_finalize(struct phb4 *phb, uint64_t addr,
3771 					uint64_t mask, uint64_t ctrl,
3772 					bool is_write)
3773 {
3774 	if (is_write)
3775 		ctrl |= PHB_PAPR_ERR_INJ_CTL_WR;
3776 	else
3777 		ctrl |= PHB_PAPR_ERR_INJ_CTL_RD;
3778 
3779 	out_be64(phb->regs + PHB_PAPR_ERR_INJ_ADDR, addr);
3780 	out_be64(phb->regs + PHB_PAPR_ERR_INJ_MASK, mask);
3781 	out_be64(phb->regs + PHB_PAPR_ERR_INJ_CTL, ctrl);
3782 
3783 	return OPAL_SUCCESS;
3784 }
3785 
3786 static int64_t phb4_err_inject_mem32(struct phb4 *phb __unused,
3787 				     uint64_t pe_number __unused,
3788 				     uint64_t addr __unused,
3789 				     uint64_t mask __unused,
3790 				     bool is_write __unused)
3791 {
3792 	return OPAL_UNSUPPORTED;
3793 }
3794 
3795 static int64_t phb4_err_inject_mem64(struct phb4 *phb __unused,
3796 				     uint64_t pe_number __unused,
3797 				     uint64_t addr __unused,
3798 				     uint64_t mask __unused,
3799 				     bool is_write __unused)
3800 {
3801 	return OPAL_UNSUPPORTED;
3802 }
3803 
3804 static int64_t phb4_err_inject_cfg(struct phb4 *phb, uint64_t pe_number,
3805 				   uint64_t addr, uint64_t mask,
3806 				   bool is_write)
3807 {
3808 	uint64_t a, m, prefer, ctrl;
3809 	int bdfn;
3810 	bool is_bus_pe = false;
3811 
3812 	a = 0xffffull;
3813 	prefer = 0xffffull;
3814 	m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
3815 	ctrl = PHB_PAPR_ERR_INJ_CTL_CFG;
3816 
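	/*
	 * Walk the RTT looking for bdfns mapped to this PE: remember the
	 * first matching config address in 'prefer' as a fallback and stop
	 * early if the caller-supplied address already belongs to the PE.
	 */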
3817 	for (bdfn = 0; bdfn < RTT_TABLE_ENTRIES; bdfn++) {
3818 		if (phb->tbl_rtt[bdfn] != pe_number)
3819 			continue;
3820 
3821 		/* The PE can be associated with PCI bus or device */
3822 		is_bus_pe = false;
3823 		if ((bdfn + 8) < RTT_TABLE_ENTRIES &&
3824 		    phb->tbl_rtt[bdfn + 8] == pe_number)
3825 			is_bus_pe = true;
3826 
3827 		/* Figure out the PCI config address */
3828 		if (prefer == 0xffffull) {
3829 			if (is_bus_pe) {
3830 				m = PHB_PAPR_ERR_INJ_MASK_CFG;
3831 				prefer = SETFIELD(m, 0x0ull, (bdfn >> 8));
3832 			} else {
3833 				m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
3834 				prefer = SETFIELD(m, 0x0ull, bdfn);
3835 			}
3836 		}
3837 
3838 		/* Check whether the input address is valid */
3839 		if (!is_bus_pe &&
3840 		    GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG_ALL, addr) == bdfn) {
3841 			a = addr;
3842 			break;
3843 		}
3844 
3845 		if (is_bus_pe &&
3846 		    GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG, addr) == (bdfn >> 8)) {
3847 			a = addr;
3848 			break;
3849 		}
3850 	}
3851 
3852 	/* Invalid PE number */
3853 	if (prefer == 0xffffull)
3854 		return OPAL_PARAMETER;
3855 
3856 	/* Specified address is out of range */
3857 	if (a == 0xffffull)
3858 		a = prefer;
3859 	else
3860 		m = mask;
3861 
3862 	return phb4_err_inject_finalize(phb, a, m, ctrl, is_write);
3863 }
3864 
3865 static int64_t phb4_err_inject_dma(struct phb4 *phb __unused,
3866 				   uint64_t pe_number __unused,
3867 				   uint64_t addr __unused,
3868 				   uint64_t mask __unused,
3869 				   bool is_write __unused,
3870 				   bool is_64bits __unused)
3871 {
3872 	return OPAL_UNSUPPORTED;
3873 }
3874 
3875 static int64_t phb4_err_inject_dma32(struct phb4 *phb, uint64_t pe_number,
3876 				     uint64_t addr, uint64_t mask,
3877 				     bool is_write)
3878 {
3879 	return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, false);
3880 }
3881 
3882 static int64_t phb4_err_inject_dma64(struct phb4 *phb, uint64_t pe_number,
3883 				     uint64_t addr, uint64_t mask,
3884 				     bool is_write)
3885 {
3886 	return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, true);
3887 }
3888 
3889 
3890 static int64_t phb4_err_inject(struct phb *phb, uint64_t pe_number,
3891 			       uint32_t type, uint32_t func,
3892 			       uint64_t addr, uint64_t mask)
3893 {
3894 	struct phb4 *p = phb_to_phb4(phb);
3895 	int64_t (*handler)(struct phb4 *p, uint64_t pe_number,
3896 			   uint64_t addr, uint64_t mask, bool is_write);
3897 	bool is_write;
3898 
3899 	/* We can't inject errors into the reserved PE */
3900 	if (pe_number == PHB4_RESERVED_PE_NUM(p) || pe_number >= p->num_pes)
3901 		return OPAL_PARAMETER;
3902 
3903 	/* Clear leftover from last time */
3904 	out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
3905 
3906 	switch (func) {
3907 	case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR:
3908 	case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA:
3909 		is_write = false;
3910 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
3911 			handler = phb4_err_inject_mem64;
3912 		else
3913 			handler = phb4_err_inject_mem32;
3914 		break;
3915 	case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR:
3916 	case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA:
3917 		is_write = true;
3918 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
3919 			handler = phb4_err_inject_mem64;
3920 		else
3921 			handler = phb4_err_inject_mem32;
3922 		break;
3923 	case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR:
3924 	case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA:
3925 		is_write = false;
3926 		handler = phb4_err_inject_cfg;
3927 		break;
3928 	case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR:
3929 	case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA:
3930 		is_write = true;
3931 		handler = phb4_err_inject_cfg;
3932 		break;
3933 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR:
3934 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA:
3935 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER:
3936 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET:
3937 		is_write = false;
3938 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
3939 			handler = phb4_err_inject_dma64;
3940 		else
3941 			handler = phb4_err_inject_dma32;
3942 		break;
3943 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR:
3944 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA:
3945 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER:
3946 	case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET:
3947 		is_write = true;
3948 		if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
3949 			handler = phb4_err_inject_dma64;
3950 		else
3951 			handler = phb4_err_inject_dma32;
3952 		break;
3953 	default:
3954 		return OPAL_PARAMETER;
3955 	}
3956 
3957 	return handler(p, pe_number, addr, mask, is_write);
3958 }
3959 
3960 static int64_t phb4_get_diag_data(struct phb *phb,
3961 				  void *diag_buffer,
3962 				  uint64_t diag_buffer_len)
3963 {
3964 	bool fenced;
3965 	struct phb4 *p = phb_to_phb4(phb);
3966 	struct OpalIoPhb4ErrorData *data = diag_buffer;
3967 
3968 	if (diag_buffer_len < sizeof(struct OpalIoPhb4ErrorData))
3969 		return OPAL_PARAMETER;
3970 	if (p->broken)
3971 		return OPAL_HARDWARE;
3972 
3973 	/*
3974 	 * Dummy check for fence so that phb4_read_phb_status knows
3975 	 * whether to use ASB or AIB
3976 	 */
3977 	fenced = phb4_fenced(p);
3978 	phb4_read_phb_status(p, data);
3979 
3980 	if (!fenced)
3981 		phb4_eeh_dump_regs(p);
3982 
3983 	/*
3984 	 * We probably got here because of errors (INF class). In
3985 	 * that case, we need to clear the error explicitly.
3987 	 */
3988 	if (phb4_err_pending(p) &&
3989 	    p->err.err_class == PHB4_ERR_CLASS_INF &&
3990 	    p->err.err_src == PHB4_ERR_SRC_PHB) {
3991 		phb4_err_clear(p);
3992 		phb4_set_err_pending(p, false);
3993 	}
3994 
3995 	return OPAL_SUCCESS;
3996 }
3997 
3998 static uint64_t tve_encode_50b_noxlate(uint64_t start_addr, uint64_t end_addr)
3999 {
4000 	uint64_t tve;
4001 
4002 	/*
4003 	 * Put start address bits 49:24 into TVE[52:53]||[0:23]
4004 	 * and end address bits 49:24 into TVE[54:55]||[24:47]
4005 	 * and set TVE[51]
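	 * (bit numbers use big-endian PPC numbering: TVE[0] is the MSB)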
4006 	 */
4007 	tve  = (start_addr << 16) & (0xffffffull << 40);
4008 	tve |= (start_addr >> 38) & (3ull << 10);
4009 	tve |= (end_addr >>  8) & (0xfffffful << 16);
4010 	tve |= (end_addr >> 40) & (3ull << 8);
4011 	tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
4012 	return tve;
4013 }
4014 
4015 static bool phb4_is_dd20(struct phb4 *p)
4016 {
4017 	struct proc_chip *chip = get_chip(p->chip_id);
4018 
4019 	if (p->rev == PHB4_REV_NIMBUS_DD20 && ((0xf & chip->ec_level) == 0))
4020 		return true;
4021 	return false;
4022 }
4023 
4024 static int64_t phb4_get_capp_info(int chip_id, struct phb *phb,
4025 				  struct capp_info *info)
4026 {
4027 	struct phb4 *p = phb_to_phb4(phb);
4028 	uint32_t offset;
4029 
4030 	if (chip_id != p->chip_id)
4031 		return OPAL_PARAMETER;
4032 
4033 	/* Check if CAPP is attached to the PHB */
4034 	if (p->capp == NULL || p->capp->phb != phb)
4035 		return OPAL_PARAMETER;
4036 
4037 	offset = PHB4_CAPP_REG_OFFSET(p);
4038 
4039 	if (p->index == CAPP0_PHB_INDEX)
4040 		info->capp_index = 0;
4041 	if (p->index == CAPP1_PHB_INDEX)
4042 		info->capp_index = 1;
4043 	info->phb_index = p->index;
4044 	info->capp_fir_reg = CAPP_FIR + offset;
4045 	info->capp_fir_mask_reg = CAPP_FIR_MASK + offset;
4046 	info->capp_fir_action0_reg = CAPP_FIR_ACTION0 + offset;
4047 	info->capp_fir_action1_reg = CAPP_FIR_ACTION1 + offset;
4048 	info->capp_err_status_ctrl_reg = CAPP_ERR_STATUS_CTRL + offset;
4049 
4050 	return OPAL_SUCCESS;
4051 }
4052 
4053 static void phb4_init_capp_regs(struct phb4 *p, uint32_t capp_eng)
4054 {
4055 	uint64_t reg;
4056 	uint32_t offset;
4057 	uint8_t link_width_x16 = 1;
4058 
4059 	offset = PHB4_CAPP_REG_OFFSET(p);
4060 
4061 	/* Calculate the phb link width if card is attached to PEC2 */
4062 	if (p->index == CAPP1_PHB_INDEX) {
4063 		/* Check if PEC2 is in x8 or x16 mode.
4064 		 * PEC0 is always in x16
4065 		 */
4066 		xscom_read(p->chip_id, XPEC_PCI2_CPLT_CONF1, &reg);
4067 		link_width_x16 = ((reg & XPEC_PCI2_IOVALID_MASK) ==
4068 				  XPEC_PCI2_IOVALID_X16);
4069 	}
4070 
4071 	/* APC Master PowerBus Control Register */
4072 	xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
4073 	reg |= PPC_BIT(0); /* enable cResp exam */
4074 	reg |= PPC_BIT(3); /* disable vg not sys */
4075 	reg |= PPC_BIT(12);/* HW417025: disable capp virtual machines */
4076 	reg |= PPC_BIT(2); /* disable nn rn */
4077 	reg |= PPC_BIT(4); /* disable g */
4078 	reg |= PPC_BIT(5); /* disable ln */
4079 	xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
4080 
4081 	/* Set PHB mode, HPC Dir State and P9 mode */
4082 	xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset,
4083 		    0x1772000000000000UL);
4084 	PHBINF(p, "CAPP: port attached\n");
4085 
4086 	/* Set snoop ttype decoding, dir size to 512K */
4087 	xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0x9000000000000000UL);
4088 
4089 	/* Use Read Epsilon Tier2 for all scopes.
4090 	 * Set Tier2 Read Epsilon.
4091 	 */
4092 	xscom_read(p->chip_id, SNOOP_CONTROL + offset, &reg);
4093 	reg |= PPC_BIT(0);
4094 	reg |= PPC_BIT(35);
4095 	reg |= PPC_BIT(45);
4096 	reg |= PPC_BIT(46);
4097 	reg |= PPC_BIT(47);
4098 	reg |= PPC_BIT(50);
4099 	xscom_write(p->chip_id, SNOOP_CONTROL + offset, reg);
4100 
4101 	/* Transport Control Register */
4102 	xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
4103 	if (p->index == CAPP0_PHB_INDEX) {
4104 		reg |= PPC_BIT(1); /* Send Packet Timer Value */
4105 		reg |= PPC_BITMASK(10, 13); /* Send Packet Timer Value */
4106 		reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
4107 		reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
4108 		if (capp_eng & CAPP_MIN_STQ_ENGINES) {
4109 			/* 2 CAPP msg engines */
4110 			reg |= PPC_BIT(58);
4111 			reg |= PPC_BIT(59);
4112 			reg |= PPC_BIT(60);
4113 		}
4114 		if (capp_eng & CAPP_MAX_STQ_ENGINES) {
4115 			/* 14 CAPP msg engines */
4116 			reg |= PPC_BIT(60);
4117 		}
4118 		reg |= PPC_BIT(62);
4119 	}
4120 	if (p->index == CAPP1_PHB_INDEX) {
4121 		reg |= PPC_BIT(4); /* Send Packet Timer Value */
4122 		reg &= ~PPC_BIT(10); /* Set CI Store Buffer Threshold=5 */
4123 		reg |= PPC_BIT(11);  /* Set CI Store Buffer Threshold=5 */
4124 		reg &= ~PPC_BIT(12); /* Set CI Store Buffer Threshold=5 */
4125 		reg |= PPC_BIT(13);  /* Set CI Store Buffer Threshold=5 */
4126 		reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
4127 		reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
4128 		if (capp_eng & CAPP_MIN_STQ_ENGINES) {
4129 			/* 2 CAPP msg engines */
4130 			reg |= PPC_BIT(59);
4131 			reg |= PPC_BIT(60);
4132 
4133 		} else if (capp_eng & CAPP_MAX_STQ_ENGINES) {
4134 
4135 			if (link_width_x16)
4136 				/* 14 CAPP msg engines */
4137 				reg |= PPC_BIT(60) | PPC_BIT(62);
4138 			else
4139 				/* 6 CAPP msg engines */
4140 				reg |= PPC_BIT(60);
4141 		}
4142 	}
4143 	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
4144 
4145 	/* The transport control register needs to be loaded in two
4146 	 * steps. Once the register values have been set, we have to
4147 	 * write bit 63 to a '1', which loads the register values into
4148 	 * the ci store buffer logic.
4149 	 */
4150 	xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
4151 	reg |= PPC_BIT(63);
4152 	xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
4153 
4154 	/* Enable epoch timer */
4155 	xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset,
4156 		    0xC0000000FFF8FFE0UL);
4157 
4158 	/* Flush SUE State Map Register */
4159 	xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
4160 		    0x08020A0000000000UL);
4161 
4162 	/* Flush SUE uOP1 Register */
4163 	xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
4164 		    0xDCE0280428000000);
4165 
4166 	/* capp owns PHB read buffers */
4167 	if (p->index == CAPP0_PHB_INDEX) {
4168 		/* max PHB read buffers 0-47 */
4169 		reg = 0xFFFFFFFFFFFF0000UL;
4170 		if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
4171 			reg = 0xF000000000000000UL;
4172 		xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
4173 		xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
4174 	}
4175 	if (p->index == CAPP1_PHB_INDEX) {
4176 
4177 		if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
4178 			reg = 0xF000000000000000ULL;
4179 		} else if (link_width_x16) {
4180 			/* 0-47 (Read machines) are available for
4181 			 * capp use
4182 			 */
4183 			reg = 0x0000FFFFFFFFFFFFULL;
4184 		} else {
4185 			/* Set 30 Read machines for CAPP Minus
4186 			 * 20-27 for DMA
4187 			 */
4188 			reg = 0xFFFFF00E00000000ULL;
4189 		}
4190 		xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
4191 		xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
4192 	}
4193 
4194 	/* CAPP FIR Action 0 */
4195 	xscom_write(p->chip_id, CAPP_FIR_ACTION0 + offset, 0x0b1c000104060000UL);
4196 
4197 	/* CAPP FIR Action 1 */
4198 	xscom_write(p->chip_id, CAPP_FIR_ACTION1 + offset, 0x2b9c0001240E0000UL);
4199 
4200 	/* CAPP FIR MASK */
4201 	xscom_write(p->chip_id, CAPP_FIR_MASK + offset, 0x80031f98d8717000UL);
4202 
4203 	/* Mask the CAPP PSL Credit Timeout Register error */
4204 	xscom_write_mask(p->chip_id, CAPP_FIR_MASK + offset,
4205 			 PPC_BIT(46), PPC_BIT(46));
4206 
4207 	/* Deassert TLBI_FENCED and tlbi_psl_is_dead */
4208 	xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
4209 }
4210 
4211 /* override some inits with CAPI defaults */
4212 static void phb4_init_capp_errors(struct phb4 *p)
4213 {
4214 	/* Init_77: TXE Error AIB Fence Enable Register */
4215 	if (phb4_is_dd20(p))
4216 		out_be64(p->regs + 0x0d30,	0xdfffbf0ff7ddfff0ull);
4217 	else
4218 		out_be64(p->regs + 0x0d30,	0xdff7bf0ff7ddfff0ull);
4219 	/* Init_86: RXE_ARB Error AIB Fence Enable Register */
4220 	out_be64(p->regs + 0x0db0,	0xfbffd7bbfb7fbfefull);
4221 
4222 	/* Init_95: RXE_MRG Error AIB Fence Enable Register */
4223 	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
4224 
4225 	/* Init_104: RXE_TCE Error AIB Fence Enable Register */
4226 	out_be64(p->regs + 0x0eb0,	0xffaeffafffffffffull);
4227 
4228 	/* Init_113: PHB Error AIB Fence Enable Register */
4229 	out_be64(p->regs + 0x0cb0,	0x35777073ff000000ull);
4230 }
4231 
4232  /*
4233  * The capi indicator is over the 8 most significant bits on p9 (and
4234  * not 16). We stay away from bits 59 (TVE select), 60 and 61 (MSI)
4235  *
4236  * For the mask, we keep bit 59 in, as capi messages must hit TVE#0.
4237  * Bit 56 is not part of the mask, so that a NBW message (see below)
4238  * is also considered a capi message.
4239  */
4240 #define CAPIIND		0x0200
4241 #define CAPIMASK	0xFE00
4242 
4243 /*
4244  * Non-Blocking Write messages are a subset of capi messages, so the
4245  * indicator is the same as capi + an extra bit (56) to differentiate.
4246  * Mask is the same as capi + the extra bit
4247  */
4248 #define NBWIND		0x0300
4249 #define NBWMASK		0xFF00
4250 
4251 /*
4252  * The ASN indicator is used for tunneled operations (as_notify and
4253  * atomics).  Tunneled operation messages can be sent in PCI mode as
4254  * well as CAPI mode.
4255  *
4256  * The format of those messages is specific and, for as_notify
4257  * messages, the address field is hijacked to encode the LPID/PID/TID
4258  * of the target thread, so those messages should not go through
4259  * translation. They must hit TVE#1. Therefore bit 59 is part of the
4260  * indicator.
4261  */
4262 #define ASNIND		0x0C00
4263 #define ASNMASK		0xFF00
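
/*
 * Conceptually, the indicator/mask pairs above are used as compare/mask
 * checks on the upper PCIe address bits (via the CMPM registers set up
 * below): a message whose indicator bits 'ind' satisfy
 * (ind & CAPIMASK) == CAPIIND is routed as a capi message,
 * (ind & NBWMASK) == NBWIND additionally marks a non-blocking write, and
 * (ind & ASNMASK) == ASNIND marks a tunneled operation.
 */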
4264 
4265 /* Power Bus Common Queue Registers
4266  * All PBCQ and PBAIB registers are accessed via SCOM
4267  * NestBase = 4010C00 for PEC0
4268  *            4011000 for PEC1
4269  *            4011400 for PEC2
4270  * PCIBase  = D010800 for PE0
4271  *            E010800 for PE1
4272  *            F010800 for PE2
4273  *
4274  * Some registers are shared amongst all of the stacks and will only
4275  * have 1 copy. Other registers are implemented one per stack.
4276  * Registers that are duplicated will have an additional offset
4277  * of “StackBase” so that they have a unique address.
4278  * Stackoffset = 00000040 for Stack0
4279  *             = 00000080 for Stack1
4280  *             = 000000C0 for Stack2
4281  */
4282 static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
4283 				uint32_t capp_eng)
4284 {
4285 	uint64_t reg, start_addr, end_addr, stq_eng, dma_eng;
4286 	uint64_t mbt0, mbt1;
4287 	int i, window_num = -1;
4288 
4289 	/* CAPP Control Register */
4290 	xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
4291 	if (reg & PPC_BIT(0)) {
4292 		PHBDBG(p, "Already in CAPP mode\n");
4293 	}
4294 
4295 	for (i = 0; i < 500000; i++) {
4296 		/* PBCQ General Status Register */
4297 		xscom_read(p->chip_id,
4298 			   p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_STAT,
4299 			   &reg);
4300 		if (!(reg & 0xC000000000000000UL))
4301 			break;
4302 		time_wait_us(10);
4303 	}
4304 	if (reg & 0xC000000000000000UL) {
4305 		PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
4306 		return OPAL_HARDWARE;
4307 	}
4308 
4309 	stq_eng = 0x0000000000000000ULL;
4310 	dma_eng = 0x0000000000000000ULL;
4311 	if (p->index == CAPP0_PHB_INDEX) {
4312 		/* PBCQ is operating as a x16 stack
4313 		 * - The maximum number of engines given to CAPP will be
4314 		 * 14 and will be assigned in the order of STQ 15 to 2.
4315 		 * - 0-47 (Read machines) are available for capp use.
4316 		 */
4317 		stq_eng = 0x000E000000000000ULL; /* 14 CAPP msg engines */
4318 		dma_eng = 0x0000FFFFFFFFFFFFULL; /* 48 CAPP Read machines */
4319 	}
4320 
4321 	if (p->index == CAPP1_PHB_INDEX) {
4322 		/* Check if PEC is in x8 or x16 mode */
4323 		xscom_read(p->chip_id, XPEC_PCI2_CPLT_CONF1, &reg);
4324 
4325 		if ((reg & XPEC_PCI2_IOVALID_MASK) == XPEC_PCI2_IOVALID_X16) {
4326 			/* PBCQ is operating as a x16 stack
4327 			 * - The maximum number of engines given to CAPP will be
4328 			 * 14 and will be assigned in the order of STQ 15 to 2.
4329 			 * - 0-47 (Read machines) are available for capp use.
4330 			 */
4331 			stq_eng = 0x000E000000000000ULL;
4332 			dma_eng = 0x0000FFFFFFFFFFFFULL;
4333 		} else {
4334 
4335 			/* PBCQ is operating as a x8 stack
4336 			 * - The maximum number of engines given to CAPP should
4337 			 * be 6 and will be assigned in the order of 7 to 2.
4338 			 * - 0-30 (Read machines) are available for capp use.
4339 			 */
4340 			stq_eng = 0x0006000000000000ULL;
4341 			/* 30 Read machines for CAPP Minus 20-27 for DMA */
4342 			dma_eng = 0x0000FFFFF00E0000ULL;
4343 		}
4344 	}
4345 
4346 	if (capp_eng & CAPP_MIN_STQ_ENGINES)
4347 		stq_eng = 0x0002000000000000ULL; /* 2 capp msg engines */
4348 
4349 	/* CAPP Control Register. Enable CAPP Mode */
4350 	reg = 0x8000000000000000ULL; /* PEC works in CAPP Mode */
4351 	reg |= stq_eng;
4352 	if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
4353 		dma_eng = 0x0000F00000000000ULL; /* 4 CAPP Read machines */
4354 	reg |= dma_eng;
4355 	xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, reg);
4356 
4357 	/* PEC2 has 3 ETU's + 16 pci lanes that can operate as x16,
4358 	 * x8+x8 (bifurcated) or x8+x4+x4 (trifurcated) mode. When
4359 	 * a Mellanox CX5 card is attached to stack0 of this PEC (indicated by
4360 	 * a request to allocate CAPP_MAX_DMA_READ_ENGINES), we tweak the
4361 	 * default dma-read engine allocation to maximize DMA read performance
4362 	 */
4363 	if ((p->index == CAPP1_PHB_INDEX) &&
4364 	    (capp_eng & CAPP_MAX_DMA_READ_ENGINES)) {
4365 
4366 		/*
4367 		 * Allocate an additional 16/8 dma read engines to stack0/stack1
4368 		 * respectively. Read engines 0:31 are always assigned to
4369 		 * stack0 anyway. Also skip allocating DMA Read Engine-32
4370 		 * (leave Bit[0] of the XPEC_NEST_READ_STACK_OVERRIDE register
4371 		 * clear), as enabling that bit seems to cause a parity error
4372 		 * reported in NFIR[1]-nonbar_pe.
4373 		 */
4374 		reg = 0x7fff80007F008000ULL;
4375 
4376 		xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PRDSTKOVR, reg);
4377 		xscom_write(p->chip_id, p->pe_xscom +
4378 			    XPEC_NEST_READ_STACK_OVERRIDE, reg);
4379 
4380 		/* Log this reallocation as it may impact dma performance of
4381 		 * other slots connected to PEC2
4382 		 */
4383 		PHBINF(p, "CAPP: Set %d dma-read engines for PEC2/stack-0\n",
4384 		      32 + __builtin_popcountll(reg & PPC_BITMASK(0, 31)));
4385 		PHBDBG(p, "CAPP: XPEC_NEST_READ_STACK_OVERRIDE: %016llx\n",
4386 		       reg);
4387 	}
4388 
4389 	/* PCI to PB data movement ignores the PB init signal. */
4390 	xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
4391 			 XPEC_NEST_PBCQ_HW_CONFIG_PBINIT,
4392 			 XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
4393 
4394 	/* If pump mode is enabled don't do nodal broadcasts.
4395 	 */
4396 	xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &reg);
4397 	if (reg & PB_CFG_PUMP_MODE) {
4398 		reg = XPEC_NEST_PBCQ_HW_CONFIG_DIS_NODAL;
4399 		reg |= XPEC_NEST_PBCQ_HW_CONFIG_DIS_RNNN;
4400 		xscom_write_mask(p->chip_id,
4401 				 p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
4402 				 reg, reg);
4403 	}
4404 
4405 	/* PEC Phase 4 (PHB) registers adjustment
4406 	 * Inbound CAPP traffic: The CAPI can send both CAPP packets and
4407 	 * I/O packets. A PCIe packet is identified as a CAPP packet in
4408 	 * the PHB if the PCIe address matches either the CAPI
4409 	 * Compare/Mask register or its NBW Compare/Mask register.
4410 	 */
4411 
4412 	/*
4413 	 * Bit [0:7] XSL_DSNCTL[capiind]
4414 	 * Init_26 - CAPI Compare/Mask
4415 	 */
4416 	out_be64(p->regs + PHB_CAPI_CMPM,
4417 		 ((u64)CAPIIND << 48) |
4418 		 ((u64)CAPIMASK << 32) | PHB_CAPI_CMPM_ENABLE);
4419 
4420 	/* PB AIB Hardware Control Register
4421 	 * Wait 32 PCI clocks for a credit to become available
4422 	 * before rejecting.
4423 	 */
4424 	xscom_read(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, &reg);
4425 	reg |= PPC_BITMASK(40, 42);
4426 	if (p->index == CAPP1_PHB_INDEX)
4427 		reg |= PPC_BIT(30);
4428 	xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, reg);
4429 
4430 	/* non-translate/50-bit mode */
4431 	out_be64(p->regs + PHB_NXLATE_PREFIX, 0x0000000000000000Ull);
4432 
4433 	/* set tve no translate mode allow mmio window */
4434 	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
4435 
4436 	/*
4437 	 * In 50-bit non-translate mode, the fields of the TVE are
4438 	 * used to perform an address range check. In this mode TCE
4439 	 * Table Size(0) must be a '1' (TVE[51] = 1)
4440 	 *      PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
4441 	 *      PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
4442 	 *
4443 	 * TVE[51] = 1
4444 	 * TVE[56] = 1: 50-bit Non-Translate Mode Enable
4445 	 * TVE[0:23] = 0x000000
4446 	 * TVE[24:47] = 0xFFFFFF
4447 	 *
4448 	 * capi dma mode: CAPP DMA mode needs access to all of memory
4449 	 * capi mode: Allow address range (bit 14 = 1)
4450 	 *            0x0002000000000000: 0x0002FFFFFFFFFFFF
4451 	 *            TVE[52:53] = '10' and TVE[54:55] = '10'
4452 	 */
4453 
4454 	/* TVT#0: CAPI window + DMA, all memory */
4455 	start_addr = 0ull;
4456 	end_addr   = 0x0003ffffffffffffull;
4457 	p->tve_cache[pe_number * 2] =
4458 		tve_encode_50b_noxlate(start_addr, end_addr);
4459 
4460 	/* TVT#1: CAPI window + DMA, all memory, in bypass mode */
4461 	start_addr = (1ull << 59);
4462 	end_addr   = start_addr + 0x0003ffffffffffffull;
4463 	p->tve_cache[pe_number * 2 + 1] =
4464 		tve_encode_50b_noxlate(start_addr, end_addr);
4465 
4466 	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
4467 	for (i = 0; i < p->tvt_size; i++)
4468 		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
4469 
4470 	/*
4471 	 * Since TVT#0 is in by-pass mode, disable 32-bit MSI, as a
4472 	 * DMA write targeting 0x00000000FFFFxxxx would be interpreted
4473 	 * as a 32-bit MSI
4474 	 */
4475 	reg = in_be64(p->regs + PHB_PHB4_CONFIG);
4476 	reg &= ~PHB_PHB4C_32BIT_MSI_EN;
4477 	out_be64(p->regs + PHB_PHB4_CONFIG, reg);
4478 
4479 	/* set mbt bar to pass capi mmio window and keep the other
4480 	 * mmio values
4481 	 */
4482 	mbt0 = IODA3_MBT0_ENABLE | IODA3_MBT0_TYPE_M64 |
4483 	       SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
4484 	       SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
4485 	       (0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
4486 
4487 	mbt1 = IODA3_MBT1_ENABLE |
4488 	       (0x00ff000000000000ULL & IODA3_MBT1_MASK) |
4489 	       SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
4490 
4491 	for (i = 0; i < p->mbt_size; i++) {
4492 		/* search if the capi mmio window is already present */
4493 		if ((p->mbt_cache[i][0] == mbt0) &&
4494 		    (p->mbt_cache[i][1] == mbt1))
4495 			break;
4496 
4497 		/* search a free entry */
4498 		if ((window_num == -1) &&
4499 		   ((!(p->mbt_cache[i][0] & IODA3_MBT0_ENABLE)) &&
4500 		    (!(p->mbt_cache[i][1] & IODA3_MBT1_ENABLE))))
4501 			window_num = i;
4502 	}
4503 
4504 	if (window_num >= 0 && i == p->mbt_size) {
4505 		/* no capi mmio window found, so add it */
4506 		p->mbt_cache[window_num][0] = mbt0;
4507 		p->mbt_cache[window_num][1] = mbt1;
4508 
4509 		phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
4510 		out_be64(p->regs + PHB_IODA_DATA0, mbt0);
4511 		out_be64(p->regs + PHB_IODA_DATA0, mbt1);
4512 	} else if (i == p->mbt_size) {
4513 		/* mbt cache full, this case should never happen */
4514 		PHBERR(p, "CAPP: Failed to add CAPI mmio window\n");
4515 	} else {
4516 		/* duplicate entry. Nothing to do */
4517 	}
4518 
4519 	phb4_init_capp_errors(p);
4520 
4521 	phb4_init_capp_regs(p, capp_eng);
4522 
4523 	if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
4524 					CAPP_TB,
4525 					PHB4_CAPP_REG_OFFSET(p)))
4526 		PHBERR(p, "CAPP: Failed to sync timebase\n");
4527 
4528 	/* set callbacks to handle HMI events */
4529 	capi_ops.get_capp_info = &phb4_get_capp_info;
4530 
4531 	return OPAL_SUCCESS;
4532 }
4533 
4534 
4535 static int64_t phb4_init_capp(struct phb4 *p)
4536 {
4537 	struct capp *capp;
4538 	int rc;
4539 
4540 	if (p->index != CAPP0_PHB_INDEX &&
4541 	    p->index != CAPP1_PHB_INDEX)
4542 		return OPAL_UNSUPPORTED;
4543 
4544 	capp = zalloc(sizeof(struct capp));
4545 	if (capp == NULL)
4546 		return OPAL_NO_MEM;
4547 
4548 	if (p->index == CAPP0_PHB_INDEX) {
4549 		capp->capp_index = 0;
4550 		capp->capp_xscom_offset = 0;
4551 
4552 	} else if (p->index == CAPP1_PHB_INDEX) {
4553 		capp->capp_index = 1;
4554 		capp->capp_xscom_offset = CAPP1_REG_OFFSET;
4555 	}
4556 
4557 	capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
4558 	capp->chip_id = p->chip_id;
4559 
4560 	/* Load capp microcode into the capp unit */
4561 	rc = load_capp_ucode(p);
4562 
4563 	if (rc == OPAL_SUCCESS)
4564 		p->capp = capp;
4565 	else
4566 		free(capp);
4567 
4568 	return rc;
4569 }
4570 
4571 static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
4572 				  uint64_t pe_number)
4573 {
4574 	struct phb4 *p = phb_to_phb4(phb);
4575 	struct proc_chip *chip = get_chip(p->chip_id);
4576 	struct capp *capp = p->capp;
4577 	uint64_t reg, ret;
4578 
4579 	/* can't do a mode switch while CAPP is in recovery mode */
4580 	ret = capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
4581 	if (ret != OPAL_SUCCESS)
4582 		return ret;
4583 
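	/* Recovery has been requested (bit 0) but has not yet completed (bit 1) */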
4584 	if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
4585 		PHBDBG(p, "CAPP: recovery in progress\n");
4586 		return OPAL_BUSY;
4587 	}
4588 
4589 
4590 	switch (mode) {
4591 
4592 	case OPAL_PHB_CAPI_MODE_DMA: /* Enabled by default on p9 */
4593 	case OPAL_PHB_CAPI_MODE_SNOOP_ON:
4594 		/* nothing to do on P9 if CAPP is already enabled */
4595 		ret = p->capp->phb ? OPAL_SUCCESS : OPAL_UNSUPPORTED;
4596 		break;
4597 
4598 	case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
4599 		ret = p->capp->phb ? OPAL_UNSUPPORTED : OPAL_SUCCESS;
4600 		break;
4601 
4602 	case OPAL_PHB_CAPI_MODE_PCIE:
4603 		if (p->flags & PHB4_CAPP_DISABLE) {
4604 			/* We are in middle of a CAPP disable */
4605 			ret = OPAL_BUSY;
4606 
4607 		} else if (capp->phb) {
4608 			/* Kick start a creset */
4609 			p->flags |= PHB4_CAPP_DISABLE;
4610 			PHBINF(p, "CAPP: PCIE mode needs a cold-reset\n");
4611 			/* Kick off the pci state machine */
4612 			ret = phb4_creset(phb->slot);
4613 			ret = ret > 0 ? OPAL_BUSY : ret;
4614 
4615 		} else {
4616 			/* PHB already in PCI mode */
4617 			ret = OPAL_SUCCESS;
4618 		}
4619 		break;
4620 
4621 	case OPAL_PHB_CAPI_MODE_CAPI: /* Fall Through */
4622 	case OPAL_PHB_CAPI_MODE_DMA_TVT1:
4623 		/* Make sure that PHB is not disabling CAPP */
4624 		if (p->flags & PHB4_CAPP_DISABLE) {
4625 			PHBERR(p, "CAPP: Disable in progress\n");
4626 			ret = OPAL_BUSY;
4627 			break;
4628 		}
4629 
4630 		/* Check if ucode is available */
4631 		if (!capp_ucode_loaded(chip, p->index)) {
4632 			PHBERR(p, "CAPP: ucode not loaded\n");
4633 			ret = OPAL_RESOURCE;
4634 			break;
4635 		}
4636 
4637 		/*
4638 		 * Mark the CAPP attached to the PHB right away so that
4639 		 * if an MCE happens during CAPP init we can handle it.
4640 		 * In case of an error in CAPP init we remove the PHB
4641 		 * from the attached_mask later.
4642 		 */
4643 		capp->phb = phb;
4644 		capp->attached_pe = pe_number;
4645 
4646 		if (mode == OPAL_PHB_CAPI_MODE_DMA_TVT1)
4647 			ret = enable_capi_mode(p, pe_number,
4648 					       CAPP_MIN_STQ_ENGINES |
4649 					       CAPP_MAX_DMA_READ_ENGINES);
4650 
4651 		else
4652 			ret = enable_capi_mode(p, pe_number,
4653 					       CAPP_MAX_STQ_ENGINES |
4654 					       CAPP_MIN_DMA_READ_ENGINES);
4655 		if (ret == OPAL_SUCCESS) {
4656 			/* register notification on system shutdown */
4657 			opal_add_host_sync_notifier(&phb4_host_sync_reset, p);
4658 
4659 		} else {
4660 			/* In case of an error mark the PHB detached */
4661 			capp->phb = NULL;
4662 			capp->attached_pe = phb4_get_reserved_pe_number(phb);
4663 		}
4664 		break;
4665 
4666 	default:
4667 		ret = OPAL_UNSUPPORTED;
4668 		break;
4669 	};
4670 
4671 	return ret;
4672 }
4673 
4674 static void phb4_p2p_set_initiator(struct phb4 *p, uint16_t pe_number)
4675 {
4676 	uint64_t tve;
4677 	uint16_t window_id = (pe_number << 1) + 1;
4678 
4679 	/*
4680 	 * Initiator needs access to the MMIO space of the target,
4681 	 * which is well beyond the 'normal' memory area. Set its TVE
4682 	 * with no range checking.
4683 	 */
4684 	PHBDBG(p, "Setting TVE#1 for peer-to-peer for pe %d\n", pe_number);
4685 	tve = PPC_BIT(51);
4686 	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
4687 	out_be64(p->regs + PHB_IODA_DATA0, tve);
4688 	p->tve_cache[window_id] = tve;
4689 }
4690 
4691 static void phb4_p2p_set_target(struct phb4 *p, bool enable)
4692 {
4693 	uint64_t val;
4694 
4695 	/*
4696 	 * Enabling p2p on a target PHB reserves an outbound (as seen
4697 	 * from the CPU) store queue for p2p
4698 	 */
4699 	PHBDBG(p, "%s peer-to-peer\n", (enable ? "Enabling" : "Disabling"));
4700 	xscom_read(p->chip_id,
4701 		p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, &val);
4702 	if (enable)
4703 		val |= XPEC_NEST_STK_PBCQ_MODE_P2P;
4704 	else
4705 		val &= ~XPEC_NEST_STK_PBCQ_MODE_P2P;
4706 	xscom_write(p->chip_id,
4707 		p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, val);
4708 }
4709 
4710 static void phb4_set_p2p(struct phb *phb, uint64_t mode, uint64_t flags,
4711 			uint16_t pe_number)
4712 {
4713 	struct phb4 *p = phb_to_phb4(phb);
4714 
4715 	switch (mode) {
4716 	case OPAL_PCI_P2P_INITIATOR:
4717 		if (flags & OPAL_PCI_P2P_ENABLE)
4718 			phb4_p2p_set_initiator(p, pe_number);
4719 		/*
4720 		 * When disabling p2p on the initiator, we should
4721 		 * reset the TVE to its default bypass setting, but it
4722 		 * is more easily done from the OS, as it knows the
4723 		 * start and end addresses and there's already an
4724 		 * OPAL call for it, so let Linux handle it.
4725 		 */
4726 		break;
4727 	case OPAL_PCI_P2P_TARGET:
4728 		phb4_p2p_set_target(p, !!(flags & OPAL_PCI_P2P_ENABLE));
4729 		break;
4730 	default:
4731 		assert(0);
4732 	}
4733 }
4734 
4735 static int64_t phb4_set_capp_recovery(struct phb *phb)
4736 {
4737 	struct phb4 *p = phb_to_phb4(phb);
4738 
4739 	if (p->flags & PHB4_CAPP_RECOVERY)
4740 		return 0;
4741 
4742 	/* set opal event flag to indicate eeh condition */
4743 	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
4744 				OPAL_EVENT_PCI_ERROR);
4745 
4746 	p->flags |= PHB4_CAPP_RECOVERY;
4747 
4748 	return 0;
4749 }
4750 
4751 /*
4752  * Return the address out of a PBCQ Tunnel Bar register.
4753  */
4754 static void phb4_get_tunnel_bar(struct phb *phb, uint64_t *addr)
4755 {
4756 	struct phb4 *p = phb_to_phb4(phb);
4757 	uint64_t val;
4758 
4759 	xscom_read(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
4760 		   &val);
4761 	*addr = val >> 8;
4762 }
4763 
4764 /*
4765  * Set PBCQ Tunnel Bar register.
4766  * Store addr bits [8:50] in PBCQ Tunnel Bar register bits [0:42].
4767  * Note that addr bits [8:50] must also match PSL_TNR_ADDR[8:50].
4768  * Reset register if val == 0.
4769  *
4770  * This interface is required to let device drivers set the Tunnel Bar
4771  * value of their choice.
4772  *
4773  * Compatibility with older versions of linux, that do not set the
4774  * Tunnel Bar with phb4_set_tunnel_bar(), is ensured by enable_capi_mode(),
4775  * that will set the default value that used to be assumed.
4776  */
4777 static int64_t phb4_set_tunnel_bar(struct phb *phb, uint64_t addr)
4778 {
4779 	struct phb4 *p = phb_to_phb4(phb);
4780 	uint64_t mask = 0x00FFFFFFFFFFE000ULL;
4781 
4782 	if (!addr) {
4783 		/* Reset register */
4784 		xscom_write(p->chip_id,
4785 			    p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR, addr);
4786 		return OPAL_SUCCESS;
4787 	}
4788 	if ((addr & ~mask))
4789 		return OPAL_PARAMETER;
4790 	if (!(addr & mask))
4791 		return OPAL_PARAMETER;
4792 
4793 	xscom_write(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
4794 		    (addr & mask) << 8);
4795 	return OPAL_SUCCESS;
4796 }
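
/*
 * Worked example for the Tunnel Bar encoding above (hypothetical
 * address, for illustration only):
 *
 *   addr               = 0x0006000000000000 (top byte and low 13 bits
 *                                            zero, so both mask checks
 *                                            pass)
 *   (addr & mask) << 8 = 0x0600000000000000 (value written to
 *                                            XPEC_NEST_STK_TUNNEL_BAR)
 *
 * phb4_get_tunnel_bar() reverses this with ">> 8" and returns the
 * original 0x0006000000000000.
 */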
4797 
4798 static const struct phb_ops phb4_ops = {
4799 	.cfg_read8		= phb4_pcicfg_read8,
4800 	.cfg_read16		= phb4_pcicfg_read16,
4801 	.cfg_read32		= phb4_pcicfg_read32,
4802 	.cfg_write8		= phb4_pcicfg_write8,
4803 	.cfg_write16		= phb4_pcicfg_write16,
4804 	.cfg_write32		= phb4_pcicfg_write32,
4805 	.choose_bus		= phb4_choose_bus,
4806 	.get_reserved_pe_number	= phb4_get_reserved_pe_number,
4807 	.device_init		= phb4_device_init,
4808 	.device_remove		= NULL,
4809 	.ioda_reset		= phb4_ioda_reset,
4810 	.papr_errinjct_reset	= phb4_papr_errinjct_reset,
4811 	.pci_reinit		= phb4_pci_reinit,
4812 	.set_phb_mem_window	= phb4_set_phb_mem_window,
4813 	.phb_mmio_enable	= phb4_phb_mmio_enable,
4814 	.map_pe_mmio_window	= phb4_map_pe_mmio_window,
4815 	.map_pe_dma_window	= phb4_map_pe_dma_window,
4816 	.map_pe_dma_window_real = phb4_map_pe_dma_window_real,
4817 	.set_xive_pe		= phb4_set_ive_pe,
4818 	.get_msi_32		= phb4_get_msi_32,
4819 	.get_msi_64		= phb4_get_msi_64,
4820 	.set_pe			= phb4_set_pe,
4821 	.set_peltv		= phb4_set_peltv,
4822 	.eeh_freeze_status	= phb4_eeh_freeze_status,
4823 	.eeh_freeze_clear	= phb4_eeh_freeze_clear,
4824 	.eeh_freeze_set		= phb4_eeh_freeze_set,
4825 	.next_error		= phb4_eeh_next_error,
4826 	.err_inject		= phb4_err_inject,
4827 	.get_diag_data2		= phb4_get_diag_data,
4828 	.tce_kill		= phb4_tce_kill,
4829 	.set_capi_mode		= phb4_set_capi_mode,
4830 	.set_p2p		= phb4_set_p2p,
4831 	.set_capp_recovery	= phb4_set_capp_recovery,
4832 	.get_tunnel_bar         = phb4_get_tunnel_bar,
4833 	.set_tunnel_bar         = phb4_set_tunnel_bar,
4834 };
4835 
4836 static void phb4_init_ioda3(struct phb4 *p)
4837 {
4838 	/* Init_18 - Interrupt Notify Base Address */
4839 	out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);
4840 
4841 	/* Init_19 - Interrupt Notify Base Index */
4842 	out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
4843 		 xive_get_notify_base(p->base_msi));
4844 
4845 	/* Init_19x - Not in spec: Initialize source ID */
4846 	PHBDBG(p, "Reset state SRC_ID: %016llx\n",
4847 	       in_be64(p->regs + PHB_LSI_SOURCE_ID));
4848 	out_be64(p->regs + PHB_LSI_SOURCE_ID,
4849 		 SETFIELD(PHB_LSI_SRC_ID, 0ull, (p->num_irqs - 1) >> 3));
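	/*
	 * For illustration (hypothetical interrupt count): with
	 * num_irqs == 4096 the field programmed above is
	 * (4096 - 1) >> 3 == 511.
	 */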
4850 
4851 	/* Init_20 - RTT BAR */
4852 	out_be64(p->regs + PHB_RTT_BAR, (u64) p->tbl_rtt | PHB_RTT_BAR_ENABLE);
4853 
4854 	/* Init_21 - PELT-V BAR */
4855 	out_be64(p->regs + PHB_PELTV_BAR,
4856 		 (u64) p->tbl_peltv | PHB_PELTV_BAR_ENABLE);
4857 
4858 	/* Init_22 - Setup M32 starting address */
4859 	out_be64(p->regs + PHB_M32_START_ADDR, M32_PCI_START);
4860 
4861 	/* Init_23 - Setup PEST BAR */
4862 	out_be64(p->regs + PHB_PEST_BAR,
4863 		 p->tbl_pest | PHB_PEST_BAR_ENABLE);
4864 
4865 	/* Init_24 - CRW Base Address Reg */
4866 	/* See enable_capi_mode() */
4867 
4868 	/* Init_25 - ASN Compare/Mask */
4869 	out_be64(p->regs + PHB_ASN_CMPM, ((u64)ASNIND << 48) |
4870 		 ((u64)ASNMASK << 32) | PHB_ASN_CMPM_ENABLE);
4871 
4872 	/* Init_26 - CAPI Compare/Mask */
4873 	/* See enable_capi_mode() */
4874 	/* if CAPP being disabled then reset CAPI Compare/Mask Register */
4875 	if (p->flags & PHB4_CAPP_DISABLE)
4876 		out_be64(p->regs + PHB_CAPI_CMPM, 0);
4877 
4878 	/* Init_27 - PCIE Outbound upper address */
4879 	out_be64(p->regs + PHB_M64_UPPER_BITS, 0);
4880 
4881 	/* Init_28 - PHB4 Configuration */
4882 	out_be64(p->regs + PHB_PHB4_CONFIG,
4883 		 PHB_PHB4C_32BIT_MSI_EN |
4884 		 PHB_PHB4C_64BIT_MSI_EN);
4885 
4886 	/* Init_29 - At least 256ns delay according to spec. Do a dummy
4887 	 * read first to flush posted writes
4888 	 */
4889 	in_be64(p->regs + PHB_PHB4_CONFIG);
4890 	time_wait_us(2);
4891 
4892 	/* Init_30..41 - On-chip IODA tables init */
4893 	phb4_ioda_reset(&p->phb, false);
4894 }
4895 
4896 /* phb4_init_rc_cfg - Initialize the Root Complex config space
4897  */
4898 static bool phb4_init_rc_cfg(struct phb4 *p)
4899 {
4900 	int64_t ecap, aercap;
4901 
4902 	/* XXX Handle errors ? */
4903 
4904 	/* Init_46:
4905 	 *
4906 	 * Set primary bus to 0, secondary to 1 and subordinate to 0xff
4907 	 */
4908 	phb4_pcicfg_write32(&p->phb, 0, PCI_CFG_PRIMARY_BUS, 0x00ff0100);
4909 
4910 	/* Init_47 - Clear errors */
4911 	/* see phb4_rc_err_clear() called below */
4912 
4913 	/* Init_48
4914 	 *
4915 	 * PCIE Device control/status, enable error reporting, disable relaxed
4916 	 * ordering, set MPS to 128 (see note), clear errors.
4917 	 *
4918 	 * Note: The doc recommends to set MPS to 512. This has proved to have
4919 	 * some issues as it requires specific clamping of MRRS on devices and
4920 	 * we've found devices in the field that misbehave when doing that.
4921 	 *
4922 	 * We currently leave it at 128 bytes (minimum setting) at init
4923 	 * time. The generic PCIe probing later on might apply a different
4924 	 * value, or the kernel will, but we play it safe at early init
4925 	 */
4926 	if (p->ecap <= 0) {
4927 		ecap = pci_find_cap(&p->phb, 0, PCI_CFG_CAP_ID_EXP);
4928 		if (ecap < 0) {
4929 			PHBERR(p, "Can't locate PCI-E capability\n");
4930 			return false;
4931 		}
4932 		p->ecap = ecap;
4933 	} else {
4934 		ecap = p->ecap;
4935 	}
4936 
4937 	phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DEVCTL,
4938 			     PCICAP_EXP_DEVCTL_CE_REPORT	|
4939 			     PCICAP_EXP_DEVCTL_NFE_REPORT	|
4940 			     PCICAP_EXP_DEVCTL_FE_REPORT	|
4941 			     PCICAP_EXP_DEVCTL_UR_REPORT	|
4942 			     SETFIELD(PCICAP_EXP_DEVCTL_MPS, 0, PCIE_MPS_128B));
4943 
4944 	/* Init_49 - Device Control/Status 2 */
4945 	phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DCTL2,
4946 			     SETFIELD(PCICAP_EXP_DCTL2_CMPTOUT, 0, 0x5) |
4947 			     PCICAP_EXP_DCTL2_ARI_FWD);
4948 
4949 	/* Init_50..54
4950 	 *
4951 	 * AER inits
4952 	 */
4953 	if (p->aercap <= 0) {
4954 		aercap = pci_find_ecap(&p->phb, 0, PCIECAP_ID_AER, NULL);
4955 		if (aercap < 0) {
4956 			PHBERR(p, "Can't locate AER capability\n");
4957 			return false;
4958 		}
4959 		p->aercap = aercap;
4960 	} else {
4961 		aercap = p->aercap;
4962 	}
4963 
4964 	/* Disable some error reporting as per the PHB4 spec */
4965 	phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_UE_MASK,
4966 			     PCIECAP_AER_UE_POISON_TLP		|
4967 			     PCIECAP_AER_UE_COMPL_TIMEOUT	|
4968 			     PCIECAP_AER_UE_COMPL_ABORT);
4969 
4970 	/* Enable ECRC generation & checking */
4971 	phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_CAPCTL,
4972 			     PCIECAP_AER_CAPCTL_ECRCG_EN	|
4973 			     PCIECAP_AER_CAPCTL_ECRCC_EN);
4974 
4975 	phb4_rc_err_clear(p);
4976 
4977 	return true;
4978 }
4979 
4980 static void phb4_init_errors(struct phb4 *p)
4981 {
4982 	/* Init_55..63 - PBL errors */
4983 	out_be64(p->regs + 0x1900,	0xffffffffffffffffull);
4984 	out_be64(p->regs + 0x1908,	0x0000000000000000ull);
4985 	out_be64(p->regs + 0x1920,	0x000000004d1780f8ull);
4986 	out_be64(p->regs + 0x1928,	0x0000000000000000ull);
4987 	out_be64(p->regs + 0x1930,	0xffffffffb2f87f07ull);
4988 	out_be64(p->regs + 0x1940,	0x0000000000000000ull);
4989 	out_be64(p->regs + 0x1948,	0x0000000000000000ull);
4990 	out_be64(p->regs + 0x1950,	0x0000000000000000ull);
4991 	out_be64(p->regs + 0x1958,	0x0000000000000000ull);
4992 
4993 	/* Init_64..72 - REGB errors */
4994 	out_be64(p->regs + 0x1c00,	0xffffffffffffffffull);
4995 	out_be64(p->regs + 0x1c08,	0x0000000000000000ull);
4996 	/* Enable/disable error status indicators that trigger irqs */
4997 	if (p->has_link) {
4998 		out_be64(p->regs + 0x1c20,	0x2130006efca8bc00ull);
4999 		out_be64(p->regs + 0x1c30,	0xde1fff91035743ffull);
5000 	} else {
5001 		out_be64(p->regs + 0x1c20,	0x0000000000000000ull);
5002 		out_be64(p->regs + 0x1c30,	0x0000000000000000ull);
5003 	}
5004 	out_be64(p->regs + 0x1c28,	0x0080000000000000ull);
5005 	out_be64(p->regs + 0x1c40,	0x0000000000000000ull);
5006 	out_be64(p->regs + 0x1c48,	0x0000000000000000ull);
5007 	out_be64(p->regs + 0x1c50,	0x0000000000000000ull);
5008 	out_be64(p->regs + 0x1c58,	0x0040000000000000ull);
5009 
5010 	/* Init_73..81 - TXE errors */
5011 	out_be64(p->regs + 0x0d08,	0x0000000000000000ull);
5012 	/* Errata: Clear bit 17, otherwise a CFG write UR/CA will incorrectly
5013 	 * freeze a "random" PE (whichever PE last did an MMIO)
5014 	 */
5015 	out_be64(p->regs + 0x0d28,	0x0000000a00000000ull);
5016 	if (phb4_is_dd20(p)) {
5017 		out_be64(p->regs + 0x0d00,	0xf3acff0ff7ddfff0ull);
5018 		out_be64(p->regs + 0x0d18,	0xf3acff0ff7ddfff0ull);
5019 		out_be64(p->regs + 0x0d30,	0xdfffbd05f7ddfff0ull); /* XXX CAPI has diff. value */
5020 	} else  {
5021 		out_be64(p->regs + 0x0d00,	0xffffffffffffffffull);
5022 		out_be64(p->regs + 0x0d18,	0xffffff0fffffffffull);
5023 		out_be64(p->regs + 0x0d30,	0xdff7bd05f7ddfff0ull);
5024 	}
5025 
5026 	out_be64(p->regs + 0x0d40,	0x0000000000000000ull);
5027 	out_be64(p->regs + 0x0d48,	0x0000000000000000ull);
5028 	out_be64(p->regs + 0x0d50,	0x0000000000000000ull);
5029 	out_be64(p->regs + 0x0d58,	0x0000000000000000ull);
5030 
5031 	/* Init_82..90 - RXE_ARB errors */
5032 	out_be64(p->regs + 0x0d80,	0xffffffffffffffffull);
5033 	out_be64(p->regs + 0x0d88,	0x0000000000000000ull);
5034 	out_be64(p->regs + 0x0d98,	0xfffffffffbffffffull);
5035 	out_be64(p->regs + 0x0da8,	0xc00018b801000060ull);
5036 	/*
5037 	 * Errata ER20161123 says we should set the top two bits in
5038 	 * 0x0db0 but this causes config space accesses which don't
5039 	 * get a response to fence the PHB. This breaks probing,
5040 	 * hence we don't set them here.
5041 	 */
5042 	out_be64(p->regs + 0x0db0,	0x3bffd703fa7fbf8full); /* XXX CAPI has diff. value */
5043 	out_be64(p->regs + 0x0dc0,	0x0000000000000000ull);
5044 	out_be64(p->regs + 0x0dc8,	0x0000000000000000ull);
5045 	out_be64(p->regs + 0x0dd0,	0x0000000000000000ull);
5046 	out_be64(p->regs + 0x0dd8,	0x0000000004000000ull);
5047 
5048 	/* Init_91..99 - RXE_MRG errors */
5049 	out_be64(p->regs + 0x0e00,	0xffffffffffffffffull);
5050 	out_be64(p->regs + 0x0e08,	0x0000000000000000ull);
5051 	out_be64(p->regs + 0x0e18,	0xffffffffffffffffull);
5052 	out_be64(p->regs + 0x0e28,	0x0000600000000000ull);
5053 	out_be64(p->regs + 0x0e30,	0xfffffeffff7fff57ull);
5054 	out_be64(p->regs + 0x0e40,	0x0000000000000000ull);
5055 	out_be64(p->regs + 0x0e48,	0x0000000000000000ull);
5056 	out_be64(p->regs + 0x0e50,	0x0000000000000000ull);
5057 	out_be64(p->regs + 0x0e58,	0x0000000000000000ull);
5058 
5059 	/* Init_100..108 - RXE_TCE errors */
5060 	out_be64(p->regs + 0x0e80,	0xffffffffffffffffull);
5061 	out_be64(p->regs + 0x0e88,	0x0000000000000000ull);
5062 	out_be64(p->regs + 0x0e98,	0xffffffffffffffffull);
5063 	out_be64(p->regs + 0x0ea8,	0x60000000c0000000ull);
5064 	out_be64(p->regs + 0x0eb0,	0x9faeffaf3fffffffull); /* XXX CAPI has diff. value */
5065 	out_be64(p->regs + 0x0ec0,	0x0000000000000000ull);
5066 	out_be64(p->regs + 0x0ec8,	0x0000000000000000ull);
5067 	out_be64(p->regs + 0x0ed0,	0x0000000000000000ull);
5068 	out_be64(p->regs + 0x0ed8,	0x0000000000000000ull);
5069 
5070 	/* Init_109..117 - RXPHB errors */
5071 	out_be64(p->regs + 0x0c80,	0xffffffffffffffffull);
5072 	out_be64(p->regs + 0x0c88,	0x0000000000000000ull);
5073 	out_be64(p->regs + 0x0c98,	0xffffffffffffffffull);
5074 	out_be64(p->regs + 0x0ca8,	0x0000004000000000ull);
5075 	out_be64(p->regs + 0x0cb0,	0x35777033ff000000ull); /* XXX CAPI has diff. value */
5076 	out_be64(p->regs + 0x0cc0,	0x0000000000000000ull);
5077 	out_be64(p->regs + 0x0cc8,	0x0000000000000000ull);
5078 	out_be64(p->regs + 0x0cd0,	0x0000000000000000ull);
5079 	out_be64(p->regs + 0x0cd8,	0x0000000000000000ull);
5080 
5081 	/* Init_118..121 - LEM */
5082 	out_be64(p->regs + 0x0c00,	0x0000000000000000ull);
5083 	if (phb4_is_dd20(p)) {
5084 		out_be64(p->regs + 0x0c30,	0xf3ffffffffffffffull);
5085 		out_be64(p->regs + 0x0c38,	0xf3ffffffffffffffull);
5086 	} else {
5087 		out_be64(p->regs + 0x0c30,	0xffffffffffffffffull);
5088 		out_be64(p->regs + 0x0c38,	0xffffffffffffffffull);
5089 	}
5090 	out_be64(p->regs + 0x0c40,	0x0000000000000000ull);
5091 }
5092 
5093 
5094 static bool phb4_wait_dlp_reset(struct phb4 *p)
5095 {
5096 	unsigned int i;
5097 	uint64_t val;
5098 
5099 	/*
5100 	 * Firmware cannot access the UTL core regs or PCI config space
5101 	 * until the cores are out of DL_PGRESET.
5102 	 * DL_PGRESET should be polled until it is inactive with a value
5103 	 * of '0'. The recommended polling frequency is once every 1ms.
5104 	 * Firmware should poll at least 200 attempts before giving up.
5105 	 * MMIO Stores to the link are silently dropped by the UTL core if
5106 	 * the link is down.
5107 	 * MMIO Loads to the link will be dropped by the UTL core and will
5108 	 * eventually time out, returning an all-ones response if the
5109 	 * link is down.
5110 	 */
5111 #define DLP_RESET_ATTEMPTS	200
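	/*
	 * With the 1 ms delay between polls below, this bounds the wait
	 * at roughly DLP_RESET_ATTEMPTS x 1 ms = 200 ms (plus register
	 * access time) before we give up.
	 */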
5112 
5113 	PHBDBG(p, "Waiting for DLP PG reset to complete...\n");
5114 	for (i = 0; i < DLP_RESET_ATTEMPTS; i++) {
5115 		val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
5116 		if (!(val & PHB_PCIE_DLP_DL_PGRESET))
5117 			break;
5118 		time_wait_ms(1);
5119 	}
5120 	if (val & PHB_PCIE_DLP_DL_PGRESET) {
5121 		PHBERR(p, "Timeout waiting for DLP PG reset !\n");
5122 		return false;
5123 	}
5124 	return true;
5125 }
5126 static void phb4_init_hw(struct phb4 *p)
5127 {
5128 	uint64_t val, creset;
5129 
5130 	PHBDBG(p, "Initializing PHB4...\n");
5131 
5132 	/* Init_1 - Sync reset
5133 	 *
5134 	 * At this point we assume the PHB has already been reset.
5135 	 */
5136 
5137 	/* Init_2 - Mask FIRs */
5138 	out_be64(p->regs + PHB_LEM_ERROR_MASK,			0xffffffffffffffffull);
5139 
5140 	/* Init_3 - TCE tag enable */
5141 	out_be64(p->regs + PHB_TCE_TAG_ENABLE,			0xffffffffffffffffull);
5142 
5143 	/* Init_4 - PCIE System Configuration Register
5144 	 *
5145 	 * Adjust max speed based on system config
5146 	 */
5147 	val = in_be64(p->regs + PHB_PCIE_SCR);
5148 	PHBDBG(p, "Default system config: 0x%016llx\n", val);
5149 	val = SETFIELD(PHB_PCIE_SCR_MAXLINKSPEED, val, p->max_link_speed);
5150 	out_be64(p->regs + PHB_PCIE_SCR, val);
5151 	PHBDBG(p, "New system config    : 0x%016llx\n",
5152 	       in_be64(p->regs + PHB_PCIE_SCR));
5153 
5154 	/* Init_5 - deassert CFG reset */
5155 	creset = in_be64(p->regs + PHB_PCIE_CRESET);
5156 	PHBDBG(p, "Initial PHB CRESET is 0x%016llx\n", creset);
5157 	creset &= ~PHB_PCIE_CRESET_CFG_CORE;
5158 	out_be64(p->regs + PHB_PCIE_CRESET,			creset);
5159 
5160 	/* Init_6..13 - PCIE DLP Lane EQ control */
5161 	if (p->lane_eq) {
5162 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL0, be64_to_cpu(p->lane_eq[0]));
5163 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL1, be64_to_cpu(p->lane_eq[1]));
5164 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL2, be64_to_cpu(p->lane_eq[2]));
5165 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL3, be64_to_cpu(p->lane_eq[3]));
5166 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL20, be64_to_cpu(p->lane_eq[4]));
5167 		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL21, be64_to_cpu(p->lane_eq[5]));
5168 	}
5169 	if (!p->lane_eq_en) {
5170 		/* Read-modify-write to set the two bypass bits */
5171 		PHBDBG(p, "LINK: Disabling Lane EQ\n");
5172 		val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
5173 		val |= PHB_PCIE_DLP_CTL_BYPASS_PH2 | PHB_PCIE_DLP_CTL_BYPASS_PH3;
5174 		out_be64(p->regs + PHB_PCIE_DLP_CTL, val);
5175 	}
5176 
5177 	/* Init_14 - Clear link training */
5178 	phb4_pcicfg_write32(&p->phb, 0, 0x78,
5179 			    0x07FE0000 | p->max_link_speed);
5180 
5181 	/* Init_15 - deassert cores reset */
5182 	/*
5183 	 * Lift the PHB resets but not PERST, this will be lifted
5184 	 * later by the initial PERST state machine
5185 	 */
5186 	creset &= ~(PHB_PCIE_CRESET_TLDLP | PHB_PCIE_CRESET_PBL);
5187 	creset |= PHB_PCIE_CRESET_PIPE_N;
5188 	out_be64(p->regs + PHB_PCIE_CRESET,			   creset);
5189 
5190 	/* Init_16 - Wait for DLP PGRESET to clear */
5191 	if (!phb4_wait_dlp_reset(p))
5192 		goto failed;
5193 
5194 	/* Init_17 - PHB Control */
5195 	val = PHB_CTRLR_IRQ_PGSZ_64K;
5196 	val |= SETFIELD(PHB_CTRLR_TVT_ADDR_SEL, 0ull, TVT_2_PER_PE);
5197 	if (PHB4_CAN_STORE_EOI(p))
5198 		val |= PHB_CTRLR_IRQ_STORE_EOI;
5199 
5200 	if (!pci_eeh_mmio)
5201 		val |= PHB_CTRLR_MMIO_EEH_DISABLE;
5202 
5203 	out_be64(p->regs + PHB_CTRLR, val);
5204 
5205 	/* Init_18..41 - Architected IODA3 inits */
5206 	phb4_init_ioda3(p);
5207 
5208 	/* Init_42..45 - Clear DLP error logs */
5209 	out_be64(p->regs + 0x1aa0,			0xffffffffffffffffull);
5210 	out_be64(p->regs + 0x1aa8,			0xffffffffffffffffull);
5211 	out_be64(p->regs + 0x1ab0,			0xffffffffffffffffull);
5212 	out_be64(p->regs + 0x1ab8,			0x0);
5213 
5214 
5215 	/* Init_46..54 : Init root complex config space */
5216 	if (!phb4_init_rc_cfg(p))
5217 		goto failed;
5218 
5219 	/* Init_55..121  : Setup error registers */
5220 	phb4_init_errors(p);
5221 
5222 	/* Init_122..123 : Wait for link
5223 	 * NOTE: At this point the spec waits for the link to come up. We
5224 	 * don't bother as we are doing a PERST soon.
5225 	 */
5226 
5227 	/* Init_124 :  NBW. XXX TODO */
5228 	/* See enable_capi_mode() */
5229 
5230 	/* Init_125 : Setup PCI command/status on root complex
5231 	 * I don't know why the spec does this now and not earlier, so
5232 	 * to be sure to get it right we might want to move it to the freset
5233 	 * state machine, though the generic PCI layer will probably do
5234 	 * this anyway (ie, enable MEM, etc... in the RC)
5236 	 */
5237 	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_CMD,
5238 			    PCI_CFG_CMD_MEM_EN |
5239 			    PCI_CFG_CMD_BUS_MASTER_EN);
5240 
5241 	/* Clear errors */
5242 	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_STAT,
5243 			    PCI_CFG_STAT_SENT_TABORT |
5244 			    PCI_CFG_STAT_RECV_TABORT |
5245 			    PCI_CFG_STAT_RECV_MABORT |
5246 			    PCI_CFG_STAT_SENT_SERR |
5247 			    PCI_CFG_STAT_RECV_PERR);
5248 
5249 	/* Init_126..130 - Re-enable error interrupts */
5250 	out_be64(p->regs + PHB_ERR_IRQ_ENABLE,			0xca8880cc00000000ull);
5251 	if (phb4_is_dd20(p))
5252 		out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE,		0x2000400e08200000ull);
5253 	else
5254 		out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE,		0x2008400e08200000ull);
5255 	out_be64(p->regs + PHB_RXE_ARB_ERR_IRQ_ENABLE,		0xc40038fc01804070ull);
5256 	out_be64(p->regs + PHB_RXE_MRG_ERR_IRQ_ENABLE,		0x00006100008000a8ull);
5257 	out_be64(p->regs + PHB_RXE_TCE_ERR_IRQ_ENABLE,	0x60510050c0000000ull);
5258 
5259 	/* Init_131 - Re-enable LEM error mask */
5260 	out_be64(p->regs + PHB_LEM_ERROR_MASK,			0x0000000000000000ull);
5261 
5262 
5263 	/* Init_132 - Enable DMA address speculation */
5264 	out_be64(p->regs + PHB_TCE_SPEC_CTL,			0x0000000000000000ull);
5265 
5266 	/* Init_133 - Timeout Control Register 1 */
5267 	out_be64(p->regs + PHB_TIMEOUT_CTRL1,			0x0015150000150000ull);
5268 
5269 	/* Init_134 - Timeout Control Register 2 */
5270 	out_be64(p->regs + PHB_TIMEOUT_CTRL2,			0x0000151500000000ull);
5271 
5272 	/* Init_135 - PBL Timeout Control Register */
5273 	out_be64(p->regs + PHB_PBL_TIMEOUT_CTRL,		0x2013000000000000ull);
5274 
5275 	/* Mark the PHB as functional which enables all the various sequences */
5276 	p->broken = false;
5277 
5278 	PHBDBG(p, "Initialization complete\n");
5279 
5280 	return;
5281 
5282  failed:
5283 	PHBERR(p, "Initialization failed\n");
5284 	p->broken = true;
5285 }
5286 
5287 /* FIXME: Use scoms rather than MMIO in case we are fenced */
5288 static bool phb4_read_capabilities(struct phb4 *p)
5289 {
5290 	uint64_t val;
5291 
5292 	/* XXX Should make sure ETU is out of reset ! */
5293 
5294 	/* Grab version and fit it in an int */
5295 	val = phb4_read_reg_asb(p, PHB_VERSION);
5296 	if (val == 0 || val == 0xffffffffffffffffUL) {
5297 		PHBERR(p, "Failed to read version, PHB appears broken\n");
5298 		return false;
5299 	}
5300 
5301 	p->rev = ((val >> 16) & 0x00ff0000) | (val & 0xffff);
5302 	PHBDBG(p, "Core revision 0x%x\n", p->rev);
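	/*
	 * Example of the decode above, with a hypothetical register
	 * value: a PHB_VERSION of 0x000000a400000002 yields
	 * p->rev == 0xa40002.
	 */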
5303 
5304 	/* Read EEH capabilities */
5305 	val = in_be64(p->regs + PHB_PHB4_EEH_CAP);
5306 	if (val == 0xffffffffffffffffUL) {
5307 		PHBERR(p, "Failed to read EEH cap, PHB appears broken\n");
5308 		return false;
5309 	}
5310 	p->max_num_pes = val >> 52;
5311 	if (p->max_num_pes >= 512) {
5312 		p->mrt_size = 16;
5313 		p->mbt_size = 32;
5314 		p->tvt_size = 1024;
5315 	} else {
5316 		p->mrt_size = 8;
5317 		p->mbt_size = 16;
5318 		p->tvt_size = 512;
5319 	}
5320 
5321 	val = in_be64(p->regs + PHB_PHB4_IRQ_CAP);
5322 	if (val == 0xffffffffffffffffUL) {
5323 		PHBERR(p, "Failed to read IRQ cap, PHB appears broken\n");
5324 		return false;
5325 	}
5326 	p->num_irqs = val & 0xffff;
5327 
5328 	/* This works for 512 PEs.  FIXME calculate for any hardware
5329 	 * size returned above
5330 	 */
5331 	p->tbl_peltv_size = PELTV_TABLE_SIZE_MAX;
5332 
5333 	p->tbl_pest_size = p->max_num_pes*16;
5334 
5335 	PHBDBG(p, "Found %d max PEs and %d IRQs \n",
5336 	       p->max_num_pes, p->num_irqs);
5337 
5338 	return true;
5339 }
5340 
5341 static void phb4_allocate_tables(struct phb4 *p)
5342 {
5343 	uint32_t i;
5344 
5345 	/* XXX Our current memalign implementation sucks,
5346 	 *
5347 	 * It will do the job, however it doesn't support freeing
5348 	 * the memory and wastes space by always allocating twice
5349 	 * as much as requested (size + alignment)
5350 	 */
5351 	p->tbl_rtt = local_alloc(p->chip_id, RTT_TABLE_SIZE, RTT_TABLE_SIZE);
5352 	assert(p->tbl_rtt);
5353 	for (i = 0; i < RTT_TABLE_ENTRIES; i++)
5354 		p->tbl_rtt[i] = PHB4_RESERVED_PE_NUM(p);
5355 
5356 	p->tbl_peltv = local_alloc(p->chip_id, p->tbl_peltv_size, p->tbl_peltv_size);
5357 	assert(p->tbl_peltv);
5358 	memset(p->tbl_peltv, 0, p->tbl_peltv_size);
5359 
5360 	p->tbl_pest = (uint64_t)local_alloc(p->chip_id, p->tbl_pest_size, p->tbl_pest_size);
5361 	assert(p->tbl_pest);
5362 	memset((void *)p->tbl_pest, 0, p->tbl_pest_size);
5363 }
5364 
5365 static void phb4_add_properties(struct phb4 *p)
5366 {
5367 	struct dt_node *np = p->phb.dt_node;
5368 	uint32_t lsibase, icsp = get_ics_phandle();
5369 	uint64_t m32b, m64b, m64s;
5370 
5371 	/* Add various properties that HB doesn't have to
5372 	 * add, some of them simply because they result from
5373 	 * policy decisions made in skiboot rather than in HB
5374 	 * such as the MMIO windows going to PCI, interrupts,
5375 	 * etc...
5376 	 */
5377 	dt_add_property_cells(np, "#address-cells", 3);
5378 	dt_add_property_cells(np, "#size-cells", 2);
5379 	dt_add_property_cells(np, "#interrupt-cells", 1);
5380 	dt_add_property_cells(np, "bus-range", 0, 0xff);
5381 	dt_add_property_cells(np, "clock-frequency", 0x200, 0); /* ??? */
5382 
5383 	dt_add_property_cells(np, "interrupt-parent", icsp);
5384 
5385 	/* XXX FIXME: add slot-name */
5386 	//dt_property_cell("bus-width", 8); /* Figure it out from VPD ? */
5387 
5388 	/* "ranges", we only expose M32 (PHB4 doesn't do IO)
5389 	 *
5390 	 * Note: The kernel expects us to have chopped off 64k from the
5391 	 * M32 size (for the 32-bit MSIs). If we don't do that, it will
5392 	 * get confused (OPAL does it)
5393 	 */
5394 	m32b = cleanup_addr(p->mm1_base);
5395 	m64b = cleanup_addr(p->mm0_base);
5396 	m64s = p->mm0_size;
5397 	dt_add_property_cells(np, "ranges",
5398 			      /* M32 space */
5399 			      0x02000000, 0x00000000, M32_PCI_START,
5400 			      hi32(m32b), lo32(m32b), 0, M32_PCI_SIZE - 0x10000);
5401 
5402 	/* XXX FIXME: add opal-memwin32, dmawins, etc... */
5403 	dt_add_property_u64s(np, "ibm,opal-m64-window", m64b, m64b, m64s);
5404 	dt_add_property(np, "ibm,opal-single-pe", NULL, 0);
5405 	dt_add_property_cells(np, "ibm,opal-num-pes", p->num_pes);
5406 	dt_add_property_cells(np, "ibm,opal-reserved-pe",
5407 			      PHB4_RESERVED_PE_NUM(p));
5408 	dt_add_property_cells(np, "ibm,opal-msi-ranges",
5409 			      p->base_msi, p->num_irqs - 8);
5410 	/* M64 ranges start at 1 as MBT0 is used for M32 */
5411 	dt_add_property_cells(np, "ibm,opal-available-m64-ranges",
5412 			      1, p->mbt_size - 1);
5413 	dt_add_property_cells(np, "ibm,supported-tce-sizes",
5414 			      12, // 4K
5415 			      16, // 64K
5416 			      21, // 2M
5417 			      30); // 1G
5418 
5419 	/* Tell Linux about alignment limits for segment splits.
5420 	 *
5421 	 * XXX We currently only expose splits of 1 and "num PEs",
5422 	 */
5423 	dt_add_property_cells(np, "ibm,opal-m64-segment-splits",
5424 			      /* Full split, number of segments: */
5425 			      p->num_pes,
5426 			      /* Encoding passed to the enable call */
5427 			      OPAL_ENABLE_M64_SPLIT,
5428 			      /* Alignment/size restriction in #bits*/
5429 			      /* XXX VERIFY VALUE */
5430 			      12,
5431 			      /* Unused */
5432 			      0,
5433 			      /* single PE, number of segments: */
5434 			      1,
5435 			      /* Encoding passed to the enable call */
5436 			      OPAL_ENABLE_M64_NON_SPLIT,
5437 			      /* Alignment/size restriction in #bits*/
5438 			      /* XXX VERIFY VALUE */
5439 			      12,
5440 			      /* Unused */
5441 			      0);
5442 
5443 	/* The interrupt maps will be generated in the RC node by the
5444 	 * PCI code based on the content of this structure:
5445 	 */
5446 	lsibase = p->base_lsi;
5447 	p->phb.lstate.int_size = 2;
5448 	p->phb.lstate.int_val[0][0] = lsibase + PHB4_LSI_PCIE_INTA;
5449 	p->phb.lstate.int_val[0][1] = 1;
5450 	p->phb.lstate.int_val[1][0] = lsibase + PHB4_LSI_PCIE_INTB;
5451 	p->phb.lstate.int_val[1][1] = 1;
5452 	p->phb.lstate.int_val[2][0] = lsibase + PHB4_LSI_PCIE_INTC;
5453 	p->phb.lstate.int_val[2][1] = 1;
5454 	p->phb.lstate.int_val[3][0] = lsibase + PHB4_LSI_PCIE_INTD;
5455 	p->phb.lstate.int_val[3][1] = 1;
5456 	p->phb.lstate.int_parent[0] = icsp;
5457 	p->phb.lstate.int_parent[1] = icsp;
5458 	p->phb.lstate.int_parent[2] = icsp;
5459 	p->phb.lstate.int_parent[3] = icsp;
5460 
5461 	/* Indicators for variable tables */
5462 	dt_add_property_cells(np, "ibm,opal-rtt-table",
5463 		hi32((u64) p->tbl_rtt), lo32((u64) p->tbl_rtt), RTT_TABLE_SIZE);
5464 
5465 	dt_add_property_cells(np, "ibm,opal-peltv-table",
5466 		hi32((u64) p->tbl_peltv), lo32((u64) p->tbl_peltv),
5467 		p->tbl_peltv_size);
5468 
5469 	dt_add_property_cells(np, "ibm,opal-pest-table",
5470 		hi32(p->tbl_pest), lo32(p->tbl_pest), p->tbl_pest_size);
5471 
5472 	dt_add_property_cells(np, "ibm,phb-diag-data-size",
5473 			      sizeof(struct OpalIoPhb4ErrorData));
5474 
5475 	/* Indicate to Linux that CAPP timebase sync is supported */
5476 	dt_add_property_string(np, "ibm,capp-timebase-sync", NULL);
5477 
5478 	/* Tell Linux Compare/Mask indication values */
5479 	dt_add_property_cells(np, "ibm,phb-indications", CAPIIND, ASNIND,
5480 			      NBWIND);
5481 }
5482 
5483 static bool phb4_calculate_windows(struct phb4 *p)
5484 {
5485 	const struct dt_property *prop;
5486 
5487 	/* Get PBCQ MMIO windows from device-tree */
5488 	prop = dt_require_property(p->phb.dt_node,
5489 				   "ibm,mmio-windows", -1);
5490 	assert(prop->len >= (2 * sizeof(uint64_t)));
5491 
5492 	p->mm0_base = ((const uint64_t *)prop->prop)[0];
5493 	p->mm0_size = ((const uint64_t *)prop->prop)[1];
5494 	if (prop->len > 16) {
5495 		p->mm1_base = ((const uint64_t *)prop->prop)[2];
5496 		p->mm1_size = ((const uint64_t *)prop->prop)[3];
5497 	}
5498 
5499 	/* Sort them so that 0 is big and 1 is small */
5500 	if (p->mm1_size && p->mm1_size > p->mm0_size) {
5501 		uint64_t b = p->mm0_base;
5502 		uint64_t s = p->mm0_size;
5503 		p->mm0_base = p->mm1_base;
5504 		p->mm0_size = p->mm1_size;
5505 		p->mm1_base = b;
5506 		p->mm1_size = s;
5507 	}
5508 
5509 	/* If 1 is too small, ditch it */
5510 	if (p->mm1_size < M32_PCI_SIZE)
5511 		p->mm1_size = 0;
5512 
5513 	/* If 1 doesn't exist, carve it out of 0 */
5514 	if (p->mm1_size == 0) {
5515 		p->mm0_size /= 2;
5516 		p->mm1_base = p->mm0_base + p->mm0_size;
5517 		p->mm1_size = p->mm0_size;
5518 	}
5519 
5520 	/* Crop mm1 to our desired size */
5521 	if (p->mm1_size > M32_PCI_SIZE)
5522 		p->mm1_size = M32_PCI_SIZE;
5523 
5524 	return true;
5525 }
5526 
5527 static void phb4_err_interrupt(struct irq_source *is, uint32_t isn)
5528 {
5529 	struct phb4 *p = is->data;
5530 
5531 	PHBDBG(p, "Got interrupt 0x%08x\n", isn);
5532 
5533 #if 0
5534 	/* Update pending event */
5535 	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
5536 				OPAL_EVENT_PCI_ERROR);
5537 
5538 	/* If the PHB is broken, go away */
5539 	if (p->broken)
5540 		return;
5541 
5542 	/*
5543 	 * Mark the PHB has pending error so that the OS
5544 	 * can handle it at late point.
5545 	 */
5546 	phb3_set_err_pending(p, true);
5547 #endif
5548 }
5549 
5550 static uint64_t phb4_lsi_attributes(struct irq_source *is __unused,
5551 				uint32_t isn __unused)
5552 {
5553 #ifndef DISABLE_ERR_INTS
5554 	struct phb4 *p = is->data;
5555 	uint32_t idx = isn - p->base_lsi;
5556 
5557 	if (idx == PHB4_LSI_PCIE_INF || idx == PHB4_LSI_PCIE_ER)
5558 		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_LSI;
5559 #endif
5560 	return IRQ_ATTR_TARGET_LINUX;
5561 }
5562 
5563 static const struct irq_source_ops phb4_lsi_ops = {
5564 	.interrupt = phb4_err_interrupt,
5565 	.attributes = phb4_lsi_attributes,
5566 };
5567 
5568 #ifdef HAVE_BIG_ENDIAN
5569 static u64 lane_eq_default[8] = {
5570 	0x5454545454545454UL, 0x5454545454545454UL,
5571 	0x5454545454545454UL, 0x5454545454545454UL,
5572 	0x7777777777777777UL, 0x7777777777777777UL,
5573 	0x7777777777777777UL, 0x7777777777777777UL
5574 };
5575 #else
5576 #error lane_eq_default needs to be big endian (device tree property)
5577 #endif
5578 
5579 static void phb4_create(struct dt_node *np)
5580 {
5581 	const struct dt_property *prop;
5582 	struct phb4 *p;
5583 	struct pci_slot *slot;
5584 	size_t lane_eq_len, lane_eq_len_req;
5585 	struct dt_node *iplp;
5586 	char *path;
5587 	uint32_t irq_base, irq_flags;
5588 	int i;
5589 	int chip_id;
5590 
5591 	chip_id = dt_prop_get_u32(np, "ibm,chip-id");
5592 	p = local_alloc(chip_id, sizeof(struct phb4), 8);
5593 	assert(p);
5594 	memset(p, 0x0, sizeof(struct phb4));
5595 
5596 	/* Populate base stuff */
5597 	p->index = dt_prop_get_u32(np, "ibm,phb-index");
5598 	p->chip_id = chip_id;
5599 	p->pec = dt_prop_get_u32(np, "ibm,phb-pec-index");
5600 	p->regs = (void *)dt_get_address(np, 0, NULL);
5601 	p->int_mmio = (void *)dt_get_address(np, 1, NULL);
5602 	p->phb.dt_node = np;
5603 	p->phb.ops = &phb4_ops;
5604 	p->phb.phb_type = phb_type_pcie_v4;
5605 	p->phb.scan_map = 0x1; /* Only device 0 to scan */
5606 
5607 	if (!phb4_calculate_windows(p))
5608 		return;
5609 
5610 	/* Get the various XSCOM register bases from the device-tree */
5611 	prop = dt_require_property(np, "ibm,xscom-bases", 5 * sizeof(uint32_t));
5612 	p->pe_xscom = ((const uint32_t *)prop->prop)[0];
5613 	p->pe_stk_xscom = ((const uint32_t *)prop->prop)[1];
5614 	p->pci_xscom = ((const uint32_t *)prop->prop)[2];
5615 	p->pci_stk_xscom = ((const uint32_t *)prop->prop)[3];
5616 	p->etu_xscom = ((const uint32_t *)prop->prop)[4];
5617 
5618 	/*
5619 	 * We skip the initial PERST assertion requested by the generic code
5620 	 * when doing a cold boot because we are coming out of cold boot already
5621 	 * so we save boot time that way. The PERST state machine will still
5622 	 * handle waiting for the link to come up, it will just avoid actually
5623 	 * asserting & deasserting the PERST output
5624 	 *
5625 	 * For a hot IPL, we still do a PERST
5626 	 *
5627 	 * Note: In the absence of the property (ie, FSP-less), we stick to the old
5628 	 * behaviour and set skip_perst to true
5629 	 */
5630 	p->skip_perst = true; /* Default */
5631 
5632 	iplp = dt_find_by_path(dt_root, "ipl-params/ipl-params");
5633 	if (iplp) {
5634 		const char *ipl_type = dt_prop_get_def(iplp, "cec-major-type", NULL);
5635 		if (ipl_type && (!strcmp(ipl_type, "hot")))
5636 			p->skip_perst = false;
5637 	}
5638 
5639 	/* By default link is assumed down */
5640 	p->has_link = false;
5641 
5642 	/* We register the PHB before we initialize it so we
5643 	 * get a useful OPAL ID for it
5644 	 */
5645 	pci_register_phb(&p->phb, phb4_get_opal_id(p->chip_id, p->index));
5646 
5647 	/* Create slot structure */
5648 	slot = phb4_slot_create(&p->phb);
5649 	if (!slot)
5650 		PHBERR(p, "Cannot create PHB slot\n");
5651 
5652 	/* Hello ! */
5653 	path = dt_get_path(np);
5654 	PHBINF(p, "Found %s @%p\n", path, p->regs);
5655 	PHBINF(p, "  M32 [0x%016llx..0x%016llx]\n",
5656 	       p->mm1_base, p->mm1_base + p->mm1_size - 1);
5657 	PHBINF(p, "  M64 [0x%016llx..0x%016llx]\n",
5658 	       p->mm0_base, p->mm0_base + p->mm0_size - 1);
5659 	free(path);
5660 
5661 	/* Find base location code from root node */
5662 	p->phb.base_loc_code = dt_prop_get_def(dt_root,
5663 					       "ibm,io-base-loc-code", NULL);
5664 	if (!p->phb.base_loc_code)
5665 		PHBDBG(p, "Base location code not found !\n");
5666 
5667 	/*
5668 	 * Grab CEC IO VPD load info from the root of the device-tree,
5669 	 * on P8 there's a single such VPD for the whole machine
5670 	 */
5671 	prop = dt_find_property(dt_root, "ibm,io-vpd");
5672 	if (!prop) {
5673 		/* LX VPD Lid not already loaded */
5674 		if (platform.vpd_iohub_load)
5675 			platform.vpd_iohub_load(dt_root);
5676 	}
5677 
5678 	/* Obtain information about the PHB from the hardware directly */
5679 	if (!phb4_read_capabilities(p))
5680 		goto failed;
5681 
5682 	p->max_link_speed = phb4_get_max_link_speed(p, np);
5683 	PHBINF(p, "Max link speed: GEN%i\n", p->max_link_speed);
5684 
5685 	/* Check for lane equalization values from HB or HDAT */
5686 	p->lane_eq_en = true;
5687 	p->lane_eq = dt_prop_get_def_size(np, "ibm,lane-eq", NULL, &lane_eq_len);
5688 	lane_eq_len_req = 6 * 8;
5689 	if (p->lane_eq) {
5690 		if (lane_eq_len < lane_eq_len_req) {
5691 			PHBERR(p, "Device-tree has ibm,lane-eq too short: %ld"
5692 			       " (want %ld)\n", lane_eq_len, lane_eq_len_req);
5693 			p->lane_eq = NULL;
5694 		}
5695 	} else {
5696 		PHBDBG(p, "Using default lane equalization settings\n");
5697 		p->lane_eq = lane_eq_default;
5698 	}
5699 	if (p->lane_eq) {
5700 		PHBDBG(p, "Override lane equalization settings:\n");
5701 		for (i = 0 ; i < lane_eq_len_req/(8 * 2) ; i++)
5702 			PHBDBG(p, "  0x%016llx 0x%016llx\n",
5703 			       be64_to_cpu(p->lane_eq[2 * i]),
5704 			       be64_to_cpu(p->lane_eq[2 * i + 1]));
5705 	}
5706 
5707 	/* Allocate a block of interrupts. We need to know if it needs
5708 	 * 2K or 4K interrupts ... for now we just use 4K but that
5709 	 * needs to be fixed
5710 	 */
5711 	irq_base = xive_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
5712 	if (irq_base == XIVE_IRQ_ERROR) {
5713 		PHBERR(p, "Failed to allocate %d interrupt sources\n",
5714 		       p->num_irqs);
5715 		goto failed;
5716 	}
5717 	p->base_msi = irq_base;
5718 	p->base_lsi = irq_base + p->num_irqs - 8;
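	/*
	 * Hypothetical example of the split above: with irq_base == 0x1000
	 * and num_irqs == 4096, MSIs occupy 0x1000..0x1ff7 and the last
	 * eight interrupts, 0x1ff8..0x1fff, are the LSIs.
	 */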
5719 	p->irq_port = xive_get_notify_port(p->chip_id,
5720 					   XIVE_HW_SRC_PHBn(p->index));
5721 	p->num_pes = p->max_num_pes;
5722 
5723 	/* Allocate the SkiBoot internal in-memory tables for the PHB */
5724 	phb4_allocate_tables(p);
5725 
5726 	phb4_add_properties(p);
5727 
5728 	/* Clear IODA3 cache */
5729 	phb4_init_ioda_cache(p);
5730 
5731 	/* Get the HW up and running */
5732 	phb4_init_hw(p);
5733 
5734 	/* init capp that might get attached to the phb */
5735 	phb4_init_capp(p);
5736 
5737 	/* Compute XIVE source flags depending on PHB revision */
5738 	irq_flags = 0;
5739 	if (PHB4_CAN_STORE_EOI(p))
5740 		irq_flags |= XIVE_SRC_STORE_EOI;
5741 	else
5742 		irq_flags |= XIVE_SRC_TRIGGER_PAGE;
5743 
5744 	/* Register all interrupt sources with XIVE */
5745 	xive_register_hw_source(p->base_msi, p->num_irqs - 8, 16,
5746 				p->int_mmio, irq_flags, NULL, NULL);
5747 
5748 	xive_register_hw_source(p->base_lsi, 8, 16,
5749 				p->int_mmio + ((p->num_irqs - 8) << 16),
5750 				XIVE_SRC_LSI | XIVE_SRC_SHIFT_BUG,
5751 				p,
5752 				&phb4_lsi_ops);
5753 
5754 	/* Platform additional setup */
5755 	if (platform.pci_setup_phb)
5756 		platform.pci_setup_phb(&p->phb, p->index);
5757 
5758 	dt_add_property_string(np, "status", "okay");
5759 
5760 	return;
5761 
5762  failed:
5763 	p->broken = true;
5764 
5765 	/* Tell Linux it's broken */
5766 	dt_add_property_string(np, "status", "error");
5767 }
5768 
5769 static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
5770 			     uint32_t nest_base, uint32_t pci_base)
5771 {
5772 	uint32_t pci_stack, nest_stack, etu_base, gcid, phb_num, stk_index;
5773 	uint64_t val, phb_bar = 0, irq_bar = 0, bar_en;
5774 	uint64_t mmio0_bar = 0, mmio0_bmask, mmio0_sz;
5775 	uint64_t mmio1_bar = 0, mmio1_bmask, mmio1_sz;
5776 	uint64_t reg[4];
5777 	void *foo;
5778 	uint64_t mmio_win[4];
5779 	unsigned int mmio_win_sz;
5780 	struct dt_node *np;
5781 	char *path;
5782 	uint64_t capp_ucode_base;
5783 	unsigned int max_link_speed;
5784 	int rc;
5785 
5786 	gcid = dt_get_chip_id(stk_node);
5787 	stk_index = dt_prop_get_u32(stk_node, "reg");
5788 	phb_num = dt_prop_get_u32(stk_node, "ibm,phb-index");
5789 	path = dt_get_path(stk_node);
5790 	prlog(PR_INFO, "PHB: Chip %d Found PHB4 PBCQ%d Stack %d at %s\n",
5791 	      gcid, pec_index, stk_index, path);
5792 	free(path);
5793 
5794 	pci_stack = pci_base + 0x40 * (stk_index + 1);
5795 	nest_stack = nest_base + 0x40 * (stk_index + 1);
5796 	etu_base = pci_base + 0x100 + 0x40 * stk_index;
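	/*
	 * Illustration of the layout above: stack 0 uses pci_base + 0x40,
	 * nest_base + 0x40 and pci_base + 0x100; stack 1 uses
	 * pci_base + 0x80, nest_base + 0x80 and pci_base + 0x140; and so
	 * on, one 0x40 stride per stack.
	 */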
5797 
5798 	prlog(PR_DEBUG, "PHB[%d:%d] X[PE]=0x%08x/0x%08x X[PCI]=0x%08x/0x%08x X[ETU]=0x%08x\n",
5799 	      gcid, phb_num, nest_base, nest_stack, pci_base, pci_stack, etu_base);
5800 
5801 	/* Default BAR enables */
5802 	bar_en = 0;
5803 
5804 	/* Initialize PHB register BAR */
5805 	phys_map_get(gcid, PHB4_REG_SPC, phb_num, &phb_bar, NULL);
5806 	rc = xscom_write(gcid, nest_stack + XPEC_NEST_STK_PHB_REG_BAR,
5807 			 phb_bar << 8);
5808 
5809 	/* A scom error here probably indicates a defective/garded PHB */
5810 	if (rc != OPAL_SUCCESS) {
5811 		prerror("PHB[%d:%d] Unable to set PHB BAR. Error=%d\n",
5812 		      gcid, phb_num, rc);
5813 		return;
5814 	}
5815 
5816 	bar_en |= XPEC_NEST_STK_BAR_EN_PHB;
5817 
5818 	/* Same with INT BAR (ESB) */
5819 	phys_map_get(gcid, PHB4_XIVE_ESB, phb_num, &irq_bar, NULL);
5820 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_IRQ_BAR, irq_bar << 8);
5821 	bar_en |= XPEC_NEST_STK_BAR_EN_INT;
5822 
5823 
5824 	/* Same with MMIO windows */
5825 	phys_map_get(gcid, PHB4_64BIT_MMIO, phb_num, &mmio0_bar, &mmio0_sz);
5826 	mmio0_bmask =  (~(mmio0_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
5827 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0, mmio0_bar << 8);
5828 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0_MASK, mmio0_bmask << 8);
5829 
5830 	phys_map_get(gcid, PHB4_32BIT_MMIO, phb_num, &mmio1_bar, &mmio1_sz);
5831 	mmio1_bmask =  (~(mmio1_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
5832 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1, mmio1_bar << 8);
5833 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1_MASK, mmio1_bmask << 8);
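	/*
	 * Worked example of the mask computation above, with a
	 * hypothetical window size: for mmio0_sz == 0x0000004000000000
	 * (256GB), mmio0_bmask == 0x00ffffc000000000, which is written
	 * shifted left by 8 just like the BAR itself.
	 */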
5834 
5835 	/* Build MMIO windows list */
5836 	mmio_win_sz = 0;
5837 	if (mmio0_bar) {
5838 		mmio_win[mmio_win_sz++] = mmio0_bar;
5839 		mmio_win[mmio_win_sz++] = mmio0_sz;
5840 		bar_en |= XPEC_NEST_STK_BAR_EN_MMIO0;
5841 	}
5842 	if (mmio1_bar) {
5843 		mmio_win[mmio_win_sz++] = mmio1_bar;
5844 		mmio_win[mmio_win_sz++] = mmio1_sz;
5845 		bar_en |= XPEC_NEST_STK_BAR_EN_MMIO1;
5846 	}
5847 
5848 	/* Set the appropriate enables */
5849 	xscom_read(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, &val);
5850 	val |= bar_en;
5851 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, val);
5852 
5853 	/* No MMIO windows ? Barf ! */
5854 	if (mmio_win_sz == 0) {
5855 		prerror("PHB[%d:%d] No MMIO windows enabled !\n", gcid, phb_num);
5856 		return;
5857 	}
5858 
5859 	/* Clear errors in PFIR and NFIR */
5860 	xscom_write(gcid, pci_stack + XPEC_PCI_STK_PCI_FIR, 0);
5861 	xscom_write(gcid, nest_stack + XPEC_NEST_STK_PCI_NFIR, 0);
5862 
5863 	/* Check ETU reset */
5864 	xscom_read(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, &val);
5865 	prlog_once(PR_DEBUG, "ETU reset: %llx\n", val);
5866 	xscom_write(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, 0);
5867 	time_wait_ms(1);
5868 
5869 	// show we can read phb mmio space
5870 	foo = (void *)(phb_bar + 0x800); // phb version register
5871 	prlog_once(PR_DEBUG, "Version reg: 0x%016llx\n", in_be64(foo));
5872 
5873 	/* Create PHB node */
5874 	reg[0] = phb_bar;
5875 	reg[1] = 0x1000;
5876 	reg[2] = irq_bar;
5877 	reg[3] = 0x10000000;
5878 
5879 	np = dt_new_addr(dt_root, "pciex", reg[0]);
5880 	if (!np)
5881 		return;
5882 
5883 	dt_add_property_strings(np, "compatible", "ibm,power9-pciex", "ibm,ioda3-phb");
5884 	dt_add_property_strings(np, "device_type", "pciex");
5885 	dt_add_property(np, "reg", reg, sizeof(reg));
5886 
5887 	/* Everything else is handled later by skiboot, we just
5888 	 * stick a few hints here
5889 	 */
5890 	dt_add_property_cells(np, "ibm,xscom-bases",
5891 			      nest_base, nest_stack, pci_base, pci_stack, etu_base);
5892 	dt_add_property(np, "ibm,mmio-windows", mmio_win, 8 * mmio_win_sz);
5893 	dt_add_property_cells(np, "ibm,phb-index", phb_num);
5894 	dt_add_property_cells(np, "ibm,phb-pec-index", pec_index);
5895 	dt_add_property_cells(np, "ibm,phb-stack", stk_node->phandle);
5896 	dt_add_property_cells(np, "ibm,phb-stack-index", stk_index);
5897 	dt_add_property_cells(np, "ibm,chip-id", gcid);
5898 
5899 	/* read the hub-id out of the pbcq node */
5900 	if (dt_has_node_property(stk_node->parent, "ibm,hub-id", NULL)) {
5901 		uint32_t hub_id;
5902 
5903 		hub_id = dt_prop_get_u32(stk_node->parent, "ibm,hub-id");
5904 		dt_add_property_cells(np, "ibm,hub-id", hub_id);
5905 	}
5906 
5907 	if (dt_has_node_property(stk_node->parent, "ibm,loc-code", NULL)) {
5908 		const char *lc = dt_prop_get(stk_node->parent, "ibm,loc-code");
5909 		dt_add_property_string(np, "ibm,loc-code", lc);
5910 	}
5911 	if (dt_has_node_property(stk_node, "ibm,lane-eq", NULL)) {
5912 		size_t leq_size;
5913 		const void *leq = dt_prop_get_def_size(stk_node, "ibm,lane-eq",
5914 						       NULL, &leq_size);
5915 		if (leq != NULL && leq_size >= 6 * 8)
5916 			dt_add_property(np, "ibm,lane-eq", leq, leq_size);
5917 	}
5918 	if (dt_has_node_property(stk_node, "ibm,capp-ucode", NULL)) {
5919 		capp_ucode_base = dt_prop_get_u32(stk_node, "ibm,capp-ucode");
5920 		dt_add_property_cells(np, "ibm,capp-ucode", capp_ucode_base);
5921 	}
5922 	if (dt_has_node_property(stk_node, "ibm,max-link-speed", NULL)) {
5923 		max_link_speed = dt_prop_get_u32(stk_node, "ibm,max-link-speed");
5924 		dt_add_property_cells(np, "ibm,max-link-speed", max_link_speed);
5925 	}
5926 	dt_add_property_cells(np, "ibm,capi-flags",
5927 			      OPAL_PHB_CAPI_FLAG_SNOOP_CONTROL);
5928 
5929 	add_chip_dev_associativity(np);
5930 }
5931 
5932 static void phb4_probe_pbcq(struct dt_node *pbcq)
5933 {
5934 	uint32_t nest_base, pci_base, pec_index;
5935 	struct dt_node *stk;
5936 
5937 	/* REMOVEME: force this for now until we stabilise PCIe */
5938 	verbose_eeh = 1;
5939 
5940 	nest_base = dt_get_address(pbcq, 0, NULL);
5941 	pci_base = dt_get_address(pbcq, 1, NULL);
5942 	pec_index = dt_prop_get_u32(pbcq, "ibm,pec-index");
5943 
5944 	dt_for_each_child(pbcq, stk) {
5945 		if (dt_node_is_enabled(stk))
5946 			phb4_probe_stack(stk, pec_index, nest_base, pci_base);
5947 	}
5948 }
5949 
5950 void probe_phb4(void)
5951 {
5952 	struct dt_node *np;
5953 	const char *s;
5954 
5955 	pci_tracing = nvram_query_eq_safe("pci-tracing", "true");
5956 	pci_eeh_mmio = !nvram_query_eq_dangerous("pci-eeh-mmio", "disabled");
5957 	pci_retry_all = nvram_query_eq_dangerous("pci-retry-all", "true");
5958 	s = nvram_query_dangerous("phb-rx-err-max");
5959 	if (s) {
5960 		rx_err_max = atoi(s);
5961 
5962 		/* Clip to uint8_t used by hardware */
5963 		rx_err_max = MAX(rx_err_max, 0);
5964 		rx_err_max = MIN(rx_err_max, 255);
5965 	}
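	/*
	 * Illustrative note (an assumption about the usual tooling, not
	 * mandated here): these nvram knobs are typically set from the
	 * host with something like
	 *   nvram -p ibm,skiboot --update-config phb-rx-err-max=32
	 */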
5966 	prlog(PR_DEBUG, "PHB4: Maximum RX errors during training: %d\n", rx_err_max);
5967 	/* Look for PBCQ XSCOM nodes */
5968 	dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
5969 		phb4_probe_pbcq(np);
5970 
5971 	/* Look for newly created PHB nodes */
5972 	dt_for_each_compatible(dt_root, np, "ibm,power9-pciex")
5973 		phb4_create(np);
5974 }
5975