// SPDX-License-Identifier: Apache-2.0 OR GPL-2.0-or-later
/*
 * PHB4: PCI Host Bridge 4, in POWER9
 *
 * Copyright 2013-2019 IBM Corp.
 * Copyright 2018 Raptor Engineering, LLC
 */

/*
 *
 * FIXME:
 *   More stuff for EEH support:
 *      - PBCQ error reporting interrupt
 *      - I2C-based power management (replacing SHPC)
 *      - Directly detect fenced PHB through one dedicated HW reg
 */
/*
 * This is a simplified view of the PHB4 reset and link training steps
 *
 * Step 1:
 * - Check for hotplug status:
 *   o PHB_PCIE_HOTPLUG_STATUS bit PHB_PCIE_HPSTAT_PRESENCE
 *   o If not set -> Bail out (Slot is empty)
 *
 * Step 2:
 * - Do complete PHB reset:
 *   o PHB/ETU reset procedure
 *
 * Step 3:
 * - Drive PERST active (skip if already asserted, i.e. after a cold reboot)
 * - Wait 250ms (for cards to reset)
 *   o PowerVM has used 250ms for a long time without any problems
 *
 * Step 4:
 * - Drive PERST inactive
 *
 * Step 5:
 * - Look for inband presence:
 *   o From PERST we have two stages to get inband presence detected
 *     1) Devices must enter Detect state within 20 ms of the end of
 *        Fundamental Reset
 *     2) Receiver detect pulses occur every 12ms
 *      - Hence minimum wait time 20 + 12 = 32ms
 *   o Unfortunately, we've seen cards take 440ms
 *   o Hence we are conservative and poll here for 1000ms (> 440ms)
 * - If no inband presence after 1000ms -> Bail out (Slot is broken)
 *   o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_INBAND_PRESENCE
 *
 * Step 6:
 * - Look for link training done:
 *   o PHB_PCIE_DLP_TRAIN_CTL bit PHB_PCIE_DLP_TL_LINKACT
 * - If not set after 2000ms, Retry (3 times) -> Goto Step 2
 *   o A PHY lockup can cause link training failure, hence we go back to
 *     a complete PHB reset on retry
 *   o Not expected to happen very often
 *
 * Step 7:
 * - Wait for 1 sec (before touching device config space):
 * - From the PCIe spec:
 *     Root Complex and/or system software must allow at least 1.0 s after
 *     a Conventional Reset of a device, before it may determine that a
 *     device which fails to return a Successful Completion status for a
 *     valid Configuration Request is a broken device.
 *
 * Step 8:
 * - Sanity check for fence and link still up:
 *   o If fenced or link down, Retry (3 times) -> Goto Step 2
 *   o This is not necessary but takes no time and can be useful
 *   o Once we leave here, it is much harder to recover from errors
 *
 * Step 9:
 * - Check for an optimised link for directly attached devices:
 *   o Wait for CRS (so we can read device config space)
 *   o Check chip and device are in the allowlist. If not, Goto Step 10
 *   o If trained link speed is degraded, retry -> Goto Step 2
 *   o If trained link width is degraded, retry -> Goto Step 2
 *   o If still degraded after 3 retries, give up, Goto Step 10
 *
 * Step 10:
 * - PHB good, start probing config space.
 *   o core/pci.c: pci_reset_phb() -> pci_scan_phb()
 */


#undef NO_ASB
#undef LOG_CFG

#include <skiboot.h>
#include <io.h>
#include <timebase.h>
#include <pci.h>
#include <pci-cfg.h>
#include <pci-slot.h>
#include <vpd.h>
#include <interrupts.h>
#include <opal.h>
#include <cpu.h>
#include <device.h>
#include <ccan/str/str.h>
#include <ccan/array_size/array_size.h>
#include <xscom.h>
#include <affinity.h>
#include <phb4.h>
#include <phb4-regs.h>
#include <phb4-capp.h>
#include <capp.h>
#include <fsp.h>
#include <chip.h>
#include <chiptod.h>
#include <xive.h>
#include <xscom-p9-regs.h>
#include <phys-map.h>
#include <nvram.h>

/* Enable this to disable error interrupts for debug purposes */
#undef DISABLE_ERR_INTS

static void phb4_init_hw(struct phb4 *p);

#define PHBDBG(p, fmt, a...)	prlog(PR_DEBUG, "PHB#%04x[%d:%d]: " fmt, \
				      (p)->phb.opal_id, (p)->chip_id, \
				      (p)->index, ## a)
#define PHBINF(p, fmt, a...)	prlog(PR_INFO, "PHB#%04x[%d:%d]: " fmt, \
				      (p)->phb.opal_id, (p)->chip_id, \
				      (p)->index, ## a)
#define PHBNOTICE(p, fmt, a...)	prlog(PR_NOTICE, "PHB#%04x[%d:%d]: " fmt, \
				      (p)->phb.opal_id, (p)->chip_id, \
				      (p)->index, ## a)
#define PHBERR(p, fmt, a...)	prlog(PR_ERR, "PHB#%04x[%d:%d]: " fmt, \
				      (p)->phb.opal_id, (p)->chip_id, \
				      (p)->index, ## a)
#ifdef LOG_CFG
#define PHBLOGCFG(p, fmt, a...)	PHBDBG(p, fmt, ## a)
#else
#define PHBLOGCFG(p, fmt, a...)	do {} while (0)
#endif

static bool pci_eeh_mmio;
static bool pci_retry_all;
static int rx_err_max = PHB4_RX_ERR_MAX;

static inline bool is_phb4(void)
{
	return (proc_gen == proc_gen_p9);
}

static inline bool is_phb5(void)
{
	return (proc_gen == proc_gen_p10);
}

/* PQ offloading on the XIVE IC. */
static inline bool phb_pq_disable(struct phb4 *p __unused)
{
	if (is_phb5())
		return xive2_cap_phb_pq_disable();

	return false;
}

/*
 * Use the ESB page of the XIVE IC for event notification. Latency
 * improvement.
 */
static inline bool phb_abt_mode(struct phb4 *p __unused)
{
	if (is_phb5())
		return xive2_cap_phb_abt();

	return false;
}

static inline bool phb_can_store_eoi(struct phb4 *p)
{
	if (is_phb5())
		/* PQ offloading is required for StoreEOI */
		return XIVE2_STORE_EOI_ENABLED && phb_pq_disable(p);

	return XIVE_STORE_EOI_ENABLED;
}

/* Note: The "ASB" name is historical; in practice this means access via
 * the XSCOM backdoor
 */
static inline uint64_t phb4_read_reg_asb(struct phb4 *p, uint32_t offset)
{
#ifdef NO_ASB
	return in_be64(p->regs + offset);
#else
	int64_t rc;
	uint64_t addr, val;

	/* Address register: must use 4 bytes for built-in config space.
	 *
	 * This path isn't usable for outbound configuration space
	 */
	if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
		PHBERR(p, "XSCOM unaligned access to CONFIG_DATA unsupported\n");
		return -1ull;
	}
	addr = XETU_HV_IND_ADDR_VALID | offset;
	if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
		addr |= XETU_HV_IND_ADDR_4B;
	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
	if (rc != 0) {
		PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
		return -1ull;
	}
	rc = xscom_read(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, &val);
	if (rc != 0) {
		PHBERR(p, "XSCOM error reading register 0x%x\n", offset);
		return -1ull;
	}
	return val;
#endif
}

static inline void phb4_write_reg_asb(struct phb4 *p,
				      uint32_t offset, uint64_t val)
{
#ifdef NO_ASB
	out_be64(p->regs + offset, val);
#else
	int64_t rc;
	uint64_t addr;

	/* Address register: must use 4 bytes for built-in config space.
	 *
	 * This path isn't usable for outbound configuration space
	 */
	if (((offset & 0xfffffffc) == PHB_CONFIG_DATA) && (offset & 3)) {
		PHBERR(p, "XSCOM access to CONFIG_DATA unsupported\n");
		return;
	}
	addr = XETU_HV_IND_ADDR_VALID | offset;
	if ((offset >= 0x1000 && offset < 0x1800) || (offset == PHB_CONFIG_DATA))
		addr |= XETU_HV_IND_ADDR_4B;
	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_ADDRESS, addr);
	if (rc != 0) {
		PHBERR(p, "XSCOM error addressing register 0x%x\n", offset);
		return;
	}
	rc = xscom_write(p->chip_id, p->etu_xscom + XETU_HV_IND_DATA, val);
	if (rc != 0) {
		PHBERR(p, "XSCOM error writing register 0x%x\n", offset);
		return;
	}
#endif
}

static uint64_t phb4_read_reg(struct phb4 *p, uint32_t offset)
{
	/* No register accesses are permitted while in reset */
	if (p->flags & PHB4_ETU_IN_RESET)
		return -1ull;

	if (p->flags & PHB4_CFG_USE_ASB)
		return phb4_read_reg_asb(p, offset);
	else
		return in_be64(p->regs + offset);
}

static void phb4_write_reg(struct phb4 *p, uint32_t offset, uint64_t val)
{
	/* No register accesses are permitted while in reset */
	if (p->flags & PHB4_ETU_IN_RESET)
		return;

	if (p->flags & PHB4_CFG_USE_ASB)
		phb4_write_reg_asb(p, offset, val);
	else
		out_be64(p->regs + offset, val);
}

/* Helper to select an IODA table entry */
static inline void phb4_ioda_sel(struct phb4 *p, uint32_t table,
				 uint32_t addr, bool autoinc)
{
	phb4_write_reg(p, PHB_IODA_ADDR,
		       (autoinc ? PHB_IODA_AD_AUTOINC : 0) |
		       SETFIELD(PHB_IODA_AD_TSEL, 0ul, table) |
		       SETFIELD(PHB_IODA_AD_TADR, 0ul, addr));
}

/*
 * Configuration space access
 *
 * The PHB lock is assumed to be already held
 */
static int64_t phb4_pcicfg_check(struct phb4 *p, uint32_t bdfn,
				 uint32_t offset, uint32_t size,
				 uint16_t *pe)
{
	uint32_t sm = size - 1;

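	/* size is a power of two (1, 2 or 4), so size - 1 is the alignment mask */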
	if (offset > 0xfff || bdfn > 0xffff)
		return OPAL_PARAMETER;
	if (offset & sm)
		return OPAL_PARAMETER;

	/* The root bus only has a device at 0 and we get into an
	 * error state if we try to probe beyond that, so let's
	 * avoid that and just return an error to Linux
	 */
	if (PCI_BUS_NUM(bdfn) == 0 && (bdfn & 0xff))
		return OPAL_HARDWARE;

	/* Check PHB state */
	if (p->broken)
		return OPAL_HARDWARE;

	/* Fetch the PE# from cache */
	*pe = be16_to_cpu(p->tbl_rtt[bdfn]);

	return OPAL_SUCCESS;
}

static int64_t phb4_rc_read(struct phb4 *p, uint32_t offset, uint8_t sz,
			    void *data, bool use_asb)
{
	uint32_t reg = offset & ~3;
	uint32_t oval;

	/* Some registers are handled locally */
	switch (reg) {
	/* Bridge base/limit registers are cached here as HW
	 * doesn't implement them (it hard codes values that
	 * will confuse a proper PCI implementation).
	 */
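	/* rc_cache[] holds config offsets 0x20..0x2f, one 32-bit register per slot */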
	case PCI_CFG_MEM_BASE:		/* Includes PCI_CFG_MEM_LIMIT */
		oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
		break;
	case PCI_CFG_PREF_MEM_BASE:	/* Includes PCI_CFG_PREF_MEM_LIMIT */
		oval = p->rc_cache[(reg - 0x20) >> 2] & 0xfff0fff0;
		oval |= 0x00010001;
		break;
	case PCI_CFG_IO_BASE_U16:	/* Includes PCI_CFG_IO_LIMIT_U16 */
		oval = 0;
		break;
	case PCI_CFG_PREF_MEM_BASE_U32:
	case PCI_CFG_PREF_MEM_LIMIT_U32:
		oval = p->rc_cache[(reg - 0x20) >> 2];
		break;
	default:
		oval = 0xffffffff; /* default if offset too big */
		if (reg < PHB_RC_CONFIG_SIZE) {
			if (use_asb)
				oval = bswap_32(phb4_read_reg_asb(p, PHB_RC_CONFIG_BASE
								  + reg));
			else
				oval = in_le32(p->regs + PHB_RC_CONFIG_BASE + reg);
		}
	}

	/* Apply any post-read fixups */
	switch (reg) {
	case PCI_CFG_IO_BASE:
		oval |= 0x01f1; /* Set IO base < limit to disable the window */
		break;
	}

	switch (sz) {
	case 1:
		offset &= 3;
		*((uint8_t *)data) = (oval >> (offset << 3)) & 0xff;
		PHBLOGCFG(p, "000 CFG08 Rd %02x=%02x\n",
			  offset, *((uint8_t *)data));
		break;
	case 2:
		offset &= 2;
		*((uint16_t *)data) = (oval >> (offset << 3)) & 0xffff;
		PHBLOGCFG(p, "000 CFG16 Rd %02x=%04x\n",
			  offset, *((uint16_t *)data));
		break;
	case 4:
		*((uint32_t *)data) = oval;
		PHBLOGCFG(p, "000 CFG32 Rd %02x=%08x\n",
			  offset, *((uint32_t *)data));
		break;
	default:
		assert(false);
	}
	return OPAL_SUCCESS;
}

static int64_t phb4_rc_write(struct phb4 *p, uint32_t offset, uint8_t sz,
			     uint32_t val, bool use_asb)
{
	uint32_t reg = offset & ~3;
	uint32_t old, mask, shift, oldold;
	int64_t rc;

	if (reg > PHB_RC_CONFIG_SIZE)
		return OPAL_SUCCESS;

	/* If size isn't 4-bytes, do a RMW cycle */
	if (sz < 4) {
		rc = phb4_rc_read(p, reg, 4, &old, use_asb);
		if (rc != OPAL_SUCCESS)
			return rc;

		/*
		 * Since we have to Read-Modify-Write here, we need to filter
		 * out registers that have write-1-to-clear bits to prevent
		 * clearing stuff we shouldn't be. So for any register this
		 * applies to, mask out those bits.
		 */
		oldold = old;
		switch (reg) {
		case 0x1C: /* Secondary status */
			old &= 0x00ffffff; /* mask out 24-31 */
			break;
		case 0x50: /* EC - Device status */
			old &= 0xfff0ffff; /* mask out 16-19 */
			break;
		case 0x58: /* EC - Link status */
			old &= 0x3fffffff; /* mask out 30-31 */
			break;
		case 0x78: /* EC - Link status 2 */
			old &= 0xf000ffff; /* mask out 16-27 */
			break;
		/* These registers *only* have write-1-to-clear bits */
		case 0x104: /* AER - Uncorr. error status */
		case 0x110: /* AER - Corr. error status */
		case 0x130: /* AER - Root error status */
		case 0x180: /* P16 - status */
		case 0x184: /* P16 - LDPM status */
		case 0x188: /* P16 - FRDPM status */
		case 0x18C: /* P16 - SRDPM status */
			old &= 0x00000000;
			break;
		}

		if (old != oldold) {
			PHBLOGCFG(p, "Rewrote %x to %x for reg %x for W1C\n",
				  oldold, old, reg);
		}

		if (sz == 1) {
			shift = (offset & 3) << 3;
			mask = 0xff << shift;
			val = (old & ~mask) | ((val & 0xff) << shift);
		} else {
			shift = (offset & 2) << 3;
			mask = 0xffff << shift;
			val = (old & ~mask) | ((val & 0xffff) << shift);
		}
	}

	/* Some registers are handled locally */
	switch (reg) {
	/* See comment in phb4_rc_read() */
	case PCI_CFG_MEM_BASE:		/* Includes PCI_CFG_MEM_LIMIT */
	case PCI_CFG_PREF_MEM_BASE:	/* Includes PCI_CFG_PREF_MEM_LIMIT */
	case PCI_CFG_PREF_MEM_BASE_U32:
	case PCI_CFG_PREF_MEM_LIMIT_U32:
		p->rc_cache[(reg - 0x20) >> 2] = val;
		break;
	case PCI_CFG_IO_BASE_U16:	/* Includes PCI_CFG_IO_LIMIT_U16 */
		break;
	default:
		/* Workaround PHB config space enable */
		PHBLOGCFG(p, "000 CFG%02d Wr %02x=%08x\n", 8 * sz, reg, val);
		if (use_asb)
			phb4_write_reg_asb(p, PHB_RC_CONFIG_BASE + reg, val);
		else
			out_le32(p->regs + PHB_RC_CONFIG_BASE + reg, val);
	}
	return OPAL_SUCCESS;
}

static int64_t phb4_pcicfg_read(struct phb4 *p, uint32_t bdfn,
				uint32_t offset, uint32_t size,
				void *data)
{
	uint64_t addr, val64;
	int64_t rc;
	uint16_t pe;
	bool use_asb = false;

	rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
	if (rc)
		return rc;

	if (p->flags & PHB4_AIB_FENCED) {
		if (!(p->flags & PHB4_CFG_USE_ASB))
			return OPAL_HARDWARE;
		if (bdfn != 0)
			return OPAL_HARDWARE;
		use_asb = true;
	} else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
		return OPAL_HARDWARE;
	}

	/* Handle per-device filters */
	rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
				    (uint32_t *)data, false);
	if (rc != OPAL_PARTIAL)
		return rc;

	/* Handle root complex MMIO based config space */
	if (bdfn == 0)
		return phb4_rc_read(p, offset, size, data, use_asb);

	addr = PHB_CA_ENABLE;
	addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
	addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
	addr = SETFIELD(PHB_CA_PE, addr, pe);
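	/* Non-RC access: go through the CONFIG_ADDRESS/CONFIG_DATA indirect window */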
	if (use_asb) {
		phb4_write_reg_asb(p, PHB_CONFIG_ADDRESS, addr);
		sync();
		val64 = bswap_64(phb4_read_reg_asb(p, PHB_CONFIG_DATA));
		switch (size) {
		case 1:
			*((uint8_t *)data) = val64 >> (8 * (offset & 3));
			break;
		case 2:
			*((uint16_t *)data) = val64 >> (8 * (offset & 2));
			break;
		case 4:
			*((uint32_t *)data) = val64;
			break;
		default:
			return OPAL_PARAMETER;
		}
	} else {
		out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
		switch (size) {
		case 1:
			*((uint8_t *)data) =
				in_8(p->regs + PHB_CONFIG_DATA + (offset & 3));
			PHBLOGCFG(p, "%03x CFG08 Rd %02x=%02x\n",
				  bdfn, offset, *((uint8_t *)data));
			break;
		case 2:
			*((uint16_t *)data) =
				in_le16(p->regs + PHB_CONFIG_DATA + (offset & 2));
			PHBLOGCFG(p, "%03x CFG16 Rd %02x=%04x\n",
				  bdfn, offset, *((uint16_t *)data));
			break;
		case 4:
			*((uint32_t *)data) = in_le32(p->regs + PHB_CONFIG_DATA);
			PHBLOGCFG(p, "%03x CFG32 Rd %02x=%08x\n",
				  bdfn, offset, *((uint32_t *)data));
			break;
		default:
			return OPAL_PARAMETER;
		}
	}
	return OPAL_SUCCESS;
}


#define PHB4_PCI_CFG_READ(size, type)					\
static int64_t phb4_pcicfg_read##size(struct phb *phb, uint32_t bdfn,	\
				      uint32_t offset, type *data)	\
{									\
	struct phb4 *p = phb_to_phb4(phb);				\
									\
	/* Initialize data in case of error */				\
	*data = (type)0xffffffff;					\
	return phb4_pcicfg_read(p, bdfn, offset, sizeof(type), data);	\
}

static int64_t phb4_pcicfg_write(struct phb4 *p, uint32_t bdfn,
				 uint32_t offset, uint32_t size,
				 uint32_t data)
{
	uint64_t addr;
	int64_t rc;
	uint16_t pe;
	bool use_asb = false;

	rc = phb4_pcicfg_check(p, bdfn, offset, size, &pe);
	if (rc)
		return rc;

	if (p->flags & PHB4_AIB_FENCED) {
		if (!(p->flags & PHB4_CFG_USE_ASB))
			return OPAL_HARDWARE;
		if (bdfn != 0)
			return OPAL_HARDWARE;
		use_asb = true;
	} else if ((p->flags & PHB4_CFG_BLOCKED) && bdfn != 0) {
		return OPAL_HARDWARE;
	}

	/* Handle per-device filters */
	rc = pci_handle_cfg_filters(&p->phb, bdfn, offset, size,
				    (uint32_t *)&data, true);
	if (rc != OPAL_PARTIAL)
		return rc;

	/* Handle root complex MMIO based config space */
	if (bdfn == 0)
		return phb4_rc_write(p, offset, size, data, use_asb);

	addr = PHB_CA_ENABLE;
	addr = SETFIELD(PHB_CA_BDFN, addr, bdfn);
	addr = SETFIELD(PHB_CA_REG, addr, offset & ~3u);
	addr = SETFIELD(PHB_CA_PE, addr, pe);
	if (use_asb) {
		/* We don't support ASB config space writes */
		return OPAL_UNSUPPORTED;
	} else {
		out_be64(p->regs + PHB_CONFIG_ADDRESS, addr);
		switch (size) {
		case 1:
			out_8(p->regs + PHB_CONFIG_DATA + (offset & 3), data);
			break;
		case 2:
			out_le16(p->regs + PHB_CONFIG_DATA + (offset & 2), data);
			break;
		case 4:
			out_le32(p->regs + PHB_CONFIG_DATA, data);
			break;
		default:
			return OPAL_PARAMETER;
		}
	}
	PHBLOGCFG(p, "%03x CFG%d Wr %02x=%08x\n", bdfn, 8 * size, offset, data);
	return OPAL_SUCCESS;
}

#define PHB4_PCI_CFG_WRITE(size, type)					\
static int64_t phb4_pcicfg_write##size(struct phb *phb, uint32_t bdfn,	\
				       uint32_t offset, type data)	\
{									\
	struct phb4 *p = phb_to_phb4(phb);				\
									\
	return phb4_pcicfg_write(p, bdfn, offset, sizeof(type), data);	\
}

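/* Instantiate the 8/16/32-bit config space accessors */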
PHB4_PCI_CFG_READ(8, u8)
PHB4_PCI_CFG_READ(16, u16)
PHB4_PCI_CFG_READ(32, u32)
PHB4_PCI_CFG_WRITE(8, u8)
PHB4_PCI_CFG_WRITE(16, u16)
PHB4_PCI_CFG_WRITE(32, u32)

static int64_t phb4_get_reserved_pe_number(struct phb *phb)
{
	struct phb4 *p = phb_to_phb4(phb);

	return PHB4_RESERVED_PE_NUM(p);
}


static void phb4_root_port_init(struct phb *phb, struct pci_device *dev,
				int ecap, int aercap)
{
	struct phb4 *p = phb_to_phb4(phb);
	struct pci_slot *slot = dev->slot;
	uint16_t bdfn = dev->bdfn;
	uint16_t val16;
	uint32_t val32;

	/*
	 * Use the PHB's callback so that UTL events will be masked or
	 * unmasked when the link is down or up.
	 */
	if (dev->slot && dev->slot->ops.prepare_link_change &&
	    phb->slot && phb->slot->ops.prepare_link_change)
		dev->slot->ops.prepare_link_change =
			phb->slot->ops.prepare_link_change;

	// FIXME: check recommended init values for phb4

	/*
	 * Enable the bridge slot capability in the root port's config
	 * space. This should probably be done *before* we start
	 * scanning config space, but we need a pci_device struct to
	 * exist before we do a slot lookup so *faaaaaaaaaaaaaart*
	 */
	if (slot && slot->pluggable && slot->power_limit) {
		uint64_t val;

		val = in_be64(p->regs + PHB_PCIE_SCR);
		val |= PHB_PCIE_SCR_SLOT_CAP;
		out_be64(p->regs + PHB_PCIE_SCR, val);

		/* update the cached slotcap */
		pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_SLOTCAP,
			       &slot->slot_cap);
	}

	/* Enable SERR and parity checking */
	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
	val16 |= (PCI_CFG_CMD_SERR_EN | PCI_CFG_CMD_PERR_RESP |
		  PCI_CFG_CMD_MEM_EN);
	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);

	/* Enable reporting various errors */
	if (!ecap) return;
	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
	val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
		  PCICAP_EXP_DEVCTL_NFE_REPORT |
		  PCICAP_EXP_DEVCTL_FE_REPORT |
		  PCICAP_EXP_DEVCTL_UR_REPORT);
	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);

	if (!aercap) return;

	/* Mask various unrecoverable errors */
	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, &val32);
	val32 |= (PCIECAP_AER_UE_MASK_POISON_TLP |
		  PCIECAP_AER_UE_MASK_COMPL_TIMEOUT |
		  PCIECAP_AER_UE_MASK_COMPL_ABORT |
		  PCIECAP_AER_UE_MASK_ECRC);
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, val32);

	/* Report various unrecoverable errors as fatal errors */
	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, &val32);
	val32 |= (PCIECAP_AER_UE_SEVERITY_DLLP |
		  PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
		  PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
		  PCIECAP_AER_UE_SEVERITY_UNEXP_COMPL |
		  PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
		  PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP);
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);

	/* Mask various recoverable errors */
	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, &val32);
	val32 |= PCIECAP_AER_CE_MASK_ADV_NONFATAL;
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);

	/* Enable ECRC check */
	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
		  PCIECAP_AER_CAPCTL_ECRCC_EN);
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);

	/* Enable all error reporting */
	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, &val32);
	val32 |= (PCIECAP_AER_RERR_CMD_FE |
		  PCIECAP_AER_RERR_CMD_NFE |
		  PCIECAP_AER_RERR_CMD_CE);
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_RERR_CMD, val32);
}

static void phb4_switch_port_init(struct phb *phb,
				  struct pci_device *dev,
				  int ecap, int aercap)
{
	uint16_t bdfn = dev->bdfn;
	uint16_t val16;
	uint32_t val32;

	// FIXME: update AER settings for phb4

	/* Enable SERR and parity checking and disable INTx */
	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
	val16 |= (PCI_CFG_CMD_PERR_RESP |
		  PCI_CFG_CMD_SERR_EN |
		  PCI_CFG_CMD_INTx_DIS);
	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);

	/* Disable parity error response and enable system error reporting */
	pci_cfg_read16(phb, bdfn, PCI_CFG_BRCTL, &val16);
	val16 &= ~PCI_CFG_BRCTL_PERR_RESP_EN;
	val16 |= PCI_CFG_BRCTL_SERR_EN;
	pci_cfg_write16(phb, bdfn, PCI_CFG_BRCTL, val16);

	/* Enable reporting various errors */
	if (!ecap) return;
	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
	val16 |= (PCICAP_EXP_DEVCTL_CE_REPORT |
		  PCICAP_EXP_DEVCTL_NFE_REPORT |
		  PCICAP_EXP_DEVCTL_FE_REPORT);
	/* HW279570 - Disable reporting of correctable errors */
	val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);

	/* Unmask all unrecoverable errors */
	if (!aercap) return;
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_MASK, 0x0);

	/* Severity of unrecoverable errors */
	if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT)
		val32 = (PCIECAP_AER_UE_SEVERITY_DLLP |
			 PCIECAP_AER_UE_SEVERITY_SURPRISE_DOWN |
			 PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
			 PCIECAP_AER_UE_SEVERITY_RECV_OVFLOW |
			 PCIECAP_AER_UE_SEVERITY_MALFORMED_TLP |
			 PCIECAP_AER_UE_SEVERITY_INTERNAL);
	else
		val32 = (PCIECAP_AER_UE_SEVERITY_FLOW_CTL_PROT |
			 PCIECAP_AER_UE_SEVERITY_INTERNAL);
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_UE_SEVERITY, val32);

	/*
	 * Mask various correctable errors
	 */
	val32 = PCIECAP_AER_CE_MASK_ADV_NONFATAL;
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CE_MASK, val32);

	/* Enable ECRC generation and disable ECRC check */
	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
	val32 |= PCIECAP_AER_CAPCTL_ECRCG_EN;
	val32 &= ~PCIECAP_AER_CAPCTL_ECRCC_EN;
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
}

static void phb4_endpoint_init(struct phb *phb,
			       struct pci_device *dev,
			       int ecap, int aercap)
{
	uint16_t bdfn = dev->bdfn;
	uint16_t val16;
	uint32_t val32;

	/* Enable SERR and parity checking */
	pci_cfg_read16(phb, bdfn, PCI_CFG_CMD, &val16);
	val16 |= (PCI_CFG_CMD_PERR_RESP |
		  PCI_CFG_CMD_SERR_EN);
	pci_cfg_write16(phb, bdfn, PCI_CFG_CMD, val16);

	/* Enable reporting various errors */
	if (!ecap) return;
	pci_cfg_read16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, &val16);
	val16 &= ~PCICAP_EXP_DEVCTL_CE_REPORT;
	val16 |= (PCICAP_EXP_DEVCTL_NFE_REPORT |
		  PCICAP_EXP_DEVCTL_FE_REPORT |
		  PCICAP_EXP_DEVCTL_UR_REPORT);
	pci_cfg_write16(phb, bdfn, ecap + PCICAP_EXP_DEVCTL, val16);

	/* Enable ECRC generation and check */
	if (!aercap)
		return;

	pci_cfg_read32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, &val32);
	val32 |= (PCIECAP_AER_CAPCTL_ECRCG_EN |
		  PCIECAP_AER_CAPCTL_ECRCC_EN);
	pci_cfg_write32(phb, bdfn, aercap + PCIECAP_AER_CAPCTL, val32);
}

static int64_t phb4_pcicfg_no_dstate(void *dev __unused,
				     struct pci_cfg_reg_filter *pcrf,
				     uint32_t offset, uint32_t len __unused,
				     uint32_t *data __unused, bool write)
{
	uint32_t loff = offset - pcrf->start;

	/* Disable D-state change on children of the PHB. For now we
	 * simply block all writes to the PM control/status
	 */
	if (write && loff >= 4 && loff < 6)
		return OPAL_SUCCESS;

	return OPAL_PARTIAL;
}

void phb4_pec2_dma_engine_realloc(struct phb4 *p)
{
	uint64_t reg;

	/*
	 * Allocate 16 extra DMA read engines to stack 0, to boost DMA
	 * performance for devices on stack 0 of PEC2, i.e. PHB3.
	 * It comes at the price of reduced read engine allocation for
	 * devices on stacks 1 and 2. The engine allocation becomes
	 * 48/8/8 instead of the default 32/16/16.
	 *
	 * The reallocation magic value should be 0xffff0000ff008000,
	 * but per the PCI designers, DMA engine 32 (bit 0) has a
	 * quirk, and 0x7fff80007F008000 has the same effect (engine
	 * 32 goes to PHB4).
	 */
	if (p->index != 3) /* shared slot on PEC2 */
		return;

	PHBINF(p, "Allocating an extra 16 dma read engines on PEC2 stack0\n");
	reg = 0x7fff80007F008000ULL;
	xscom_write(p->chip_id,
		    p->pci_xscom + XPEC_PCI_PRDSTKOVR, reg);
	xscom_write(p->chip_id,
		    p->pe_xscom + XPEC_NEST_READ_STACK_OVERRIDE, reg);
}

static void phb4_check_device_quirks(struct pci_device *dev)
{
	/* Some special adapter tweaks for devices directly under the PHB */
	if (dev->primary_bus != 1)
		return;

	/* PM quirk */
	if (!pci_has_cap(dev, PCI_CFG_CAP_ID_PM, false))
		return;

	pci_add_cfg_reg_filter(dev,
			       pci_cap(dev, PCI_CFG_CAP_ID_PM, false), 8,
			       PCI_REG_FLAG_WRITE,
			       phb4_pcicfg_no_dstate);
}

static int phb4_device_init(struct phb *phb, struct pci_device *dev,
			    void *data __unused)
{
	int ecap, aercap;

	/* Setup special device quirks */
	phb4_check_device_quirks(dev);

	/* Common initialization for the device */
	pci_device_init(phb, dev);

	ecap = pci_cap(dev, PCI_CFG_CAP_ID_EXP, false);
	aercap = pci_cap(dev, PCIECAP_ID_AER, true);
	if (dev->dev_type == PCIE_TYPE_ROOT_PORT)
		phb4_root_port_init(phb, dev, ecap, aercap);
	else if (dev->dev_type == PCIE_TYPE_SWITCH_UPPORT ||
		 dev->dev_type == PCIE_TYPE_SWITCH_DNPORT)
		phb4_switch_port_init(phb, dev, ecap, aercap);
	else
		phb4_endpoint_init(phb, dev, ecap, aercap);

	return 0;
}

static int64_t phb4_pci_reinit(struct phb *phb, uint64_t scope, uint64_t data)
{
	struct pci_device *pd;
	uint16_t bdfn = data;
	int ret;

	if (scope != OPAL_REINIT_PCI_DEV)
		return OPAL_PARAMETER;

	pd = pci_find_dev(phb, bdfn);
	if (!pd)
		return OPAL_PARAMETER;

	ret = phb4_device_init(phb, pd, NULL);
	if (ret)
		return OPAL_HARDWARE;

	return OPAL_SUCCESS;
}

/* Default value for MBT0, see comments in init_ioda_cache() */
static uint64_t phb4_default_mbt0(struct phb4 *p, unsigned int bar_idx)
{
	uint64_t mbt0;

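	/* The last three MBT entries use MDT columns 3, 2 and 1; every
	 * other BAR defaults to fully segmented mode (segment# == PE#).
	 */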
	switch (p->mbt_size - bar_idx - 1) {
	case 0:
		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 3);
		break;
	case 1:
		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 2);
		break;
	case 2:
		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
		mbt0 = SETFIELD(IODA3_MBT0_MDT_COLUMN, mbt0, 1);
		break;
	default:
		mbt0 = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_PE_SEG);
	}
	return mbt0;
}

/*
 * Clear the saved (cached) IODA state.
 *
 * The caches here are used to save the configuration of the IODA tables
 * done by the OS. When the PHB is reset it loses all of its internal state
 * so we need to keep a copy to restore from. This function re-initialises
 * the saved state to sane defaults.
 */
static void phb4_init_ioda_cache(struct phb4 *p)
{
	uint32_t i;

	/*
	 * The RTT entries (RTE) are supposed to be initialised to
	 * 0xFF which indicates an invalid PE# for that RTT index
	 * (the bdfn). However, we set them to 0x00 since Linux
	 * needs to find the devices first by scanning config space
	 * and this occurs before PEs have been assigned.
	 */
	for (i = 0; i < RTT_TABLE_ENTRIES; i++)
		p->tbl_rtt[i] = cpu_to_be16(PHB4_RESERVED_PE_NUM(p));
	memset(p->tbl_peltv, 0x0, p->tbl_peltv_size);
	memset(p->tve_cache, 0x0, sizeof(p->tve_cache));

	/* XXX Should we mask them ? */
	memset(p->mist_cache, 0x0, sizeof(p->mist_cache));

	/* Configure MBT entries 1...N */

	/* Column 0 is left 0 and will be used for M32 and configured
	 * by the OS. We use MDT columns 1..3 for the last 3 BARs, thus
	 * allowing Linux to remap those, and set up all the other ones
	 * for now in mode 00 (segment# == PE#). By default those
	 * columns are set to map the same way.
	 */
	for (i = 0; i < p->max_num_pes; i++) {
		p->mdt_cache[i] = SETFIELD(IODA3_MDT_PE_B, 0ull, i);
		p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_C, 0ull, i);
		p->mdt_cache[i] |= SETFIELD(IODA3_MDT_PE_D, 0ull, i);
	}

	/* Initialize MBT entries for BARs 1...N */
	for (i = 1; i < p->mbt_size; i++) {
		p->mbt_cache[i][0] = phb4_default_mbt0(p, i);
		p->mbt_cache[i][1] = 0;
	}

	/* Initialize the M32 BAR using MBT entry 0, MDT column A */
	p->mbt_cache[0][0] = SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_MDT);
	p->mbt_cache[0][0] |= SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0);
	p->mbt_cache[0][0] |= IODA3_MBT0_TYPE_M32 | (p->mm1_base & IODA3_MBT0_BASE_ADDR);
	p->mbt_cache[0][1] = IODA3_MBT1_ENABLE | ((~(M32_PCI_SIZE - 1)) & IODA3_MBT1_MASK);
}

static int64_t phb4_wait_bit(struct phb4 *p, uint32_t reg,
			     uint64_t mask, uint64_t want_val)
{
	uint64_t val;

	/* Wait for all pending TCE kills to complete
	 *
	 * XXX Add timeout...
	 */
	/* XXX SIMICS is nasty... */
	if ((reg == PHB_TCE_KILL || reg == PHB_DMA_READ_WRITE_SYNC) &&
	    chip_quirk(QUIRK_SIMICS))
		return OPAL_SUCCESS;

	for (;;) {
		val = in_be64(p->regs + reg);
		if (val == 0xffffffffffffffffull) {
			/* XXX Fenced ? */
			return OPAL_HARDWARE;
		}
		if ((val & mask) == want_val)
			break;
	}
	return OPAL_SUCCESS;
}

static int64_t phb4_tce_kill(struct phb *phb, uint32_t kill_type,
			     uint64_t pe_number, uint32_t tce_size,
			     uint64_t dma_addr, uint32_t npages)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t val;
	int64_t rc;

	sync();
	switch (kill_type) {
	case OPAL_PCI_TCE_KILL_PAGES:
		while (npages--) {
			/* Wait for a slot in the HW kill queue */
			rc = phb4_wait_bit(p, PHB_TCE_KILL,
					   PHB_TCE_KILL_ALL |
					   PHB_TCE_KILL_PE |
					   PHB_TCE_KILL_ONE, 0);
			if (rc)
				return rc;
			val = SETFIELD(PHB_TCE_KILL_PENUM, dma_addr, pe_number);

			/* Set appropriate page size */
			switch (tce_size) {
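			/* The DMA address must be aligned to the TCE page
			 * size and fit in the low 60 bits.
			 */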
			case 0x1000:
				if (dma_addr & 0xf000000000000fffull)
					return OPAL_PARAMETER;
				break;
			case 0x10000:
				if (dma_addr & 0xf00000000000ffffull)
					return OPAL_PARAMETER;
				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_64K;
				break;
			case 0x200000:
				if (dma_addr & 0xf0000000001fffffull)
					return OPAL_PARAMETER;
				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_2M;
				break;
			case 0x40000000:
				if (dma_addr & 0xf00000003fffffffull)
					return OPAL_PARAMETER;
				val |= PHB_TCE_KILL_PSEL | PHB_TCE_KILL_1G;
				break;
			default:
				return OPAL_PARAMETER;
			}
			/* Perform kill */
			out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ONE | val);
			/* Next page */
			dma_addr += tce_size;
		}
		break;
	case OPAL_PCI_TCE_KILL_PE:
		/* Wait for a slot in the HW kill queue */
		rc = phb4_wait_bit(p, PHB_TCE_KILL,
				   PHB_TCE_KILL_ALL |
				   PHB_TCE_KILL_PE |
				   PHB_TCE_KILL_ONE, 0);
		if (rc)
			return rc;
		/* Perform kill */
		out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_PE |
			 SETFIELD(PHB_TCE_KILL_PENUM, 0ull, pe_number));
		break;
	case OPAL_PCI_TCE_KILL_ALL:
		/* Wait for a slot in the HW kill queue */
		rc = phb4_wait_bit(p, PHB_TCE_KILL,
				   PHB_TCE_KILL_ALL |
				   PHB_TCE_KILL_PE |
				   PHB_TCE_KILL_ONE, 0);
		if (rc)
			return rc;
		/* Perform kill */
		out_be64(p->regs + PHB_TCE_KILL, PHB_TCE_KILL_ALL);
		break;
	default:
		return OPAL_PARAMETER;
	}

	/* Start DMA sync process */
	if (is_phb5()) {
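		/* On PHB5, preserve the current completion bits when
		 * starting a new read sync.
		 */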
		val = in_be64(p->regs + PHB_DMA_READ_WRITE_SYNC) &
		      (PHB_DMA_READ_SYNC_COMPLETE |
		       PHB_DMA_WRITE_SYNC_COMPLETE);
		out_be64(p->regs + PHB_DMA_READ_WRITE_SYNC,
			 val | PHB_DMA_READ_SYNC_START);

	} else {
		out_be64(p->regs + PHB_DMA_READ_WRITE_SYNC,
			 PHB_DMA_READ_SYNC_START);
	}

	/* Wait for kill to complete */
	rc = phb4_wait_bit(p, PHB_Q_DMA_R, PHB_Q_DMA_R_TCE_KILL_STATUS, 0);
	if (rc)
		return rc;

	/* Wait for DMA sync to complete */
	return phb4_wait_bit(p, PHB_DMA_READ_WRITE_SYNC,
			     PHB_DMA_READ_SYNC_COMPLETE,
			     PHB_DMA_READ_SYNC_COMPLETE);
}

/* phb4_ioda_reset - Reset the IODA tables
 *
 * @purge: If true, the cache is cleared and the cleared values
 *         are applied to HW. If false, the cached values are
 *         applied to HW
 *
 * This resets the IODA tables in the PHB. It is called at
 * initialization time, on PHB reset, and can be called
 * explicitly from OPAL
 */
static int64_t phb4_ioda_reset(struct phb *phb, bool purge)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint32_t i;
	uint64_t val;

	if (purge) {
		PHBDBG(p, "Purging all IODA tables...\n");
		if (phb->slot)
			phb->slot->link_retries = PHB4_LINK_LINK_RETRIES;
		phb4_init_ioda_cache(p);
	}

	/* Init_30..31 - Errata workaround, clear PESTA entry 0 */
	phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, false);
	out_be64(p->regs + PHB_IODA_DATA0, 0);

	/* Init_32..33 - MIST */
	phb4_ioda_sel(p, IODA3_TBL_MIST, 0, true);
	val = in_be64(p->regs + PHB_IODA_ADDR);
	val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 0xf);
	out_be64(p->regs + PHB_IODA_ADDR, val);
	for (i = 0; i < (p->num_irqs / 4); i++)
		out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[i]);

	/* Init_34..35 - MRT */
	phb4_ioda_sel(p, IODA3_TBL_MRT, 0, true);
	for (i = 0; i < p->mrt_size; i++)
		out_be64(p->regs + PHB_IODA_DATA0, 0);

	/* Init_36..37 - TVT */
	phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
	for (i = 0; i < p->tvt_size; i++)
		out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);

	/* Init_38..39 - MBT */
	phb4_ioda_sel(p, IODA3_TBL_MBT, 0, true);
	for (i = 0; i < p->mbt_size; i++) {
		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][0]);
		out_be64(p->regs + PHB_IODA_DATA0, p->mbt_cache[i][1]);
	}

	/* Init_40..41 - MDT */
	phb4_ioda_sel(p, IODA3_TBL_MDT, 0, true);
	for (i = 0; i < p->max_num_pes; i++)
		out_be64(p->regs + PHB_IODA_DATA0, p->mdt_cache[i]);

	/* Additional OPAL specific inits */

	/* Clear PEST & PEEV */
	for (i = 0; i < p->max_num_pes; i++) {
		phb4_ioda_sel(p, IODA3_TBL_PESTA, i, false);
		out_be64(p->regs + PHB_IODA_DATA0, 0);
		phb4_ioda_sel(p, IODA3_TBL_PESTB, i, false);
		out_be64(p->regs + PHB_IODA_DATA0, 0);
	}

	phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
	for (i = 0; i < p->max_num_pes / 64; i++)
		out_be64(p->regs + PHB_IODA_DATA0, 0);

	/* Invalidate RTE, TCE cache */
	out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);

	return phb4_tce_kill(&p->phb, OPAL_PCI_TCE_KILL_ALL, 0, 0, 0, 0);
}

/*
 * Clear anything we have in the PAPR Error Injection registers. The
 * spec says PAPR error injection should be one-shot, without a
 * "sticky" bit; however, experiments show otherwise. So we have to
 * clear them at the appropriate point in the kernel to avoid an
 * endlessly frozen PE.
 */
static int64_t phb4_papr_errinjct_reset(struct phb *phb)
{
	struct phb4 *p = phb_to_phb4(phb);

	out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
	out_be64(p->regs + PHB_PAPR_ERR_INJ_ADDR, 0x0ul);
	out_be64(p->regs + PHB_PAPR_ERR_INJ_MASK, 0x0ul);

	return OPAL_SUCCESS;
}

static int64_t phb4_set_phb_mem_window(struct phb *phb,
				       uint16_t window_type,
				       uint16_t window_num,
				       uint64_t addr,
				       uint64_t pci_addr __unused,
				       uint64_t size)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t mbt0, mbt1;

	/*
	 * We have a unified MBT for all BARs on PHB4.
	 *
	 * So we use it as follows:
	 *
	 * - M32 is hard wired to be MBT[0] and uses MDT column 0
	 *   for remapping.
	 *
	 * - MBT[1..n] are available to the OS, currently only as
	 *   fully segmented or single PE (we don't yet expose the
	 *   new segmentation modes).
	 *
	 * - We configure the 3 last BARs to columns 1..3, initially
	 *   set to segment# == PE#. We will need to provide some
	 *   extensions to the existing APIs to enable remapping of
	 *   segments on those BARs (and only those) as the current
	 *   API forces single segment mode.
	 */
	switch (window_type) {
	case OPAL_IO_WINDOW_TYPE:
	case OPAL_M32_WINDOW_TYPE:
		return OPAL_UNSUPPORTED;
	case OPAL_M64_WINDOW_TYPE:
		if (window_num == 0 || window_num >= p->mbt_size) {
			PHBERR(p, "%s: Invalid window %d\n",
			       __func__, window_num);
			return OPAL_PARAMETER;
		}

		mbt0 = p->mbt_cache[window_num][0];
		mbt1 = p->mbt_cache[window_num][1];

		/* XXX For now we assume the 4K minimum alignment,
		 * todo: check with the HW folks what the exact limits
		 * are based on the segmentation model.
		 */
		if ((addr & 0xFFFul) || (size & 0xFFFul)) {
			PHBERR(p, "%s: Bad addr/size alignment %llx/%llx\n",
			       __func__, addr, size);
			return OPAL_PARAMETER;
		}

		/* size should be 2^N */
		if (!size || size & (size - 1)) {
			PHBERR(p, "%s: size not a power of 2: %llx\n",
			       __func__, size);
			return OPAL_PARAMETER;
		}

		/* address should be size aligned */
		if (addr & (size - 1)) {
			PHBERR(p, "%s: addr not size aligned %llx/%llx\n",
			       __func__, addr, size);
			return OPAL_PARAMETER;
		}

		break;
	default:
		return OPAL_PARAMETER;
	}

	/* The BAR shouldn't be enabled yet */
	if (mbt0 & IODA3_MBT0_ENABLE)
		return OPAL_PARTIAL;

	/* Apply the settings */
	mbt0 = SETFIELD(IODA3_MBT0_BASE_ADDR, mbt0, addr >> 12);
	mbt1 = SETFIELD(IODA3_MBT1_MASK, mbt1, ~((size >> 12) - 1));
	p->mbt_cache[window_num][0] = mbt0;
	p->mbt_cache[window_num][1] = mbt1;

	return OPAL_SUCCESS;
}

/*
 * One specific M64 BAR can be shared by all PEs,
 * or owned by a single PE exclusively.
 */
static int64_t phb4_phb_mmio_enable(struct phb __unused *phb,
				    uint16_t window_type,
				    uint16_t window_num,
				    uint16_t enable)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t mbt0, mbt1, base, mask;

	/*
	 * By design, PHB4 doesn't support IODT any more.
	 * Besides, the M32 BAR can't be enabled here either. So
	 * this function is used to do M64 mapping and each
	 * BAR is supposed to be shared by all PEs.
	 *
	 * TODO: Add support for some of the new PHB4 split modes
	 */
	switch (window_type) {
	case OPAL_IO_WINDOW_TYPE:
	case OPAL_M32_WINDOW_TYPE:
		return OPAL_UNSUPPORTED;
	case OPAL_M64_WINDOW_TYPE:
		/* Window 0 is reserved for M32 */
		if (window_num == 0 || window_num >= p->mbt_size ||
		    enable > OPAL_ENABLE_M64_NON_SPLIT) {
			PHBDBG(p,
			       "phb4_phb_mmio_enable wrong args (window %d enable %d)\n",
			       window_num, enable);
			return OPAL_PARAMETER;
		}
		break;
	default:
		return OPAL_PARAMETER;
	}

	/*
	 * We need to check the base/mask while enabling
	 * the M64 BAR. Otherwise, an invalid base/mask
	 * might unintentionally fence the AIB
	 */
	mbt0 = p->mbt_cache[window_num][0];
	mbt1 = p->mbt_cache[window_num][1];

	if (enable == OPAL_DISABLE_M64) {
		/* Reset the window to disabled & default mode */
		mbt0 = phb4_default_mbt0(p, window_num);
		mbt1 = 0;
	} else {
		/* Verify that the mode is valid and consistent */
		if (enable == OPAL_ENABLE_M64_SPLIT) {
			uint64_t mode = GETFIELD(IODA3_MBT0_MODE, mbt0);
			if (mode != IODA3_MBT0_MODE_PE_SEG &&
			    mode != IODA3_MBT0_MODE_MDT)
				return OPAL_PARAMETER;
		} else if (enable == OPAL_ENABLE_M64_NON_SPLIT) {
			if (GETFIELD(IODA3_MBT0_MODE, mbt0) !=
			    IODA3_MBT0_MODE_SINGLE_PE)
				return OPAL_PARAMETER;
		} else
			return OPAL_PARAMETER;

		base = GETFIELD(IODA3_MBT0_BASE_ADDR, mbt0);
		base = (base << 12);
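		/* Like the base, the MBT1 mask field is in units of 4KB */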
		mask = GETFIELD(IODA3_MBT1_MASK, mbt1);
		if (base < p->mm0_base || !mask)
			return OPAL_PARTIAL;

		mbt0 |= IODA3_MBT0_ENABLE;
		mbt1 |= IODA3_MBT1_ENABLE;
	}

	/* Update HW and cache */
	p->mbt_cache[window_num][0] = mbt0;
	p->mbt_cache[window_num][1] = mbt1;
	phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
	out_be64(p->regs + PHB_IODA_DATA0, mbt0);
	out_be64(p->regs + PHB_IODA_DATA0, mbt1);

	return OPAL_SUCCESS;
}

static int64_t phb4_map_pe_mmio_window(struct phb *phb,
				       uint64_t pe_number,
				       uint16_t window_type,
				       uint16_t window_num,
				       uint16_t segment_num)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t mbt0, mbt1, mdt0;

	if (pe_number >= p->num_pes)
		return OPAL_PARAMETER;

	/*
	 * We support a combined MDT that has 4 columns. We let the OS
	 * use column 0 for M32.
	 *
	 * We configure the 3 last BARs to map column 3..1 which by default
	 * are set to map segment# == pe#, but can be remapped here if we
	 * extend this function.
	 *
	 * The problem is that the current API was "hijacked" so that an
	 * attempt at remapping any segment of an M64 has the effect of
	 * turning it into a single-PE mode BAR. So if we want to support
	 * remapping we'll have to work around this, for example by creating
	 * a new API or a new window type...
	 */
	switch (window_type) {
	case OPAL_IO_WINDOW_TYPE:
		return OPAL_UNSUPPORTED;
	case OPAL_M32_WINDOW_TYPE:
		if (window_num != 0 || segment_num >= p->num_pes)
			return OPAL_PARAMETER;

		mdt0 = p->mdt_cache[segment_num];
		mdt0 = SETFIELD(IODA3_MDT_PE_A, mdt0, pe_number);
		phb4_ioda_sel(p, IODA3_TBL_MDT, segment_num, false);
		out_be64(p->regs + PHB_IODA_DATA0, mdt0);
		break;
	case OPAL_M64_WINDOW_TYPE:
		if (window_num == 0 || window_num >= p->mbt_size)
			return OPAL_PARAMETER;

		mbt0 = p->mbt_cache[window_num][0];
		mbt1 = p->mbt_cache[window_num][1];

		/* The BAR shouldn't be enabled yet */
		if (mbt0 & IODA3_MBT0_ENABLE)
			return OPAL_PARTIAL;

		/* Set to single PE mode and configure the PE */
		mbt0 = SETFIELD(IODA3_MBT0_MODE, mbt0,
				IODA3_MBT0_MODE_SINGLE_PE);
		mbt1 = SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, mbt1, pe_number);
		p->mbt_cache[window_num][0] = mbt0;
		p->mbt_cache[window_num][1] = mbt1;
		break;
	default:
		return OPAL_PARAMETER;
	}

	return OPAL_SUCCESS;
}

static int64_t phb4_map_pe_dma_window(struct phb *phb,
				      uint64_t pe_number,
				      uint16_t window_id,
				      uint16_t tce_levels,
				      uint64_t tce_table_addr,
				      uint64_t tce_table_size,
				      uint64_t tce_page_size)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t tts_encoded;
	uint64_t data64 = 0;

	/*
	 * We configure the PHB in 2 TVE per PE mode to match phb3.
	 * The current Linux implementation *requires* the two windows per
	 * PE.
	 *
	 * Note: On DD2.0 this is the normal mode of operation.
	 */

	/*
	 * Sanity check. We currently only support "2 window per PE" mode,
	 * i.e. only bit 59 of the PCI address is used to select the window
	 */
	if (pe_number >= p->num_pes || (window_id >> 1) != pe_number)
		return OPAL_PARAMETER;

	/*
	 * tce_table_size == 0 is used to disable an entry, in this case
	 * we ignore other arguments
	 */
	if (tce_table_size == 0) {
		phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
		out_be64(p->regs + PHB_IODA_DATA0, 0);
		p->tve_cache[window_id] = 0;
		return OPAL_SUCCESS;
	}

	/* Additional arguments validation */
	if (tce_levels < 1 || tce_levels > 5 ||
	    !is_pow2(tce_table_size) ||
	    tce_table_size < 0x1000)
		return OPAL_PARAMETER;

	/* Encode TCE table size */
	data64 = SETFIELD(IODA3_TVT_TABLE_ADDR, 0ul, tce_table_addr >> 12);
	tts_encoded = ilog2(tce_table_size) - 11;
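	/* The TVT size field encodes the table size as 2^(tts + 11) bytes,
	 * so the minimum 4KB table encodes to 1
	 */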
	if (tts_encoded > 31)
		return OPAL_PARAMETER;
	data64 = SETFIELD(IODA3_TVT_TCE_TABLE_SIZE, data64, tts_encoded);

	/* Encode TCE page size */
	switch (tce_page_size) {
	case 0x1000:		/* 4K */
		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 1);
		break;
	case 0x10000:		/* 64K */
		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 5);
		break;
	case 0x200000:		/* 2M */
		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 10);
		break;
	case 0x40000000:	/* 1G */
		data64 = SETFIELD(IODA3_TVT_IO_PSIZE, data64, 19);
		break;
	default:
		return OPAL_PARAMETER;
	}

	/* Encode number of levels */
	data64 = SETFIELD(IODA3_TVT_NUM_LEVELS, data64, tce_levels - 1);

	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
	out_be64(p->regs + PHB_IODA_DATA0, data64);
	p->tve_cache[window_id] = data64;

	return OPAL_SUCCESS;
}

static int64_t phb4_map_pe_dma_window_real(struct phb *phb,
					   uint64_t pe_number,
					   uint16_t window_id,
					   uint64_t pci_start_addr,
					   uint64_t pci_mem_size)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t end = pci_start_addr + pci_mem_size;
	uint64_t tve;

	if (pe_number >= p->num_pes ||
	    (window_id >> 1) != pe_number)
		return OPAL_PARAMETER;

	if (pci_mem_size) {
		/* Enable */

		/*
		 * Check that the start address has the right TVE index,
		 * we only support the 1 bit mode where each PE has 2
		 * TVEs
		 */
		if ((pci_start_addr >> 59) != (window_id & 1))
			return OPAL_PARAMETER;
		pci_start_addr &= ((1ull << 59) - 1);
		end = pci_start_addr + pci_mem_size;

		/* We have to be 16M aligned */
		if ((pci_start_addr & 0x00ffffff) ||
		    (pci_mem_size & 0x00ffffff))
			return OPAL_PARAMETER;

		/*
		 * It *looks* like this is the max we can support (we need
		 * to verify this). Also we are not checking for rollover,
		 * but then we aren't trying too hard to protect ourselves
		 * against a completely broken OS.
		 */
		if (end > 0x0003ffffffffffffull)
			return OPAL_PARAMETER;

		/*
		 * Put start address bits 49:24 into TVE[52:53]||[0:23]
		 * and end address bits 49:24 into TVE[54:55]||[24:47]
		 * and set TVE[51]
		 */
		tve = (pci_start_addr << 16) & (0xffffffull << 40);
		tve |= (pci_start_addr >> 38) & (3ull << 10);
		tve |= (end >> 8) & (0xfffffful << 16);
		tve |= (end >> 40) & (3ull << 8);
		tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
	} else {
		/* Disable */
		tve = 0;
	}

	phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
	out_be64(p->regs + PHB_IODA_DATA0, tve);
	p->tve_cache[window_id] = tve;

	return OPAL_SUCCESS;
}

static int64_t phb4_set_option(struct phb *phb, enum OpalPhbOption opt,
			       uint64_t setting)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t data64;

	data64 = phb4_read_reg(p, PHB_CTRLR);
	switch (opt) {
	case OPAL_PHB_OPTION_TVE1_4GB:
		if (setting > 1)
			return OPAL_PARAMETER;

		PHBDBG(p, "4GB bypass mode = %lld\n", setting);
		if (setting)
			data64 |= PPC_BIT(24);
		else
			data64 &= ~PPC_BIT(24);
		break;
	case OPAL_PHB_OPTION_MMIO_EEH_DISABLE:
		if (setting > 1)
			return OPAL_PARAMETER;

		PHBDBG(p, "MMIO EEH Disable = %lld\n", setting);
		if (setting)
			data64 |= PPC_BIT(14);
		else
			data64 &= ~PPC_BIT(14);
		break;
	default:
		return OPAL_UNSUPPORTED;
	}
	phb4_write_reg(p, PHB_CTRLR, data64);

	return OPAL_SUCCESS;
}

static int64_t phb4_get_option(struct phb *phb, enum OpalPhbOption opt,
			       __be64 *setting)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t data64;

	data64 = phb4_read_reg(p, PHB_CTRLR);
	switch (opt) {
	case OPAL_PHB_OPTION_TVE1_4GB:
		*setting = cpu_to_be64((data64 & PPC_BIT(24)) ? 1 : 0);
		break;
	case OPAL_PHB_OPTION_MMIO_EEH_DISABLE:
		*setting = cpu_to_be64((data64 & PPC_BIT(14)) ? 1 : 0);
		break;
	default:
		return OPAL_UNSUPPORTED;
	}

	return OPAL_SUCCESS;
}

static int64_t phb4_set_ive_pe(struct phb *phb,
			       uint64_t pe_number,
			       uint32_t ive_num)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint32_t mist_idx;
	uint32_t mist_quad;
	uint32_t mist_shift;
	uint64_t val;

	if (pe_number >= p->num_pes || ive_num >= (p->num_irqs - 8))
		return OPAL_PARAMETER;

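	/* Each 64-bit MIST entry packs four 16-bit fields, one per IVE;
	 * the PE# occupies the low 12 bits of each field
	 */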
	mist_idx = ive_num >> 2;
	mist_quad = ive_num & 3;
	mist_shift = (3 - mist_quad) << 4;
	p->mist_cache[mist_idx] &= ~(0x0fffull << mist_shift);
	p->mist_cache[mist_idx] |= ((uint64_t)pe_number) << mist_shift;

	/* Note: This has the side effect of clearing P/Q, so this
	 * shouldn't be called while the interrupt is "hot"
	 */

	phb4_ioda_sel(p, IODA3_TBL_MIST, mist_idx, false);

	/* We need to inject the appropriate MIST write enable bit
	 * in the IODA table address register
	 */
	val = in_be64(p->regs + PHB_IODA_ADDR);
	val = SETFIELD(PHB_IODA_AD_MIST_PWV, val, 8 >> mist_quad);
	out_be64(p->regs + PHB_IODA_ADDR, val);

	/* Write entry */
	out_be64(p->regs + PHB_IODA_DATA0, p->mist_cache[mist_idx]);

	return OPAL_SUCCESS;
}

static int64_t phb4_get_msi_32(struct phb *phb,
			       uint64_t pe_number,
			       uint32_t ive_num,
			       uint8_t msi_range,
			       uint32_t *msi_address,
			       uint32_t *message_data)
{
	struct phb4 *p = phb_to_phb4(phb);

	/*
	 * Sanity check. We needn't check on mve_number (PE#)
	 * on PHB3 since the interrupt source is purely determined
	 * by its DMA address and data, but the check isn't
	 * harmful.
	 */
	if (pe_number >= p->num_pes ||
	    ive_num >= (p->num_irqs - 8) ||
	    msi_range != 1 || !msi_address || !message_data)
		return OPAL_PARAMETER;

	/*
	 * DMA address and data will form the IVE index.
	 * For more details, please refer to IODA2 spec.
	 */
	*msi_address = 0xFFFF0000 | ((ive_num << 4) & 0xFFFFFE0F);
	*message_data = ive_num & 0x1F;
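	/* e.g. IVE 0x123 yields address 0xFFFF1200 and data 0x03: the low
	 * 5 IVE bits travel in the message data, the rest in the address
	 */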

	return OPAL_SUCCESS;
}

static int64_t phb4_get_msi_64(struct phb *phb,
			       uint64_t pe_number,
			       uint32_t ive_num,
			       uint8_t msi_range,
			       uint64_t *msi_address,
			       uint32_t *message_data)
{
	struct phb4 *p = phb_to_phb4(phb);

	/* Sanity check */
	if (pe_number >= p->num_pes ||
	    ive_num >= (p->num_irqs - 8) ||
	    msi_range != 1 || !msi_address || !message_data)
		return OPAL_PARAMETER;

	/*
	 * DMA address and data will form the IVE index.
	 * For more details, please refer to IODA2 spec.
	 */
	*msi_address = (0x1ul << 60) | ((ive_num << 4) & 0xFFFFFFFFFFFFFE0Ful);
	*message_data = ive_num & 0x1F;

	return OPAL_SUCCESS;
}

static void phb4_rc_err_clear(struct phb4 *p)
{
	/* Init_47 - Clear errors */
	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, 0xffff);

	if (p->ecap <= 0)
		return;

	phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
			    PCICAP_EXP_DEVSTAT_CE |
			    PCICAP_EXP_DEVSTAT_NFE |
			    PCICAP_EXP_DEVSTAT_FE |
			    PCICAP_EXP_DEVSTAT_UE);

	if (p->aercap <= 0)
		return;

	/* Clear all UE status */
	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS,
			    0xffffffff);
	/* Clear all CE status */
	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS,
			    0xffffffff);
	/* Clear root error status */
	phb4_pcicfg_write32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA,
			    0xffffffff);
}

static void phb4_err_clear_regb(struct phb4 *p)
{
	uint64_t val64;

	val64 = phb4_read_reg(p, PHB_REGB_ERR_STATUS);
	phb4_write_reg(p, PHB_REGB_ERR_STATUS, val64);
	phb4_write_reg(p, PHB_REGB_ERR1_STATUS, 0x0ul);
	phb4_write_reg(p, PHB_REGB_ERR_LOG_0, 0x0ul);
	phb4_write_reg(p, PHB_REGB_ERR_LOG_1, 0x0ul);
}

1805 /*
1806 * The function can be called during error recovery for all classes of
1807 * errors. This is new to PHB4; previous revisions had separate
1808 * sequences for INF/ER/Fatal errors.
1809 *
1810 * "Rec #" in this function refer to "Recov_#" steps in the
1811 * PHB4 INF recovery sequence.
1812 */
phb4_err_clear(struct phb4 * p)1813 static void phb4_err_clear(struct phb4 *p)
1814 {
1815 uint64_t val64;
1816 uint64_t fir = phb4_read_reg(p, PHB_LEM_FIR_ACCUM);
1817
1818 /* Rec 1: Acquire the PCI config lock (we don't need to do this) */
1819
1820 /* Rec 2...15: Clear error status in RC config space */
1821 phb4_rc_err_clear(p);
1822
1823 /* Rec 16...23: Clear PBL errors */
1824 val64 = phb4_read_reg(p, PHB_PBL_ERR_STATUS);
1825 phb4_write_reg(p, PHB_PBL_ERR_STATUS, val64);
1826 phb4_write_reg(p, PHB_PBL_ERR1_STATUS, 0x0ul);
1827 phb4_write_reg(p, PHB_PBL_ERR_LOG_0, 0x0ul);
1828 phb4_write_reg(p, PHB_PBL_ERR_LOG_1, 0x0ul);
1829
1830 /* Rec 24...31: Clear REGB errors */
1831 phb4_err_clear_regb(p);
1832
1833 /* Rec 32...59: Clear PHB error trap */
1834 val64 = phb4_read_reg(p, PHB_TXE_ERR_STATUS);
1835 phb4_write_reg(p, PHB_TXE_ERR_STATUS, val64);
1836 phb4_write_reg(p, PHB_TXE_ERR1_STATUS, 0x0ul);
1837 phb4_write_reg(p, PHB_TXE_ERR_LOG_0, 0x0ul);
1838 phb4_write_reg(p, PHB_TXE_ERR_LOG_1, 0x0ul);
1839
1840 val64 = phb4_read_reg(p, PHB_RXE_ARB_ERR_STATUS);
1841 phb4_write_reg(p, PHB_RXE_ARB_ERR_STATUS, val64);
1842 phb4_write_reg(p, PHB_RXE_ARB_ERR1_STATUS, 0x0ul);
1843 phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_0, 0x0ul);
1844 phb4_write_reg(p, PHB_RXE_ARB_ERR_LOG_1, 0x0ul);
1845
1846 val64 = phb4_read_reg(p, PHB_RXE_MRG_ERR_STATUS);
1847 phb4_write_reg(p, PHB_RXE_MRG_ERR_STATUS, val64);
1848 phb4_write_reg(p, PHB_RXE_MRG_ERR1_STATUS, 0x0ul);
1849 phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_0, 0x0ul);
1850 phb4_write_reg(p, PHB_RXE_MRG_ERR_LOG_1, 0x0ul);
1851
1852 val64 = phb4_read_reg(p, PHB_RXE_TCE_ERR_STATUS);
1853 phb4_write_reg(p, PHB_RXE_TCE_ERR_STATUS, val64);
1854 phb4_write_reg(p, PHB_RXE_TCE_ERR1_STATUS, 0x0ul);
1855 phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_0, 0x0ul);
1856 phb4_write_reg(p, PHB_RXE_TCE_ERR_LOG_1, 0x0ul);
1857
1858 val64 = phb4_read_reg(p, PHB_ERR_STATUS);
1859 phb4_write_reg(p, PHB_ERR_STATUS, val64);
1860 phb4_write_reg(p, PHB_ERR1_STATUS, 0x0ul);
1861 phb4_write_reg(p, PHB_ERR_LOG_0, 0x0ul);
1862 phb4_write_reg(p, PHB_ERR_LOG_1, 0x0ul);
1863
1864 /* Rec 61/62: Clear FIR/WOF */
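	/*
	 * PHB_LEM_FIR_AND_MASK has AND-register semantics: bits written
	 * as 0 are cleared, bits written as 1 are preserved. Writing
	 * ~fir therefore clears exactly the accumulated bits sampled
	 * above.
	 */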
	phb4_write_reg(p, PHB_LEM_FIR_AND_MASK, ~fir);
	phb4_write_reg(p, PHB_LEM_WOF, 0x0ul);

	/* Rec 63: Update LEM mask to its initial value */
	phb4_write_reg(p, PHB_LEM_ERROR_MASK, 0x0ul);

	/* Rec 64: Clear the PCI config lock (we don't need to do this) */
}

static void phb4_read_phb_status(struct phb4 *p,
				 struct OpalIoPhb4ErrorData *stat)
{
	uint32_t i;
	__be64 *pPEST;
	uint16_t __16;
	uint32_t __32;
	uint64_t __64;

	memset(stat, 0, sizeof(struct OpalIoPhb4ErrorData));

	/* Error data common part */
	stat->common.version = cpu_to_be32(OPAL_PHB_ERROR_DATA_VERSION_1);
	stat->common.ioType = cpu_to_be32(OPAL_PHB_ERROR_DATA_TYPE_PHB4);
	stat->common.len = cpu_to_be32(sizeof(struct OpalIoPhb4ErrorData));

	/* Use ASB for config space if the PHB is fenced */
	if (p->flags & PHB4_AIB_FENCED)
		p->flags |= PHB4_CFG_USE_ASB;

	/* Grab RC bridge control, make it 32-bit */
	phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &__16);
	stat->brdgCtl = cpu_to_be32(__16);

	/*
	 * Grab various RC PCIe capability registers. All device, slot
	 * and link status are 16-bit, so we grab the pair control+status
	 * for each of them
	 */
	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &__32);
	stat->deviceStatus = cpu_to_be32(__32);
	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_SLOTCTL, &__32);
	stat->slotStatus = cpu_to_be32(__32);
	phb4_pcicfg_read32(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &__32);
	stat->linkStatus = cpu_to_be32(__32);

	/*
	 * I assume those are the standard config space header, cmd & status
	 * together makes 32-bit. Secondary status is 16-bit so I'll clear
	 * the top on that one
	 */
	phb4_pcicfg_read32(&p->phb, 0, PCI_CFG_CMD, &__32);
	stat->devCmdStatus = cpu_to_be32(__32);
	phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_SECONDARY_STATUS, &__16);
	stat->devSecStatus = cpu_to_be32(__16);

	/* Grab a bunch of AER regs */
	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_RERR_STA, &__32);
	stat->rootErrorStatus = cpu_to_be32(__32);
	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_UE_STATUS, &__32);
	stat->uncorrErrorStatus = cpu_to_be32(__32);

	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_CE_STATUS, &__32);
	stat->corrErrorStatus = cpu_to_be32(__32);

	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG0, &__32);
	stat->tlpHdr1 = cpu_to_be32(__32);

	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG1, &__32);
	stat->tlpHdr2 = cpu_to_be32(__32);

	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG2, &__32);
	stat->tlpHdr3 = cpu_to_be32(__32);

	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_HDR_LOG3, &__32);
	stat->tlpHdr4 = cpu_to_be32(__32);

	phb4_pcicfg_read32(&p->phb, 0, p->aercap + PCIECAP_AER_SRCID, &__32);
	stat->sourceId = cpu_to_be32(__32);

	/* PEC NFIR, same as P8/PHB3 */
	xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &__64);
	stat->nFir = cpu_to_be64(__64);
	xscom_read(p->chip_id, p->pe_stk_xscom + 0x3, &__64);
	stat->nFirMask = cpu_to_be64(__64);
	xscom_read(p->chip_id, p->pe_stk_xscom + 0x8, &__64);
	stat->nFirWOF = cpu_to_be64(__64);

	/* PHB4 inbound and outbound error Regs */
	stat->phbPlssr = cpu_to_be64(phb4_read_reg_asb(p, PHB_CPU_LOADSTORE_STATUS));
	stat->phbCsr = cpu_to_be64(phb4_read_reg_asb(p, PHB_DMA_CHAN_STATUS));
	stat->lemFir = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_FIR_ACCUM));
	stat->lemErrorMask = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_ERROR_MASK));
	stat->lemWOF = cpu_to_be64(phb4_read_reg_asb(p, PHB_LEM_WOF));
	stat->phbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_STATUS));
	stat->phbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR1_STATUS));
	stat->phbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_LOG_0));
	stat->phbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_ERR_LOG_1));
	stat->phbTxeErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_STATUS));
	stat->phbTxeFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR1_STATUS));
	stat->phbTxeErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_0));
	stat->phbTxeErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_TXE_ERR_LOG_1));
	stat->phbRxeArbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_STATUS));
	stat->phbRxeArbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR1_STATUS));
	stat->phbRxeArbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_0));
	stat->phbRxeArbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_ARB_ERR_LOG_1));
	stat->phbRxeMrgErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_STATUS));
	stat->phbRxeMrgFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR1_STATUS));
	stat->phbRxeMrgErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_0));
	stat->phbRxeMrgErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_MRG_ERR_LOG_1));
	stat->phbRxeTceErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_STATUS));
	stat->phbRxeTceFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR1_STATUS));
	stat->phbRxeTceErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_0));
	stat->phbRxeTceErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_RXE_TCE_ERR_LOG_1));

	/* PHB4 REGB error registers */
	stat->phbPblErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_STATUS));
	stat->phbPblFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR1_STATUS));
	stat->phbPblErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_0));
	stat->phbPblErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PBL_ERR_LOG_1));

	stat->phbPcieDlpErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERR_STATUS));
	stat->phbPcieDlpErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG1));
	stat->phbPcieDlpErrorLog2 = cpu_to_be64(phb4_read_reg_asb(p, PHB_PCIE_DLP_ERRLOG2));

	stat->phbRegbErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_STATUS));
	stat->phbRegbFirstErrorStatus = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR1_STATUS));
	stat->phbRegbErrorLog0 = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_0));
	stat->phbRegbErrorLog1 = cpu_to_be64(phb4_read_reg_asb(p, PHB_REGB_ERR_LOG_1));

	/*
	 * Grab PESTA & B content. The error bit (bit#0) should
	 * be fetched from IODA and the remaining content from the
	 * memory-resident tables.
	 */
	pPEST = (__be64 *)p->tbl_pest;
	phb4_ioda_sel(p, IODA3_TBL_PESTA, 0, true);
	for (i = 0; i < p->max_num_pes; i++) {
		stat->pestA[i] = cpu_to_be64(phb4_read_reg_asb(p, PHB_IODA_DATA0));
		stat->pestA[i] |= pPEST[2 * i];
	}

	phb4_ioda_sel(p, IODA3_TBL_PESTB, 0, true);
	for (i = 0; i < p->max_num_pes; i++) {
		stat->pestB[i] = cpu_to_be64(phb4_read_reg_asb(p, PHB_IODA_DATA0));
		stat->pestB[i] |= pPEST[2 * i + 1];
	}
}

static void __unused phb4_dump_peltv(struct phb4 *p)
{
	int stride = p->max_num_pes / 64;
	uint64_t *tbl = (void *) p->tbl_peltv;
	unsigned int pe;
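
	/*
	 * Each PELT-V row is one bit per possible child PE, i.e.
	 * max_num_pes / 64 64-bit words per parent PE: 8 words (64
	 * bytes) for a 512-PE PHB, 4 words for a 256-PE one.
	 */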
	PHBERR(p, "PELT-V: base addr: %p size: %llx (%d PEs, stride = %d)\n",
	       tbl, p->tbl_peltv_size, p->max_num_pes, stride);

	for (pe = 0; pe < p->max_num_pes; pe++) {
		unsigned int i, j;
		uint64_t sum = 0;

		i = pe * stride;

		/*
		 * Only print an entry if there are bits set in the PE's
		 * PELT-V entry. There are a few hundred possible PEs and
		 * generally only a handful will be in use.
		 */

		for (j = 0; j < stride; j++)
			sum |= tbl[i + j];
		if (!sum)
			continue; /* unused PE, skip it */

		if (p->max_num_pes == 512) {
			PHBERR(p, "PELT-V[%03x] = "
			       "%016llx %016llx %016llx %016llx"
			       "%016llx %016llx %016llx %016llx\n", pe,
			       tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3],
			       tbl[i + 4], tbl[i + 5], tbl[i + 6], tbl[i + 7]);
		} else if (p->max_num_pes == 256) {
			PHBERR(p, "PELT-V[%03x] = "
			       "%016llx %016llx %016llx %016llx\n", pe,
			       tbl[i + 0], tbl[i + 1], tbl[i + 2], tbl[i + 3]);
		}
	}
}

static void __unused phb4_dump_ioda_table(struct phb4 *p, int table)
{
	const char *name;
	int entries, i;

	switch (table) {
	case IODA3_TBL_LIST:
		name = "LIST";
		entries = 8;
		break;
	case IODA3_TBL_MIST:
		name = "MIST";
		entries = 1024;
		break;
	case IODA3_TBL_RCAM:
		name = "RCAM";
		entries = 128;
		break;
	case IODA3_TBL_MRT:
		name = "MRT";
		entries = 16;
		break;
	case IODA3_TBL_PESTA:
		name = "PESTA";
		entries = 512;
		break;
	case IODA3_TBL_PESTB:
		name = "PESTB";
		entries = 512;
		break;
	case IODA3_TBL_TVT:
		name = "TVT";
		entries = 512;
		break;
	case IODA3_TBL_TCAM:
		name = "TCAM";
		entries = 1024;
		break;
	case IODA3_TBL_TDR:
		name = "TDR";
		entries = 1024;
		break;
	case IODA3_TBL_MBT: /* special case, see below */
		name = "MBT";
		entries = 64;
		break;
	case IODA3_TBL_MDT:
		name = "MDT";
		entries = 512;
		break;
	case IODA3_TBL_PEEV:
		name = "PEEV";
		entries = 8;
		break;
	default:
		PHBERR(p, "Invalid IODA table %d!\n", table);
		return;
	}

	PHBERR(p, "Start %s dump (only non-zero entries are printed):\n", name);

	phb4_ioda_sel(p, table, 0, true);

	/*
	 * Each entry in the MBT is 16 bytes. Every other table has 8 byte
	 * entries, so we special case the MBT to keep the output readable.
	 */
	if (table == IODA3_TBL_MBT) {
		for (i = 0; i < 32; i++) {
			uint64_t v1 = phb4_read_reg_asb(p, PHB_IODA_DATA0);
			uint64_t v2 = phb4_read_reg_asb(p, PHB_IODA_DATA0);

			if (!v1 && !v2)
				continue;
			PHBERR(p, "MBT[%03x] = %016llx %016llx\n", i, v1, v2);
		}
	} else {
		for (i = 0; i < entries; i++) {
			uint64_t v = phb4_read_reg_asb(p, PHB_IODA_DATA0);

			if (!v)
				continue;
			PHBERR(p, "%s[%03x] = %016llx\n", name, i, v);
		}
	}

	PHBERR(p, "End %s dump\n", name);
}

static void phb4_eeh_dump_regs(struct phb4 *p)
{
	struct OpalIoPhb4ErrorData *s;
	uint16_t reg;
	unsigned int i;

	if (!verbose_eeh)
		return;

	s = zalloc(sizeof(struct OpalIoPhb4ErrorData));
	if (!s) {
		PHBERR(p, "Failed to allocate error info !\n");
		return;
	}
	phb4_read_phb_status(p, s);

	PHBERR(p, " brdgCtl = %08x\n", be32_to_cpu(s->brdgCtl));

	/* PHB4 cfg regs */
	PHBERR(p, " deviceStatus = %08x\n", be32_to_cpu(s->deviceStatus));
	PHBERR(p, " slotStatus = %08x\n", be32_to_cpu(s->slotStatus));
	PHBERR(p, " linkStatus = %08x\n", be32_to_cpu(s->linkStatus));
	PHBERR(p, " devCmdStatus = %08x\n", be32_to_cpu(s->devCmdStatus));
	PHBERR(p, " devSecStatus = %08x\n", be32_to_cpu(s->devSecStatus));
	PHBERR(p, " rootErrorStatus = %08x\n", be32_to_cpu(s->rootErrorStatus));
	PHBERR(p, " corrErrorStatus = %08x\n", be32_to_cpu(s->corrErrorStatus));
	PHBERR(p, " uncorrErrorStatus = %08x\n", be32_to_cpu(s->uncorrErrorStatus));

	/* Two non OPAL API registers that are useful */
	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVCTL, &reg);
	PHBERR(p, " devctl = %08x\n", reg);
	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_DEVSTAT,
			   &reg);
	PHBERR(p, " devStat = %08x\n", reg);

	/* Byte swap TLP headers so they are the same as the PCIe spec */
	PHBERR(p, " tlpHdr1 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr1)));
	PHBERR(p, " tlpHdr2 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr2)));
	PHBERR(p, " tlpHdr3 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr3)));
	PHBERR(p, " tlpHdr4 = %08x\n", cpu_to_le32(be32_to_cpu(s->tlpHdr4)));
	PHBERR(p, " sourceId = %08x\n", be32_to_cpu(s->sourceId));
	PHBERR(p, " nFir = %016llx\n", be64_to_cpu(s->nFir));
	PHBERR(p, " nFirMask = %016llx\n", be64_to_cpu(s->nFirMask));
	PHBERR(p, " nFirWOF = %016llx\n", be64_to_cpu(s->nFirWOF));
	PHBERR(p, " phbPlssr = %016llx\n", be64_to_cpu(s->phbPlssr));
	PHBERR(p, " phbCsr = %016llx\n", be64_to_cpu(s->phbCsr));
	PHBERR(p, " lemFir = %016llx\n", be64_to_cpu(s->lemFir));
	PHBERR(p, " lemErrorMask = %016llx\n", be64_to_cpu(s->lemErrorMask));
	PHBERR(p, " lemWOF = %016llx\n", be64_to_cpu(s->lemWOF));
	PHBERR(p, " phbErrorStatus = %016llx\n", be64_to_cpu(s->phbErrorStatus));
	PHBERR(p, " phbFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbFirstErrorStatus));
	PHBERR(p, " phbErrorLog0 = %016llx\n", be64_to_cpu(s->phbErrorLog0));
	PHBERR(p, " phbErrorLog1 = %016llx\n", be64_to_cpu(s->phbErrorLog1));
	PHBERR(p, " phbTxeErrorStatus = %016llx\n", be64_to_cpu(s->phbTxeErrorStatus));
	PHBERR(p, " phbTxeFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbTxeFirstErrorStatus));
	PHBERR(p, " phbTxeErrorLog0 = %016llx\n", be64_to_cpu(s->phbTxeErrorLog0));
	PHBERR(p, " phbTxeErrorLog1 = %016llx\n", be64_to_cpu(s->phbTxeErrorLog1));
	PHBERR(p, " phbRxeArbErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeArbErrorStatus));
	PHBERR(p, "phbRxeArbFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeArbFirstErrorStatus));
	PHBERR(p, " phbRxeArbErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeArbErrorLog0));
	PHBERR(p, " phbRxeArbErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeArbErrorLog1));
	PHBERR(p, " phbRxeMrgErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorStatus));
	PHBERR(p, "phbRxeMrgFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeMrgFirstErrorStatus));
	PHBERR(p, " phbRxeMrgErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorLog0));
	PHBERR(p, " phbRxeMrgErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeMrgErrorLog1));
	PHBERR(p, " phbRxeTceErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeTceErrorStatus));
	PHBERR(p, "phbRxeTceFrstErrorStatus = %016llx\n", be64_to_cpu(s->phbRxeTceFirstErrorStatus));
	PHBERR(p, " phbRxeTceErrorLog0 = %016llx\n", be64_to_cpu(s->phbRxeTceErrorLog0));
	PHBERR(p, " phbRxeTceErrorLog1 = %016llx\n", be64_to_cpu(s->phbRxeTceErrorLog1));
	PHBERR(p, " phbPblErrorStatus = %016llx\n", be64_to_cpu(s->phbPblErrorStatus));
	PHBERR(p, " phbPblFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbPblFirstErrorStatus));
	PHBERR(p, " phbPblErrorLog0 = %016llx\n", be64_to_cpu(s->phbPblErrorLog0));
	PHBERR(p, " phbPblErrorLog1 = %016llx\n", be64_to_cpu(s->phbPblErrorLog1));
	PHBERR(p, " phbPcieDlpErrorLog1 = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorLog1));
	PHBERR(p, " phbPcieDlpErrorLog2 = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorLog2));
	PHBERR(p, " phbPcieDlpErrorStatus = %016llx\n", be64_to_cpu(s->phbPcieDlpErrorStatus));

	PHBERR(p, " phbRegbErrorStatus = %016llx\n", be64_to_cpu(s->phbRegbErrorStatus));
	PHBERR(p, " phbRegbFirstErrorStatus = %016llx\n", be64_to_cpu(s->phbRegbFirstErrorStatus));
	PHBERR(p, " phbRegbErrorLog0 = %016llx\n", be64_to_cpu(s->phbRegbErrorLog0));
	PHBERR(p, " phbRegbErrorLog1 = %016llx\n", be64_to_cpu(s->phbRegbErrorLog1));

	for (i = 0; i < p->max_num_pes; i++) {
		if (!s->pestA[i] && !s->pestB[i])
			continue;
		PHBERR(p, " PEST[%03x] = %016llx %016llx\n",
		       i, be64_to_cpu(s->pestA[i]), be64_to_cpu(s->pestB[i]));
	}
	free(s);
}

static int64_t phb4_set_pe(struct phb *phb,
			   uint64_t pe_number,
			   uint64_t bdfn,
			   uint8_t bcompare,
			   uint8_t dcompare,
			   uint8_t fcompare,
			   uint8_t action)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint64_t mask, idx;

	/* Sanity check */
	if (action != OPAL_MAP_PE && action != OPAL_UNMAP_PE)
		return OPAL_PARAMETER;
	if (pe_number >= p->num_pes || bdfn > 0xffff ||
	    bcompare > OpalPciBusAll ||
	    dcompare > OPAL_COMPARE_RID_DEVICE_NUMBER ||
	    fcompare > OPAL_COMPARE_RID_FUNCTION_NUMBER)
		return OPAL_PARAMETER;

	/* match everything by default */
	mask = 0;

	/* Figure out the RID range */
	if (bcompare != OpalPciBusAny)
		mask = ((0x1 << (bcompare + 1)) - 1) << (15 - bcompare);

	if (dcompare == OPAL_COMPARE_RID_DEVICE_NUMBER)
		mask |= 0xf8;

	if (fcompare == OPAL_COMPARE_RID_FUNCTION_NUMBER)
		mask |= 0x7;
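
	/*
	 * Worked example (illustrative only): bcompare = OpalPciBusAll
	 * gives a bus mask of 0xff00; adding device and function compare
	 * widens it to 0xfff8 | 0x7 = 0xffff, i.e. an exact BDFN match.
	 */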
	if (action == OPAL_UNMAP_PE)
		pe_number = PHB4_RESERVED_PE_NUM(p);

	/* Map or unmap the RTT range */
	for (idx = 0; idx < RTT_TABLE_ENTRIES; idx++)
		if ((idx & mask) == (bdfn & mask))
			p->tbl_rtt[idx] = cpu_to_be16(pe_number);

	/* Invalidate the RID Translation Cache (RTC) inside the PHB */
	out_be64(p->regs + PHB_RTC_INVALIDATE, PHB_RTC_INVALIDATE_ALL);

	return OPAL_SUCCESS;
}

static int64_t phb4_set_peltv(struct phb *phb,
			      uint32_t parent_pe,
			      uint32_t child_pe,
			      uint8_t state)
{
	struct phb4 *p = phb_to_phb4(phb);
	uint32_t idx, mask;

	/* Sanity check */
	if (parent_pe >= p->num_pes || child_pe >= p->num_pes)
		return OPAL_PARAMETER;

	/* Find index for parent PE */
	idx = parent_pe * (p->max_num_pes / 8);
	idx += (child_pe / 8);
	mask = 0x1 << (7 - (child_pe % 8));
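
	/*
	 * Worked example (illustrative only): with 512 PEs each parent
	 * row is 64 bytes, so parent_pe = 2, child_pe = 9 lands at
	 * idx = 2 * 64 + 1 = 129 with mask = 1 << 6 = 0x40.
	 */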
	if (state)
		p->tbl_peltv[idx] |= mask;
	else
		p->tbl_peltv[idx] &= ~mask;

	return OPAL_SUCCESS;
}

static void phb4_prepare_link_change(struct pci_slot *slot, bool is_up)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint32_t reg32;

	p->has_link = is_up;

	if (is_up) {
		/* Clear AER receiver error status */
		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
				    PCIECAP_AER_CE_STATUS,
				    PCIECAP_AER_CE_RECVR_ERR);
		/* Unmask receiver error status in AER */
		phb4_pcicfg_read32(&p->phb, 0, p->aercap +
				   PCIECAP_AER_CE_MASK, &reg32);
		reg32 &= ~PCIECAP_AER_CE_RECVR_ERR;
		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
				    PCIECAP_AER_CE_MASK, reg32);

		/* Don't block PCI-CFG */
		p->flags &= ~PHB4_CFG_BLOCKED;

		/* Re-enable link down errors */
		out_be64(p->regs + PHB_PCIE_MISC_STRAP,
			 0x0000060000000000ull);

		/* Re-enable error status indicators that trigger irqs */
		out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE,
			 0x2130006efca8bc00ull);
		out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE,
			 0x0080000000000000ull);
		out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE,
			 0xde0fff91035743ffull);

	} else {
		/* Mask AER receiver error */
		phb4_pcicfg_read32(&p->phb, 0, p->aercap +
				   PCIECAP_AER_CE_MASK, &reg32);
		reg32 |= PCIECAP_AER_CE_RECVR_ERR;
		phb4_pcicfg_write32(&p->phb, 0, p->aercap +
				    PCIECAP_AER_CE_MASK, reg32);

		/* Clear error link enable & error link down kill enable */
		out_be64(p->regs + PHB_PCIE_MISC_STRAP, 0);

		/* Disable all error status indicators that trigger irqs */
		out_be64(p->regs + PHB_REGB_ERR_INF_ENABLE, 0);
		out_be64(p->regs + PHB_REGB_ERR_ERC_ENABLE, 0);
		out_be64(p->regs + PHB_REGB_ERR_FAT_ENABLE, 0);

		/* Block PCI-CFG access */
		p->flags |= PHB4_CFG_BLOCKED;
	}
}

static int64_t phb4_get_presence_state(struct pci_slot *slot, uint8_t *val)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint64_t hps, dtctl;

	/* Test for PHB in error state? */
	if (p->broken)
		return OPAL_HARDWARE;

	/* Check hotplug status */
	hps = in_be64(p->regs + PHB_PCIE_HOTPLUG_STATUS);
	if (!(hps & PHB_PCIE_HPSTAT_PRESENCE)) {
		*val = OPAL_PCI_SLOT_PRESENT;
	} else {
		/*
		 * If it says not present but the link is up, then we assume
		 * we are on a broken simulation environment and still
		 * return a valid presence. Otherwise, not present.
		 */
		dtctl = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
		if (dtctl & PHB_PCIE_DLP_TL_LINKACT) {
			PHBERR(p, "Presence detect 0 but link set !\n");
			*val = OPAL_PCI_SLOT_PRESENT;
		} else {
			*val = OPAL_PCI_SLOT_EMPTY;
		}
	}

	return OPAL_SUCCESS;
}

static int64_t phb4_get_link_info(struct pci_slot *slot, uint8_t *speed,
				  uint8_t *width)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint64_t reg;
	uint16_t state;
	int64_t rc;
	uint8_t s;

	/* Link is up, let's find the actual speed */
	reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
	if (!(reg & PHB_PCIE_DLP_TL_LINKACT)) {
		*width = 0;
		if (speed)
			*speed = 0;
		return OPAL_SUCCESS;
	}

	rc = phb4_pcicfg_read16(&p->phb, 0,
				p->ecap + PCICAP_EXP_LSTAT, &state);
	if (rc != OPAL_SUCCESS) {
		PHBERR(p, "%s: Error %lld getting link state\n", __func__, rc);
		return OPAL_HARDWARE;
	}

	if (state & PCICAP_EXP_LSTAT_DLLL_ACT) {
		*width = ((state & PCICAP_EXP_LSTAT_WIDTH) >> 4);
		s = state & PCICAP_EXP_LSTAT_SPEED;
	} else {
		*width = 0;
		s = 0;
	}

	if (speed)
		*speed = s;

	return OPAL_SUCCESS;
}

static int64_t phb4_get_link_state(struct pci_slot *slot, uint8_t *val)
{
	return phb4_get_link_info(slot, NULL, val);
}

static int64_t phb4_retry_state(struct pci_slot *slot)
{
	struct phb4 *p = phb_to_phb4(slot->phb);

	/* Mark link as down */
	phb4_prepare_link_change(slot, false);

	/* Last attempt to activate link */
	if (slot->link_retries == 1) {
		if (slot->state == PHB4_SLOT_LINK_WAIT) {
			PHBERR(p, "Falling back to GEN1 training\n");
			p->max_link_speed = 1;
		}
	}

	if (!slot->link_retries--) {
		switch (slot->state) {
		case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
			PHBERR(p, "Presence detected but no electrical link\n");
			break;
		case PHB4_SLOT_LINK_WAIT:
			PHBERR(p, "Electrical link detected but won't train\n");
			break;
		case PHB4_SLOT_LINK_STABLE:
			PHBERR(p, "Link trained but was degraded or unstable\n");
			break;
		default:
			PHBERR(p, "Unknown link issue\n");
		}
		return OPAL_HARDWARE;
	}

	pci_slot_set_state(slot, PHB4_SLOT_CRESET_START);
	return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
}

static uint64_t phb4_train_info(struct phb4 *p, uint64_t reg, unsigned long dt)
{
	uint64_t ltssm_state = GETFIELD(PHB_PCIE_DLP_LTSSM_TRC, reg);
	char s[80];

	snprintf(s, sizeof(s), "TRACE:0x%016llx % 2lims",
		 reg, tb_to_msecs(dt));

	if (reg & PHB_PCIE_DLP_TL_LINKACT)
		snprintf(s, sizeof(s), "%s trained ", s);
	else if (reg & PHB_PCIE_DLP_TRAINING)
		snprintf(s, sizeof(s), "%s training", s);
	else if (reg & PHB_PCIE_DLP_INBAND_PRESENCE)
		snprintf(s, sizeof(s), "%s presence", s);
	else
		snprintf(s, sizeof(s), "%s ", s);

	snprintf(s, sizeof(s), "%s GEN%lli:x%02lli:", s,
		 GETFIELD(PHB_PCIE_DLP_LINK_SPEED, reg),
		 GETFIELD(PHB_PCIE_DLP_LINK_WIDTH, reg));

	switch (ltssm_state) {
	case PHB_PCIE_DLP_LTSSM_RESET:
		snprintf(s, sizeof(s), "%sreset", s);
		break;
	case PHB_PCIE_DLP_LTSSM_DETECT:
		snprintf(s, sizeof(s), "%sdetect", s);
		break;
	case PHB_PCIE_DLP_LTSSM_POLLING:
		snprintf(s, sizeof(s), "%spolling", s);
		break;
	case PHB_PCIE_DLP_LTSSM_CONFIG:
		snprintf(s, sizeof(s), "%sconfig", s);
		break;
	case PHB_PCIE_DLP_LTSSM_L0:
		snprintf(s, sizeof(s), "%sL0", s);
		break;
	case PHB_PCIE_DLP_LTSSM_REC:
		snprintf(s, sizeof(s), "%srecovery", s);
		break;
	case PHB_PCIE_DLP_LTSSM_L1:
		snprintf(s, sizeof(s), "%sL1", s);
		break;
	case PHB_PCIE_DLP_LTSSM_L2:
		snprintf(s, sizeof(s), "%sL2", s);
		break;
	case PHB_PCIE_DLP_LTSSM_HOTRESET:
		snprintf(s, sizeof(s), "%shotreset", s);
		break;
	case PHB_PCIE_DLP_LTSSM_DISABLED:
		snprintf(s, sizeof(s), "%sdisabled", s);
		break;
	case PHB_PCIE_DLP_LTSSM_LOOPBACK:
		snprintf(s, sizeof(s), "%sloopback", s);
		break;
	default:
		snprintf(s, sizeof(s), "%sinvalid", s);
	}
	PHBNOTICE(p, "%s\n", s);

	return ltssm_state;
}

static void phb4_dump_pec_err_regs(struct phb4 *p)
{
	uint64_t nfir_p_wof, nfir_n_wof, err_aib;
	uint64_t err_rpt0, err_rpt1;

	/* Read the PCI and NEST FIRs and dump them. Also cache PCI/NEST FIRs */
	xscom_read(p->chip_id,
		   p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR, &p->pfir_cache);
	xscom_read(p->chip_id,
		   p->pci_stk_xscom + XPEC_PCI_STK_PCI_FIR_WOF, &nfir_p_wof);
	xscom_read(p->chip_id,
		   p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
	xscom_read(p->chip_id,
		   p->pe_stk_xscom + XPEC_NEST_STK_PCI_NFIR_WOF, &nfir_n_wof);
	xscom_read(p->chip_id,
		   p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT0, &err_rpt0);
	xscom_read(p->chip_id,
		   p->pe_stk_xscom + XPEC_NEST_STK_ERR_RPT1, &err_rpt1);
	xscom_read(p->chip_id,
		   p->pci_stk_xscom + XPEC_PCI_STK_PBAIB_ERR_REPORT, &err_aib);

	PHBERR(p, " PCI FIR=%016llx\n", p->pfir_cache);
	PHBERR(p, " PCI FIR WOF=%016llx\n", nfir_p_wof);
	PHBERR(p, " NEST FIR=%016llx\n", p->nfir_cache);
	PHBERR(p, " NEST FIR WOF=%016llx\n", nfir_n_wof);
	PHBERR(p, " ERR RPT0=%016llx\n", err_rpt0);
	PHBERR(p, " ERR RPT1=%016llx\n", err_rpt1);
	PHBERR(p, " AIB ERR=%016llx\n", err_aib);
}

static void phb4_dump_capp_err_regs(struct phb4 *p)
{
	uint64_t fir, apc_master_err, snoop_err, transport_err;
	uint64_t tlbi_err, capp_err_status;
	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);

	xscom_read(p->chip_id, CAPP_FIR + offset, &fir);
	xscom_read(p->chip_id, CAPP_APC_MASTER_ERR_RPT + offset,
		   &apc_master_err);
	xscom_read(p->chip_id, CAPP_SNOOP_ERR_RTP + offset, &snoop_err);
	xscom_read(p->chip_id, CAPP_TRANSPORT_ERR_RPT + offset, &transport_err);
	xscom_read(p->chip_id, CAPP_TLBI_ERR_RPT + offset, &tlbi_err);
	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &capp_err_status);

	PHBERR(p, " CAPP FIR=%016llx\n", fir);
	PHBERR(p, "CAPP APC MASTER ERR=%016llx\n", apc_master_err);
	PHBERR(p, " CAPP SNOOP ERR=%016llx\n", snoop_err);
	PHBERR(p, " CAPP TRANSPORT ERR=%016llx\n", transport_err);
	PHBERR(p, " CAPP TLBI ERR=%016llx\n", tlbi_err);
	PHBERR(p, " CAPP ERR STATUS=%016llx\n", capp_err_status);
}

/* Check if AIB is fenced via PBCQ NFIR */
static bool phb4_fenced(struct phb4 *p)
{
	/* Already fenced ? */
	if (p->flags & PHB4_AIB_FENCED)
		return true;

	/*
	 * All 1's from the PHB indicates a PHB freeze/fence. We
	 * don't really differentiate them at this point.
	 */
	if (in_be64(p->regs + PHB_CPU_LOADSTORE_STATUS) != 0xfffffffffffffffful)
		return false;

	/* Mark ourselves fenced */
	p->flags |= PHB4_AIB_FENCED;

	PHBERR(p, "PHB Freeze/Fence detected !\n");
	phb4_dump_pec_err_regs(p);

	/*
	 * Dump CAPP error registers in case the PHB was fenced due to CAPP.
	 * Expect p->nfir_cache already updated in phb4_dump_pec_err_regs()
	 */
	if (p->nfir_cache & XPEC_NEST_STK_PCI_NFIR_CXA_PE_CAPP)
		phb4_dump_capp_err_regs(p);

	phb4_eeh_dump_regs(p);

	return true;
}

static bool phb4_check_reg(struct phb4 *p, uint64_t reg)
{
	if (reg == 0xffffffffffffffffUL)
		return !phb4_fenced(p);
	return true;
}

static void phb4_get_info(struct phb *phb, uint16_t bdfn, uint8_t *speed,
			  uint8_t *width)
{
	int32_t ecap;
	uint32_t cap;

	ecap = pci_find_cap(phb, bdfn, PCI_CFG_CAP_ID_EXP);
	pci_cfg_read32(phb, bdfn, ecap + PCICAP_EXP_LCAP, &cap);
	*width = (cap & PCICAP_EXP_LCAP_MAXWDTH) >> 4;
	*speed = cap & PCICAP_EXP_LCAP_MAXSPD;
}

#define PVR_POWER9_CUMULUS	0x00002000

static bool phb4_chip_retry_workaround(void)
{
	unsigned int pvr;

	if (pci_retry_all)
		return true;

	/* Chips that need this retry are:
	 *  - CUMULUS DD1.0
	 *  - NIMBUS DD2.0 (and DD1.0, but it is unsupported so no check).
	 */
	pvr = mfspr(SPR_PVR);
	if (pvr & PVR_POWER9_CUMULUS) {
		if ((PVR_VERS_MAJ(pvr) == 1) && (PVR_VERS_MIN(pvr) == 0))
			return true;
	} else { /* NIMBUS */
		if ((PVR_VERS_MAJ(pvr) == 2) && (PVR_VERS_MIN(pvr) == 0))
			return true;
	}
	return false;
}

struct pci_card_id {
	uint16_t vendor;
	uint16_t device;
};

static struct pci_card_id retry_allowlist[] = {
	{ 0x1000, 0x005d }, /* LSI Logic MegaRAID SAS-3 3108 */
	{ 0x1000, 0x00c9 }, /* LSI MPT SAS-3 */
	{ 0x104c, 0x8241 }, /* TI xHCI USB */
	{ 0x1077, 0x2261 }, /* QLogic ISP2722-based 16/32Gb FC */
	{ 0x10b5, 0x8725 }, /* PLX Switch: p9dsu, witherspoon */
	{ 0x10b5, 0x8748 }, /* PLX Switch: ZZ */
	{ 0x11f8, 0xf117 }, /* PMC-Sierra/MicroSemi NV1604 */
	{ 0x15b3, 0x1013 }, /* Mellanox ConnectX-4 */
	{ 0x15b3, 0x1017 }, /* Mellanox ConnectX-5 */
	{ 0x15b3, 0x1019 }, /* Mellanox ConnectX-5 Ex */
	{ 0x1a03, 0x1150 }, /* ASPEED AST2500 Switch */
	{ 0x8086, 0x10fb }, /* Intel x520 10G Eth */
	{ 0x9005, 0x028d }, /* MicroSemi PM8069 */
};

#define VENDOR(vdid) ((vdid) & 0xffff)
#define DEVICE(vdid) (((vdid) >> 16) & 0xffff)
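
/*
 * Example (illustrative only): a config-space vendor/device dword of
 * 0x005d1000 decodes as VENDOR() == 0x1000 (LSI) and DEVICE() == 0x005d,
 * matching the first retry_allowlist entry above.
 */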
static bool phb4_adapter_in_allowlist(uint32_t vdid)
{
	int i;

	if (pci_retry_all)
		return true;

	for (i = 0; i < ARRAY_SIZE(retry_allowlist); i++)
		if ((retry_allowlist[i].vendor == VENDOR(vdid)) &&
		    (retry_allowlist[i].device == DEVICE(vdid)))
			return true;

	return false;
}

static struct pci_card_id lane_eq_disable[] = {
	{ 0x10de, 0x17fd }, /* Nvidia GM200GL [Tesla M40] */
	{ 0x10de, 0x1db4 }, /* Nvidia GV100 */
};

static bool phb4_lane_eq_retry_allowlist(uint32_t vdid)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(lane_eq_disable); i++)
		if ((lane_eq_disable[i].vendor == VENDOR(vdid)) &&
		    (lane_eq_disable[i].device == DEVICE(vdid)))
			return true;
	return false;
}

static void phb4_lane_eq_change(struct phb4 *p, uint32_t vdid)
{
	p->lane_eq_en = !phb4_lane_eq_retry_allowlist(vdid);
}

static bool phb4_link_optimal(struct pci_slot *slot, uint32_t *vdid)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint64_t reg;
	uint32_t id;
	uint16_t bdfn, lane_errs;
	uint8_t trained_speed, dev_speed, target_speed, rx_errs;
	uint8_t trained_width, dev_width, target_width;
	bool optimal_speed, optimal_width, optimal, retry_enabled, rx_err_ok;

	/* Current trained state */
	phb4_get_link_info(slot, &trained_speed, &trained_width);

	/* Get device capability */
	bdfn = 0x0100; /* bus=1 dev=0 fn=0 */
	/* Since this is the first access, we need to wait for CRS */
	if (!pci_wait_crs(slot->phb, bdfn, &id))
		return true;
	phb4_get_info(slot->phb, bdfn, &dev_speed, &dev_width);

	/* Work out if we are optimally trained */
	target_speed = MIN(p->max_link_speed, dev_speed);
	optimal_speed = (trained_speed >= target_speed);
	target_width = MIN(p->max_link_width, dev_width);
	optimal_width = (trained_width >= target_width);
	optimal = optimal_width && optimal_speed;
	retry_enabled = (phb4_chip_retry_workaround() &&
			 phb4_adapter_in_allowlist(id)) ||
		phb4_lane_eq_retry_allowlist(id);
	reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_COUNTERS);
	rx_errs = GETFIELD(PHB_PCIE_DLP_RX_ERR_CNT, reg);
	rx_err_ok = (rx_errs < rx_err_max);
	reg = in_be64(p->regs + PHB_PCIE_DLP_ERR_STATUS);
	lane_errs = GETFIELD(PHB_PCIE_DLP_LANE_ERR, reg);

	PHBDBG(p, "LINK: Card [%04x:%04x] %s Retry:%s\n", VENDOR(id),
	       DEVICE(id), optimal ? "Optimal" : "Degraded",
	       retry_enabled ? "enabled" : "disabled");
	PHBDBG(p, "LINK: Speed Train:GEN%i PHB:GEN%i DEV:GEN%i%s\n",
	       trained_speed, p->max_link_speed, dev_speed,
	       optimal_speed ? "" : " *");
	PHBDBG(p, "LINK: Width Train:x%02i PHB:x%02i DEV:x%02i%s\n",
	       trained_width, p->max_link_width, dev_width,
	       optimal_width ? "" : " *");
	PHBDBG(p, "LINK: RX Errors Now:%i Max:%i Lane:0x%04x%s\n",
	       rx_errs, rx_err_max, lane_errs, rx_err_ok ? "" : " *");

	if (vdid)
		*vdid = id;

	/* Always do RX error retry irrespective of chip and card */
	if (!rx_err_ok)
		return false;

	if (!retry_enabled)
		return true;

	return optimal;
}

/*
 * This is a trace function to watch what's happening during pcie link
 * training. If any errors are detected it simply returns so the
 * normal code can deal with it.
 */
static void phb4_link_trace(struct phb4 *p, uint64_t target_state, int max_ms)
{
	unsigned long now, end, start = mftb(), state = 0;
	uint64_t trwctl, reg, reglast = -1;
	bool enabled;

	/*
	 * Enable the DLP trace outputs. If we don't, the LTSSM state in
	 * PHB_PCIE_DLP_TRAIN_CTL won't be updated and always reads zero.
	 */
	trwctl = phb4_read_reg(p, PHB_PCIE_DLP_TRWCTL);
	enabled = !!(trwctl & PHB_PCIE_DLP_TRWCTL_EN);
	if (!enabled) {
		phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL,
			       trwctl | PHB_PCIE_DLP_TRWCTL_EN);
	}

	end = start + msecs_to_tb(max_ms);
	now = start;

	do {
		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
		if (reg != reglast)
			state = phb4_train_info(p, reg, now - start);
		reglast = reg;

		if (!phb4_check_reg(p, reg)) {
			PHBNOTICE(p, "TRACE: PHB fenced.\n");
			goto out;
		}

		if (tb_compare(now, end) == TB_AAFTERB) {
			PHBNOTICE(p, "TRACE: Timed out after %dms\n", max_ms);
			goto out;
		}

		now = mftb();
	} while (state != target_state);

	PHBNOTICE(p, "TRACE: Reached target state\n");

out:
	/*
	 * The trace enable bit is a clock gate for the tracing logic. Turn
	 * it off to save power if we're not using it otherwise.
	 */
	if (!enabled)
		phb4_write_reg(p, PHB_PCIE_DLP_TRWCTL, trwctl);
}

/*
 * This helper is called repeatedly by the host sync notifier mechanism, which
 * relies on the kernel to regularly poll the OPAL_SYNC_HOST_REBOOT call as it
 * shuts down.
 */
static bool phb4_host_sync_reset(void *data)
{
	struct phb4 *p = (struct phb4 *)data;
	struct phb *phb = &p->phb;
	int64_t rc = 0;

	/* Make sure no-one modifies the phb flags while we are active */
	phb_lock(phb);

	/* Make sure CAPP is attached to the PHB */
	if (p->capp)
		/* Call phb ops to disable capi */
		rc = phb->ops->set_capi_mode(phb, OPAL_PHB_CAPI_MODE_PCIE,
					     p->capp->attached_pe);
	else
		rc = OPAL_SUCCESS;

	/* Continue kicking state-machine if in middle of a mode transition */
	if (rc == OPAL_BUSY)
		rc = phb->slot->ops.run_sm(phb->slot);

	phb_unlock(phb);

	return rc <= OPAL_SUCCESS;
}

/*
 * Notification from the pci-core that a pci slot state machine completed.
 * We use this callback to mark the CAPP disabled if we were waiting for it.
 */
static int64_t phb4_slot_sm_run_completed(struct pci_slot *slot, uint64_t err)
{
	struct phb4 *p = phb_to_phb4(slot->phb);

	/* Check if we are disabling the capp */
	if (p->flags & PHB4_CAPP_DISABLE) {

		/* Unset struct capp so that we don't fall into a creset loop */
		p->flags &= ~(PHB4_CAPP_DISABLE);
		p->capp->phb = NULL;
		p->capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);

		/* Remove the host sync notifier if we are done. */
		opal_del_host_sync_notifier(phb4_host_sync_reset, p);
		if (err) {
			/* Force a CEC ipl reboot */
			disable_fast_reboot("CAPP: reset failed");
			PHBERR(p, "CAPP: Unable to reset. Error=%lld\n", err);
		} else {
			PHBINF(p, "CAPP: reset complete\n");
		}
	}

	return OPAL_SUCCESS;
}

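/*
 * Link bring-up state machine: wait for an electrical connection first
 * (short per-poll timeout), then for the data link layer to report the
 * link active, and finally hold for a second of stability checking
 * (fence, link-up, speed/width) before declaring the slot usable. Each
 * stage funnels into phb4_retry_state() on fence or timeout.
 */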
static int64_t phb4_poll_link(struct pci_slot *slot)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint64_t reg;
	uint32_t vdid;

	switch (slot->state) {
	case PHB4_SLOT_NORMAL:
	case PHB4_SLOT_LINK_START:
		PHBDBG(p, "LINK: Start polling\n");
		slot->retries = PHB4_LINK_ELECTRICAL_RETRIES;
		pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT_ELECTRICAL);
		/* Polling early here has no chance of a false positive */
		return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
	case PHB4_SLOT_LINK_WAIT_ELECTRICAL:
		/*
		 * Wait for the link electrical connection to be
		 * established (shorter timeout). This allows us to
		 * work around spurious presence detect on some machines
		 * without waiting 10s each time
		 *
		 * Note: We *also* check for the full link up bit here
		 * because simics doesn't seem to implement the electrical
		 * link bit at all
		 */
		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
		if (!phb4_check_reg(p, reg)) {
			PHBERR(p, "PHB fence waiting for electrical link\n");
			return phb4_retry_state(slot);
		}

		if (reg & (PHB_PCIE_DLP_INBAND_PRESENCE |
			   PHB_PCIE_DLP_TL_LINKACT)) {
			PHBDBG(p, "LINK: Electrical link detected\n");
			pci_slot_set_state(slot, PHB4_SLOT_LINK_WAIT);
			slot->retries = PHB4_LINK_WAIT_RETRIES;
			/* No wait here since already have an elec link */
			return pci_slot_set_sm_timeout(slot, msecs_to_tb(1));
		}

		if (slot->retries-- == 0) {
			PHBDBG(p, "LINK: No in-band presence\n");
			return OPAL_SUCCESS;
		}
		/* Retry */
		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
	case PHB4_SLOT_LINK_WAIT:
		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
		if (!phb4_check_reg(p, reg)) {
			PHBERR(p, "LINK: PHB fence waiting for link training\n");
			return phb4_retry_state(slot);
		}
		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
			PHBDBG(p, "LINK: Link is up\n");
			phb4_prepare_link_change(slot, true);
			pci_slot_set_state(slot, PHB4_SLOT_LINK_STABLE);
			return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
		}

		if (slot->retries-- == 0) {
			PHBERR(p, "LINK: Timeout waiting for link up\n");
			PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
			return phb4_retry_state(slot);
		}
		/* Retry */
		return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
	case PHB4_SLOT_LINK_STABLE:
		/* Sanity check link */
		if (phb4_fenced(p)) {
			PHBERR(p, "LINK: PHB fenced waiting for stability\n");
			return phb4_retry_state(slot);
		}
		reg = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
		if (!phb4_check_reg(p, reg)) {
			PHBERR(p, "LINK: PHB fence reading training control\n");
			return phb4_retry_state(slot);
		}
		if (reg & PHB_PCIE_DLP_TL_LINKACT) {
			PHBDBG(p, "LINK: Link is stable\n");
			if (!phb4_link_optimal(slot, &vdid)) {
				PHBDBG(p, "LINK: Link degraded\n");
				if (slot->link_retries) {
					phb4_lane_eq_change(p, vdid);
					return phb4_retry_state(slot);
				}
				/*
				 * Link is degraded but no more retries, so
				 * settle for what we have :-(
				 */
				PHBERR(p, "LINK: Degraded but no more retries\n");
			}
			pci_restore_slot_bus_configs(slot);
			pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
			return OPAL_SUCCESS;
		}
		PHBERR(p, "LINK: Went down waiting for stability\n");
		PHBDBG(p, "LINK: DLP train control: 0x%016llx\n", reg);
		return phb4_retry_state(slot);
	default:
		PHBERR(p, "LINK: Unexpected slot state %08x\n",
		       slot->state);
	}

	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
	return OPAL_HARDWARE;
}

static unsigned int phb4_get_max_link_speed(struct phb4 *p, struct dt_node *np)
{
	unsigned int max_link_speed, hw_max_link_speed;
	struct proc_chip *chip;

	chip = get_chip(p->chip_id);

	hw_max_link_speed = 4;
	if (is_phb5() && (p->index == 0 || p->index == 3))
		hw_max_link_speed = 5;

	/* Priority order: NVRAM -> dt -> GEN3 dd2.00 -> hw default */
	max_link_speed = hw_max_link_speed;
	if (p->rev == PHB4_REV_NIMBUS_DD20 &&
	    ((0xf & chip->ec_level) == 0) && chip->ec_rev == 0)
		max_link_speed = 3;
	if (np) {
		if (dt_has_node_property(np, "ibm,max-link-speed", NULL)) {
			max_link_speed = dt_prop_get_u32(np, "ibm,max-link-speed");
			p->dt_max_link_speed = max_link_speed;
		} else {
			p->dt_max_link_speed = 0;
		}
	} else {
		if (p->dt_max_link_speed > 0)
			max_link_speed = p->dt_max_link_speed;
	}
	if (pcie_max_link_speed)
		max_link_speed = pcie_max_link_speed;
	if (max_link_speed > hw_max_link_speed)
		max_link_speed = hw_max_link_speed;

	return max_link_speed;
}

static unsigned int __phb4_get_max_link_width(struct phb4 *p)
{
	uint64_t addr, reg;
	unsigned int lane_config, width = 16;

	/*
	 * On P9, only PEC2 is configurable (no-/bi-/tri-furcation)
	 */
	switch (p->pec) {
	case 0:
		width = 16;
		break;
	case 1:
		width = 8;
		break;
	case 2:
		addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
		xscom_read(p->chip_id, addr, &reg);
		lane_config = GETFIELD(XPEC_P9_PCI_LANE_CFG, reg);

		if (lane_config == 0b10 && p->index >= 4)
			width = 4;
		else
			width = 8;
	}
	return width;
}

static unsigned int __phb5_get_max_link_width(struct phb4 *p)
{
	uint64_t addr, reg;
	unsigned int lane_config, width = 16;

	/*
	 * On P10, the 2 PECs are identical and each can have a
	 * different furcation, so we always need to check the PEC
	 * config
	 */
	addr = XPEC_P10_PCI_CPLT_CONF1 + p->pec * XPEC_PCI_CPLT_OFFSET;
	xscom_read(p->chip_id, addr, &reg);
	lane_config = GETFIELD(XPEC_P10_PCI_LANE_CFG, reg);

	switch (lane_config) {
	case 0b00:
		width = 16;
		break;
	case 0b01:
		width = 8;
		break;
	case 0b10:
		if (p->index == 0 || p->index == 3)
			width = 8;
		else
			width = 4;
		break;
	default:
		PHBERR(p, "Unexpected PEC lane config value %#x\n",
		       lane_config);
	}
	return width;
}

static unsigned int phb4_get_max_link_width(struct phb4 *p)
{
	if (is_phb5())
		return __phb5_get_max_link_width(p);
	else
		return __phb4_get_max_link_width(p);
}

static void phb4_assert_perst(struct pci_slot *slot, bool assert)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint16_t linkctl;
	uint64_t reg;

	/*
	 * Disable the link before asserting PERST. The Cursed RAID card
	 * in ozrom1 (9005:028c) has problems coming back if PERST is asserted
	 * while the link is active. To work around the problem we assert the
	 * link disable bit before asserting PERST. Asserting the secondary
	 * reset bit in the btctl register also works.
	 */
	phb4_pcicfg_read16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, &linkctl);
	reg = phb4_read_reg(p, PHB_PCIE_CRESET);

	if (assert) {
		linkctl |= PCICAP_EXP_LCTL_LINK_DIS;
		reg &= ~PHB_PCIE_CRESET_PERST_N;
	} else {
		linkctl &= ~PCICAP_EXP_LCTL_LINK_DIS;
		reg |= PHB_PCIE_CRESET_PERST_N;
	}

	phb4_write_reg(p, PHB_PCIE_CRESET, reg);
	phb4_pcicfg_write16(&p->phb, 0, p->ecap + PCICAP_EXP_LCTL, linkctl);
}

static void set_sys_disable_detect(struct phb4 *p, bool set)
{
	uint64_t val;

	val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
	if (set)
		val |= PHB_PCIE_DLP_SYS_DISABLEDETECT;
	else
		val &= ~PHB_PCIE_DLP_SYS_DISABLEDETECT;
	out_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL, val);
}

static int64_t phb4_hreset(struct pci_slot *slot)
{
	struct phb4 *p = phb_to_phb4(slot->phb);
	uint16_t brctl;
	uint8_t presence = 1;

	switch (slot->state) {
	case PHB4_SLOT_NORMAL:
		PHBDBG(p, "HRESET: Starts\n");
		if (slot->ops.get_presence_state)
			slot->ops.get_presence_state(slot, &presence);
		if (!presence) {
			PHBDBG(p, "HRESET: No device\n");
			return OPAL_SUCCESS;
		}

		/* circumvention for HW551382 */
		if (is_phb5()) {
			PHBINF(p, "HRESET: Workaround for HW551382\n");
			set_sys_disable_detect(p, true);
		}

		PHBDBG(p, "HRESET: Prepare for link down\n");
		phb4_prepare_link_change(slot, false);
		/* fall through */
	case PHB4_SLOT_HRESET_START:
		PHBDBG(p, "HRESET: Assert\n");

		phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
		brctl |= PCI_CFG_BRCTL_SECONDARY_RESET;
		phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);
		pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY);

		return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
	case PHB4_SLOT_HRESET_DELAY:
		PHBDBG(p, "HRESET: Deassert\n");

		/* Clear link errors before we deassert reset */
		phb4_err_clear_regb(p);

		phb4_pcicfg_read16(&p->phb, 0, PCI_CFG_BRCTL, &brctl);
		brctl &= ~PCI_CFG_BRCTL_SECONDARY_RESET;
		phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_BRCTL, brctl);

		/*
		 * Due to some oddball adapters bouncing the link
		 * training a couple of times, we wait for a full second
		 * before we start checking the link status, otherwise
		 * we can get a spurious link down interrupt which
		 * causes us to EEH immediately.
		 */
		pci_slot_set_state(slot, PHB4_SLOT_HRESET_DELAY2);
		return pci_slot_set_sm_timeout(slot, secs_to_tb(1));
	case PHB4_SLOT_HRESET_DELAY2:
		if (is_phb5())
			set_sys_disable_detect(p, false);
		pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
		return slot->ops.poll_link(slot);
	default:
		PHBERR(p, "Unexpected slot state %08x\n", slot->state);
	}

	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
	return OPAL_HARDWARE;
}

static int64_t phb4_freset(struct pci_slot *slot)
{
	struct phb4 *p = phb_to_phb4(slot->phb);

	switch (slot->state) {
	case PHB4_SLOT_NORMAL:
	case PHB4_SLOT_FRESET_START:
		PHBDBG(p, "FRESET: Starts\n");

		/* Reset max link speed for training */
		p->max_link_speed = phb4_get_max_link_speed(p, NULL);

		PHBDBG(p, "FRESET: Prepare for link down\n");
		phb4_prepare_link_change(slot, false);

		if (!p->skip_perst) {
			/* circumvention for HW551382 */
			if (is_phb5()) {
				PHBINF(p, "FRESET: Workaround for HW551382\n");
				set_sys_disable_detect(p, true);
			}

			PHBDBG(p, "FRESET: Assert\n");
			phb4_assert_perst(slot, true);
			pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);

			/* 250ms assert time aligns with powernv */
			return pci_slot_set_sm_timeout(slot, msecs_to_tb(250));
		}

		/* To skip the assert during boot time */
		PHBDBG(p, "FRESET: Assert skipped\n");
		pci_slot_set_state(slot, PHB4_SLOT_FRESET_ASSERT_DELAY);
		p->skip_perst = false;
		/* fall through */
	case PHB4_SLOT_FRESET_ASSERT_DELAY:
		/* Clear link errors before we deassert PERST */
		phb4_err_clear_regb(p);

		PHBDBG(p, "FRESET: Deassert\n");
		phb4_assert_perst(slot, false);

		if (pci_tracing)
			phb4_link_trace(p, PHB_PCIE_DLP_LTSSM_L0, 3000);

		if (is_phb5())
			set_sys_disable_detect(p, false);

		pci_slot_set_state(slot, PHB4_SLOT_LINK_START);
		return slot->ops.poll_link(slot);
	default:
		PHBERR(p, "Unexpected slot state %08x\n", slot->state);
	}

	pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
	return OPAL_HARDWARE;
}

static int64_t load_capp_ucode(struct phb4 *p)
{
	int64_t rc;

	if (p->index != CAPP0_PHB_INDEX && p->index != CAPP1_PHB_INDEX)
		return OPAL_HARDWARE;

	/* 0x434150504c494448 = 'CAPPLIDH' in ASCII */
	rc = capp_load_ucode(p->chip_id, p->phb.opal_id, p->index,
			     0x434150504c494448UL, PHB4_CAPP_REG_OFFSET(p),
			     CAPP_APC_MASTER_ARRAY_ADDR_REG,
			     CAPP_APC_MASTER_ARRAY_WRITE_REG,
			     CAPP_SNP_ARRAY_ADDR_REG,
			     CAPP_SNP_ARRAY_WRITE_REG);
	return rc;
}

static int do_capp_recovery_scoms(struct phb4 *p)
{
	uint64_t rc, reg, end;
	uint64_t offset = PHB4_CAPP_REG_OFFSET(p);

	/* Get the status of CAPP recovery */
	xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);

	/* No recovery in progress, ignore */
	if ((reg & PPC_BIT(0)) == 0) {
		PHBDBG(p, "CAPP: No recovery in progress\n");
		return OPAL_SUCCESS;
	}

	PHBDBG(p, "CAPP: Waiting for recovery to complete\n");
	/* recovery timer failure period 168ms */
	end = mftb() + msecs_to_tb(168);
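	/*
	 * Poll until one of the terminal status bits latches: bit 1
	 * (recovery complete), bit 5 (recovery failed) or bit 9 (hang
	 * detected), matching the checks below.
	 */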
	while ((reg & (PPC_BIT(1) | PPC_BIT(5) | PPC_BIT(9))) == 0) {

		time_wait_ms(5);
		xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, &reg);

		if (tb_compare(mftb(), end) != TB_ABEFOREB) {
			PHBERR(p, "CAPP: Capp recovery Timed-out.\n");
			end = 0;
			break;
		}
	}

	/* Check if the recovery failed or passed */
	if (reg & PPC_BIT(1)) {
		uint64_t act0, act1, mask, fir;

		/* Use the Action0/1 and mask to only clear the bits
		 * that cause local checkstop. Other bits need the
		 * attention of the PRD daemon.
		 */
		xscom_read(p->chip_id, CAPP_FIR_ACTION0 + offset, &act0);
		xscom_read(p->chip_id, CAPP_FIR_ACTION1 + offset, &act1);
		xscom_read(p->chip_id, CAPP_FIR_MASK + offset, &mask);
		xscom_read(p->chip_id, CAPP_FIR + offset, &fir);

		fir = ~(fir & ~mask & act0 & act1);
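
		/*
		 * CAPP_FIR_CLEAR is used as a write-AND here: bits
		 * written as 0 are cleared. Inverting the unmasked bits
		 * that have both action bits set therefore clears
		 * exactly the local-checkstop sources and preserves the
		 * rest.
		 */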
3339 PHBDBG(p, "Doing CAPP recovery scoms\n");
3340
3341 /* update capp fir clearing bits causing local checkstop */
3342 PHBDBG(p, "Resetting CAPP Fir with mask 0x%016llX\n", fir);
3343 xscom_write(p->chip_id, CAPP_FIR_CLEAR + offset, fir);
3344
3345 /* disable snoops */
3346 xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0);
3347 load_capp_ucode(p);
3348
3349 /* clear err rpt reg*/
3350 xscom_write(p->chip_id, CAPP_ERR_RPT_CLR + offset, 0);
3351
3352 /* clear capp fir */
3353 xscom_write(p->chip_id, CAPP_FIR + offset, 0);
3354
3355 /* Just reset Bit-0,1 and dont touch any other bit */
3356 xscom_read(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, ®);
3357 reg &= ~(PPC_BIT(0) | PPC_BIT(1));
3358 xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, reg);
3359
3360 PHBDBG(p, "CAPP recovery complete\n");
3361 rc = OPAL_SUCCESS;
3362
3363 } else {
3364 /* We will most likely checkstop here due to the FIR ACTION for
3365 * a failed recovery, so this message would never be logged.
3366 * But if we still enter here, then return an error forcing a
3367 * fence of the PHB.
3368 */
3369 if (reg & PPC_BIT(5))
3370 PHBERR(p, "CAPP: Capp recovery Failed\n");
3371 else if (reg & PPC_BIT(9))
3372 PHBERR(p, "CAPP: Capp recovery hang detected\n");
3373 else if (end != 0)
3374 PHBERR(p, "CAPP: Unknown recovery failure\n");
3375
3376 PHBDBG(p, "CAPP: Err/Status-reg=0x%016llx\n", reg);
3377 rc = OPAL_HARDWARE;
3378 }
3379
3380 return rc;
3381 }
3382
3383 /*
3384 * Disable CAPI mode on a PHB. Must be done while PHB is fenced and
3385 * not in recovery.
3386 */
3387 static void disable_capi_mode(struct phb4 *p)
3388 {
3389 uint64_t reg;
3390 struct capp *capp = p->capp;
3391
3392 PHBINF(p, "CAPP: Deactivating\n");
3393
3394 /* Check if CAPP is attached to the PHB and active */
3395 if (!capp || capp->phb != &p->phb) {
3396 PHBDBG(p, "CAPP: Not attached to this PHB!\n");
3397 return;
3398 }
3399
3400 xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
3401 if (!(reg & PPC_BIT(0))) {
3402 /* Not in CAPI mode, no action required */
3403 PHBERR(p, "CAPP: Not enabled!\n");
3404 return;
3405 }
3406
3407 /* CAPP should already be out of recovery in this function */
3408 capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
3409 if (reg & PPC_BIT(0)) {
3410 PHBERR(p, "CAPP: Can't disable while still in recovery!\n");
3411 return;
3412 }
3413
3414 PHBINF(p, "CAPP: Disabling CAPI mode\n");
3415
3416 /* First Phase Reset CAPP Registers */
3417 /* CAPP is about to be disabled; mark TLBI_FENCED and tlbi_psl_is_dead */
3418 capp_xscom_write(capp, CAPP_ERR_STATUS_CTRL, PPC_BIT(3) | PPC_BIT(4));
3419
3420 /* Flush SUE uOP1 Register */
3421 if (p->rev != PHB4_REV_NIMBUS_DD10)
3422 capp_xscom_write(capp, FLUSH_SUE_UOP1, 0);
3423
3424 /* Release DMA/STQ engines */
3425 capp_xscom_write(capp, APC_FSM_READ_MASK, 0ull);
3426 capp_xscom_write(capp, XPT_FSM_RMM, 0ull);
3427
3428 /* Disable snoop */
3429 capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
3430
3431 /* Clear flush SUE state map register */
3432 capp_xscom_write(capp, FLUSH_SUE_STATE_MAP, 0);
3433
3434 /* Disable epoch timer */
3435 capp_xscom_write(capp, EPOCH_RECOVERY_TIMERS_CTRL, 0);
3436
3437 /* CAPP Transport Control Register */
3438 capp_xscom_write(capp, TRANSPORT_CONTROL, PPC_BIT(15));
3439
3440 /* Disable snooping */
3441 capp_xscom_write(capp, SNOOP_CONTROL, 0);
3442 capp_xscom_write(capp, SNOOP_CAPI_CONFIG, 0);
3443
3444 /* APC Master PB Control Register - disable examining cResps */
3445 capp_xscom_write(capp, APC_MASTER_PB_CTRL, 0);
3446
3447 /* APC Master Config Register - de-select PHBs */
3448 xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
3449 APC_MASTER_CAPI_CTRL, 0, PPC_BITMASK(2, 3));
3450
3451 /* Clear all error registers */
3452 capp_xscom_write(capp, CAPP_ERR_RPT_CLR, 0);
3453 capp_xscom_write(capp, CAPP_FIR, 0);
3454 capp_xscom_write(capp, CAPP_FIR_ACTION0, 0);
3455 capp_xscom_write(capp, CAPP_FIR_ACTION1, 0);
3456 capp_xscom_write(capp, CAPP_FIR_MASK, 0);
3457
3458 /* Second Phase Reset PEC/PHB Registers */
3459
3460 /* Reset the stack overrides if any */
3461 xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PRDSTKOVR, 0);
3462 xscom_write(p->chip_id, p->pe_xscom +
3463 XPEC_NEST_READ_STACK_OVERRIDE, 0);
3464
3465 /* PE Bus AIB Mode Bits. Disable Tracing. Leave HOL Blocking as it is */
3466 if (p->rev != PHB4_REV_NIMBUS_DD10 && p->index == CAPP1_PHB_INDEX)
3467 xscom_write_mask(p->chip_id,
3468 p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, 0,
3469 PPC_BIT(30));
3470
3471 /* Reset for PCI to PB data movement */
3472 xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
3473 0, XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
3474
3475 /* Disable CAPP mode in PEC CAPP Control Register */
3476 xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, 0ull);
3477 }
3478
3479 static int64_t phb4_creset(struct pci_slot *slot)
3480 {
3481 struct phb4 *p = phb_to_phb4(slot->phb);
3482 struct capp *capp = p->capp;
3483 uint64_t pbcq_status;
3484 uint64_t creset_time, wait_time;
3485
3486 /* Don't even try fixing a broken PHB */
3487 if (p->broken)
3488 return OPAL_HARDWARE;
3489
3490 switch (slot->state) {
3491 case PHB4_SLOT_NORMAL:
3492 case PHB4_SLOT_CRESET_START:
3493 PHBDBG(p, "CRESET: Starts\n");
3494
3495 p->creset_start_time = mftb();
3496
3497 /* circumvention for HW551382 */
3498 if (is_phb5()) {
3499 PHBINF(p, "CRESET: Workaround for HW551382\n");
3500 set_sys_disable_detect(p, true);
3501 }
3502
3503 phb4_prepare_link_change(slot, false);
3504 /* Clear error inject register, preventing recursive errors */
3505 xscom_write(p->chip_id, p->pe_xscom + 0x2, 0x0);
3506
3507 /* Prevent HMI when PHB gets fenced as we are disabling CAPP */
3508 if (p->flags & PHB4_CAPP_DISABLE &&
3509 capp && capp->phb == slot->phb) {
3510 /* Since there is no HMI, set the recovery flag manually. */
3511 p->flags |= PHB4_CAPP_RECOVERY;
3512 xscom_write_mask(p->chip_id, capp->capp_xscom_offset +
3513 CAPP_FIR_MASK,
3514 PPC_BIT(31), PPC_BIT(31));
3515 }
3516
3517 /* Force fence on the PHB to work around a non-existent PE */
3518 if (!phb4_fenced(p))
3519 xscom_write(p->chip_id, p->pe_stk_xscom + 0x2,
3520 0x0000002000000000UL);
3521
3522 /*
3523 * Force use of ASB for register access until the PHB has
3524 * been fully reset.
3525 */
3526 p->flags |= PHB4_CFG_USE_ASB | PHB4_AIB_FENCED;
3527
3528 /* Assert PERST before clearing errors */
3529 phb4_assert_perst(slot, true);
3530
3531 /* Clear errors, following the proper sequence */
3532 phb4_err_clear(p);
3533
3534 /* Actual reset */
3535 p->flags |= PHB4_ETU_IN_RESET;
3536 xscom_write(p->chip_id, p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET,
3537 0x8000000000000000UL);
3538
3539 /* Read errors in PFIR and NFIR */
3540 xscom_read(p->chip_id, p->pci_stk_xscom + 0x0, &p->pfir_cache);
3541 xscom_read(p->chip_id, p->pe_stk_xscom + 0x0, &p->nfir_cache);
3542
3543 pci_slot_set_state(slot, PHB4_SLOT_CRESET_WAIT_CQ);
3544 slot->retries = 500;
3545 return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
3546 case PHB4_SLOT_CRESET_WAIT_CQ:
3547
3548 /* Wait until operations are complete */
3549 xscom_read(p->chip_id, p->pe_stk_xscom + 0xc, &pbcq_status);
3550 if (!(pbcq_status & 0xC000000000000000UL)) {
3551 PHBDBG(p, "CRESET: No pending transactions\n");
3552
3553 /* capp recovery */
3554 if ((p->flags & PHB4_CAPP_RECOVERY) &&
3555 (do_capp_recovery_scoms(p) != OPAL_SUCCESS))
3556 goto error;
3557
3558 if (p->flags & PHB4_CAPP_DISABLE)
3559 disable_capi_mode(p);
3560
3561 /* Clear errors in PFIR and NFIR */
3562 xscom_write(p->chip_id, p->pci_stk_xscom + 0x1,
3563 ~p->pfir_cache);
3564 xscom_write(p->chip_id, p->pe_stk_xscom + 0x1,
3565 ~p->nfir_cache);
3566
3567 /* Re-read errors in PFIR and NFIR and reset any new
3568 * error reported.
3569 */
3570 xscom_read(p->chip_id, p->pci_stk_xscom +
3571 XPEC_PCI_STK_PCI_FIR, &p->pfir_cache);
3572 xscom_read(p->chip_id, p->pe_stk_xscom +
3573 XPEC_NEST_STK_PCI_NFIR, &p->nfir_cache);
3574
3575 if (p->pfir_cache || p->nfir_cache) {
3576 PHBERR(p, "CRESET: PHB still fenced !!\n");
3577 phb4_dump_pec_err_regs(p);
3578
3579 /* Reset the PHB errors */
3580 xscom_write(p->chip_id, p->pci_stk_xscom +
3581 XPEC_PCI_STK_PCI_FIR, 0);
3582 xscom_write(p->chip_id, p->pe_stk_xscom +
3583 XPEC_NEST_STK_PCI_NFIR, 0);
3584 }
3585
3586 /* Clear PHB from reset */
3587 xscom_write(p->chip_id,
3588 p->pci_stk_xscom + XPEC_PCI_STK_ETU_RESET, 0x0);
3589 p->flags &= ~PHB4_ETU_IN_RESET;
3590
3591 pci_slot_set_state(slot, PHB4_SLOT_CRESET_REINIT);
3592 /* After lifting PHB reset, wait while logic settles */
3593 return pci_slot_set_sm_timeout(slot, msecs_to_tb(10));
3594 }
3595
3596 if (slot->retries-- == 0) {
3597 PHBERR(p, "Timeout waiting for pending transaction\n");
3598 goto error;
3599 }
3600 return pci_slot_set_sm_timeout(slot, msecs_to_tb(100));
3601 case PHB4_SLOT_CRESET_REINIT:
3602 PHBDBG(p, "CRESET: Reinitialization\n");
3603 p->flags &= ~PHB4_AIB_FENCED;
3604 p->flags &= ~PHB4_CAPP_RECOVERY;
3605 p->flags &= ~PHB4_CFG_USE_ASB;
3606 phb4_init_hw(p);
3607 pci_slot_set_state(slot, PHB4_SLOT_CRESET_FRESET);
3608
3609 /*
3610 * The PERST is sticky across resets, but LINK_DIS isn't.
3611 * Re-assert it here now that we've reset the PHB.
3612 */
3613 phb4_assert_perst(slot, true);
3614
3615 /*
3616 * wait either 100ms (for the ETU logic) or until we've had
3617 * PERST asserted for 250ms.
3618 */
3619 creset_time = tb_to_msecs(mftb() - p->creset_start_time);
3620 if (creset_time < 250)
3621 wait_time = MAX(100, 250 - creset_time);
3622 else
3623 wait_time = 100;
3624 PHBDBG(p, "CRESET: wait_time = %lld\n", wait_time);
3625 return pci_slot_set_sm_timeout(slot, msecs_to_tb(wait_time));
3626
3627 case PHB4_SLOT_CRESET_FRESET:
3628 /*
3629 * We asserted PERST at the beginning of the CRESET and we
3630 * have waited long enough, so we can skip it in the freset
3631 * procedure.
3632 */
3633 p->skip_perst = true;
3634 pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3635 return slot->ops.freset(slot);
3636 default:
3637 PHBERR(p, "CRESET: Unexpected slot state %08x, resetting...\n",
3638 slot->state);
3639 pci_slot_set_state(slot, PHB4_SLOT_NORMAL);
3640 return slot->ops.creset(slot);
3641
3642 }
3643
3644 error:
3645 /* Mark the PHB as dead and expect it to be removed */
3646 p->broken = true;
3647 return OPAL_HARDWARE;
3648 }
3649
3650 /*
3651 * Initialize root complex slot, which is mainly used to
3652 * do fundamental reset before PCI enumeration in PCI core.
3653 * When probing root complex and building its real slot,
3654 * the operations will be copied over.
3655 */
3656 static struct pci_slot *phb4_slot_create(struct phb *phb)
3657 {
3658 struct pci_slot *slot;
3659
3660 slot = pci_slot_alloc(phb, NULL);
3661 if (!slot)
3662 return slot;
3663
3664 /* Elementary functions */
3665 slot->ops.get_presence_state = phb4_get_presence_state;
3666 slot->ops.get_link_state = phb4_get_link_state;
3667 slot->ops.get_power_state = NULL;
3668 slot->ops.get_attention_state = NULL;
3669 slot->ops.get_latch_state = NULL;
3670 slot->ops.set_power_state = NULL;
3671 slot->ops.set_attention_state = NULL;
3672
3673 /*
3674 * For PHB slots, we have to split the fundamental reset
3675 * into 2 steps. We might not have the first step which
3676 * is to power off/on the slot, or it's controlled by
3677 * individual platforms.
3678 */
3679 slot->ops.prepare_link_change = phb4_prepare_link_change;
3680 slot->ops.poll_link = phb4_poll_link;
3681 slot->ops.hreset = phb4_hreset;
3682 slot->ops.freset = phb4_freset;
3683 slot->ops.creset = phb4_creset;
3684 slot->ops.completed_sm_run = phb4_slot_sm_run_completed;
3685 slot->link_retries = PHB4_LINK_LINK_RETRIES;
3686
3687 return slot;
3688 }
3689
3690 static void phb4_int_unmask_all(struct phb4 *p)
3691 {
3692 /* Init_126..130 - Re-enable error interrupts */
3693 out_be64(p->regs + PHB_ERR_IRQ_ENABLE, 0xca8880cc00000000ull);
3694
3695 if (is_phb5())
3696 out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE, 0x200850be08200020ull);
3697 else
3698 out_be64(p->regs + PHB_TXE_ERR_IRQ_ENABLE, 0x2008400e08200000ull);
3699 out_be64(p->regs + PHB_RXE_ARB_ERR_IRQ_ENABLE, 0xc40038fc01804070ull);
3700 out_be64(p->regs + PHB_RXE_MRG_ERR_IRQ_ENABLE, 0x00006100008000a8ull);
3701 out_be64(p->regs + PHB_RXE_TCE_ERR_IRQ_ENABLE, 0x60510050c0000000ull);
3702 }
3703
3704 /*
3705 * Mask the IRQ for any currently set error bits. This prevents the PHB's ERR
3706 * and INF interrupts from being re-fired before the kernel can handle the
3707 * underlying condition.
3708 */
3709 static void phb4_int_mask_active(struct phb4 *p)
3710 {
3711 const uint64_t error_regs[] = {
3712 PHB_ERR_STATUS,
3713 PHB_TXE_ERR_STATUS,
3714 PHB_RXE_ARB_ERR_STATUS,
3715 PHB_RXE_MRG_ERR_STATUS,
3716 PHB_RXE_TCE_ERR_STATUS
3717 };
3718 int i;
3719
3720 for (i = 0; i < ARRAY_SIZE(error_regs); i++) {
3721 uint64_t stat, mask;
3722
3723 /* The IRQ mask reg is always offset 0x20 from the status reg */
3724 stat = phb4_read_reg(p, error_regs[i]);
3725 mask = phb4_read_reg(p, error_regs[i] + 0x20);
3726
3727 phb4_write_reg(p, error_regs[i] + 0x20, mask & ~stat);
3728 }
3729 }
3730
3731 static uint64_t phb4_get_pesta(struct phb4 *p, uint64_t pe_number)
3732 {
3733 uint64_t pesta;
3734 __be64 *pPEST;
3735
3736 pPEST = (__be64 *)p->tbl_pest;
3737
3738 phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3739 pesta = phb4_read_reg(p, PHB_IODA_DATA0);
3740 if (pesta & IODA3_PESTA_MMIO_FROZEN)
3741 pesta |= be64_to_cpu(pPEST[2*pe_number]);
3742
3743 return pesta;
3744 }
3745
3746 /* Check if the chip requires escalating a freeze to fence on MMIO loads */
3747 static bool phb4_escalation_required(void)
3748 {
3749 uint64_t pvr = mfspr(SPR_PVR);
3750
3751 /* Only on Power9 */
3752 if (proc_gen != proc_gen_p9)
3753 return false;
3754
3755 /*
3756 * Escalation is required on the following chip versions:
3757 * - Cumulus DD1.0
3758 * - Nimbus DD2.0, DD2.1 (and DD1.0, but it is unsupported so no check).
3759 */
3760 if (pvr & PVR_POWER9_CUMULUS) {
3761 if (PVR_VERS_MAJ(pvr) == 1 && PVR_VERS_MIN(pvr) == 0)
3762 return true;
3763 } else { /* Nimbus */
3764 if (PVR_VERS_MAJ(pvr) == 2 && PVR_VERS_MIN(pvr) < 2)
3765 return true;
3766 }
3767
3768 return false;
3769 }
3770
3771 static bool phb4_freeze_escalate(uint64_t pesta)
3772 {
3773 if ((GETFIELD(IODA3_PESTA_TRANS_TYPE, pesta) ==
3774 IODA3_PESTA_TRANS_TYPE_MMIOLOAD) &&
3775 (pesta & (IODA3_PESTA_CA_CMPLT_TMT | IODA3_PESTA_UR)))
3776 return true;
3777 return false;
3778 }
3779
3780 static int64_t phb4_eeh_freeze_status(struct phb *phb, uint64_t pe_number,
3781 uint8_t *freeze_state,
3782 uint16_t *pci_error_type,
3783 uint16_t *severity)
3784 {
3785 struct phb4 *p = phb_to_phb4(phb);
3786 uint64_t peev_bit = PPC_BIT(pe_number & 0x3f);
3787 uint64_t peev, pesta, pestb;
3788
3789 /* Defaults: not frozen */
3790 *freeze_state = OPAL_EEH_STOPPED_NOT_FROZEN;
3791 *pci_error_type = OPAL_EEH_NO_ERROR;
3792
3793 /* Check dead */
3794 if (p->broken) {
3795 *freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
3796 *pci_error_type = OPAL_EEH_PHB_ERROR;
3797 if (severity)
3798 *severity = OPAL_EEH_SEV_PHB_DEAD;
3799 return OPAL_HARDWARE;
3800 }
3801
3802 /* Check fence and CAPP recovery */
3803 if (phb4_fenced(p) || (p->flags & PHB4_CAPP_RECOVERY)) {
3804 *freeze_state = OPAL_EEH_STOPPED_MMIO_DMA_FREEZE;
3805 *pci_error_type = OPAL_EEH_PHB_ERROR;
3806 if (severity)
3807 *severity = OPAL_EEH_SEV_PHB_FENCED;
3808 return OPAL_SUCCESS;
3809 }
3810
3811 /* Check the PEEV */
3812 phb4_ioda_sel(p, IODA3_TBL_PEEV, pe_number / 64, false);
3813 peev = in_be64(p->regs + PHB_IODA_DATA0);
3814 if (!(peev & peev_bit))
3815 return OPAL_SUCCESS;
3816
3817 /* Indicate that we have an ER pending */
3818 phb4_set_err_pending(p, true);
3819 if (severity)
3820 *severity = OPAL_EEH_SEV_PE_ER;
3821
3822 /* Read the full PESTA */
3823 pesta = phb4_get_pesta(p, pe_number);
3824 /* Check if we need to escalate to fence */
3825 if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
3826 PHBERR(p, "Escalating freeze to fence PESTA[%lli]=%016llx\n",
3827 pe_number, pesta);
3828 *severity = OPAL_EEH_SEV_PHB_FENCED;
3829 *pci_error_type = OPAL_EEH_PHB_ERROR;
3830 }
3831
3832 /* Read the PESTB in the PHB */
3833 phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3834 pestb = phb4_read_reg(p, PHB_IODA_DATA0);
3835
3836 /* Convert PESTA/B to freeze_state */
3837 if (pesta & IODA3_PESTA_MMIO_FROZEN)
3838 *freeze_state |= OPAL_EEH_STOPPED_MMIO_FREEZE;
3839 if (pestb & IODA3_PESTB_DMA_STOPPED)
3840 *freeze_state |= OPAL_EEH_STOPPED_DMA_FREEZE;
3841
3842 return OPAL_SUCCESS;
3843 }
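/*
 * Illustrative note (an assumption about the OPAL API encoding, not
 * stated above): freeze_state is a bitmask, so a PE with both
 * PESTA[MMIO_FROZEN] and PESTB[DMA_STOPPED] set reports
 * OPAL_EEH_STOPPED_MMIO_FREEZE | OPAL_EEH_STOPPED_DMA_FREEZE, which is
 * the same value as the OPAL_EEH_STOPPED_MMIO_DMA_FREEZE used for the
 * dead and fenced cases above.
 */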
3844
3845 static int64_t phb4_eeh_freeze_clear(struct phb *phb, uint64_t pe_number,
3846 uint64_t eeh_action_token)
3847 {
3848 struct phb4 *p = phb_to_phb4(phb);
3849 uint64_t err, peev;
3850 int32_t i;
3851 bool frozen_pe = false;
3852
3853 if (p->broken)
3854 return OPAL_HARDWARE;
3855
3856 /* Check the error summary. If nothing is set, move to clearing
3857 * the PESTs, which can contain a freeze state from a previous
3858 * error or one simply set explicitly by the user
3859 */
3860 err = in_be64(p->regs + PHB_ETU_ERR_SUMMARY);
3861 if (err == 0xffffffffffffffffUL) {
3862 if (phb4_fenced(p)) {
3863 PHBERR(p, "eeh_freeze_clear on fenced PHB\n");
3864 return OPAL_HARDWARE;
3865 }
3866 }
3867 if (err != 0)
3868 phb4_err_clear(p);
3869
3870 /*
3871 * We have the PEEV in system memory. It would be faster to
3872 * access it directly.
3873 */
3874 if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO) {
3875 phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3876 out_be64(p->regs + PHB_IODA_DATA0, 0);
3877 }
3878 if (eeh_action_token & OPAL_EEH_ACTION_CLEAR_FREEZE_DMA) {
3879 phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3880 out_be64(p->regs + PHB_IODA_DATA0, 0);
3881 }
3882
3883
3884 /* Update ER pending indication */
3885 phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
3886 for (i = 0; i < p->num_pes/64; i++) {
3887 peev = in_be64(p->regs + PHB_IODA_DATA0);
3888 if (peev) {
3889 frozen_pe = true;
3890 break;
3891 }
3892 }
3893 if (frozen_pe) {
3894 p->err.err_src = PHB4_ERR_SRC_PHB;
3895 p->err.err_class = PHB4_ERR_CLASS_ER;
3896 p->err.err_bit = -1;
3897 phb4_set_err_pending(p, true);
3898 } else
3899 phb4_set_err_pending(p, false);
3900
3901 return OPAL_SUCCESS;
3902 }
3903
3904 static int64_t phb4_eeh_freeze_set(struct phb *phb, uint64_t pe_number,
3905 uint64_t eeh_action_token)
3906 {
3907 struct phb4 *p = phb_to_phb4(phb);
3908 uint64_t data;
3909
3910 if (p->broken)
3911 return OPAL_HARDWARE;
3912
3913 if (pe_number >= p->num_pes)
3914 return OPAL_PARAMETER;
3915
3916 if (eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_MMIO &&
3917 eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_DMA &&
3918 eeh_action_token != OPAL_EEH_ACTION_SET_FREEZE_ALL)
3919 return OPAL_PARAMETER;
3920
3921 if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_MMIO) {
3922 phb4_ioda_sel(p, IODA3_TBL_PESTA, pe_number, false);
3923 data = in_be64(p->regs + PHB_IODA_DATA0);
3924 data |= IODA3_PESTA_MMIO_FROZEN;
3925 out_be64(p->regs + PHB_IODA_DATA0, data);
3926 }
3927
3928 if (eeh_action_token & OPAL_EEH_ACTION_SET_FREEZE_DMA) {
3929 phb4_ioda_sel(p, IODA3_TBL_PESTB, pe_number, false);
3930 data = in_be64(p->regs + PHB_IODA_DATA0);
3931 data |= IODA3_PESTB_DMA_STOPPED;
3932 out_be64(p->regs + PHB_IODA_DATA0, data);
3933 }
3934
3935 return OPAL_SUCCESS;
3936 }
3937
3938 static int64_t phb4_eeh_next_error(struct phb *phb,
3939 uint64_t *first_frozen_pe,
3940 uint16_t *pci_error_type,
3941 uint16_t *severity)
3942 {
3943 struct phb4 *p = phb_to_phb4(phb);
3944 uint64_t peev, pesta;
3945 uint32_t peev_size = p->num_pes/64;
3946 int32_t i, j;
3947
3948 /* If the PHB is broken, we needn't go forward */
3949 if (p->broken) {
3950 *pci_error_type = OPAL_EEH_PHB_ERROR;
3951 *severity = OPAL_EEH_SEV_PHB_DEAD;
3952 return OPAL_SUCCESS;
3953 }
3954
3955 if ((p->flags & PHB4_CAPP_RECOVERY)) {
3956 *pci_error_type = OPAL_EEH_PHB_ERROR;
3957 *severity = OPAL_EEH_SEV_PHB_FENCED;
3958 return OPAL_SUCCESS;
3959 }
3960
3961 /*
3962 * Check if we already have pending errors. If that's
3963 * the case, gather more information about the
3964 * pending errors. Here we try the PBCQ prior to the PHB.
3965 */
3966 if (phb4_err_pending(p) /*&&
3967 !phb4_err_check_pbcq(p) &&
3968 !phb4_err_check_lem(p) */)
3969 phb4_set_err_pending(p, false);
3970
3971 /* Clear result */
3972 *pci_error_type = OPAL_EEH_NO_ERROR;
3973 *severity = OPAL_EEH_SEV_NO_ERROR;
3974 *first_frozen_pe = (uint64_t)-1;
3975
3976 /* Check frozen PEs */
3977 if (!phb4_err_pending(p)) {
3978 phb4_ioda_sel(p, IODA3_TBL_PEEV, 0, true);
3979 for (i = 0; i < peev_size; i++) {
3980 peev = in_be64(p->regs + PHB_IODA_DATA0);
3981 if (peev) {
3982 p->err.err_src = PHB4_ERR_SRC_PHB;
3983 p->err.err_class = PHB4_ERR_CLASS_ER;
3984 p->err.err_bit = -1;
3985 phb4_set_err_pending(p, true);
3986 break;
3987 }
3988 }
3989 }
3990
3991 if (!phb4_err_pending(p))
3992 return OPAL_SUCCESS;
3993 /*
3994 * If the frozen PE was caused by a malfunctioning TLP, we
3995 * need to reset the PHB. So convert the ER to a PHB-fatal
3996 * error in that case.
3997 */
3998 if (p->err.err_class == PHB4_ERR_CLASS_ER) {
3999 for (i = peev_size - 1; i >= 0; i--) {
4000 phb4_ioda_sel(p, IODA3_TBL_PEEV, i, false);
4001 peev = in_be64(p->regs + PHB_IODA_DATA0);
4002 for (j = 0; j < 64; j++) {
4003 if (peev & PPC_BIT(j)) {
4004 *first_frozen_pe = i * 64 + j;
4005 break;
4006 }
4007 }
4008 if (*first_frozen_pe != (uint64_t)(-1))
4009 break;
4010 }
4011 }
4012
4013 if (*first_frozen_pe != (uint64_t)(-1)) {
4014 pesta = phb4_get_pesta(p, *first_frozen_pe);
4015 if (phb4_escalation_required() && phb4_freeze_escalate(pesta)) {
4016 PHBINF(p, "Escalating freeze to fence. PESTA[%lli]=%016llx\n",
4017 *first_frozen_pe, pesta);
4018 p->err.err_class = PHB4_ERR_CLASS_FENCED;
4019 }
4020 }
4021
4022 switch (p->err.err_class) {
4023 case PHB4_ERR_CLASS_DEAD:
4024 *pci_error_type = OPAL_EEH_PHB_ERROR;
4025 *severity = OPAL_EEH_SEV_PHB_DEAD;
4026 break;
4027 case PHB4_ERR_CLASS_FENCED:
4028 *pci_error_type = OPAL_EEH_PHB_ERROR;
4029 *severity = OPAL_EEH_SEV_PHB_FENCED;
4030 break;
4031 case PHB4_ERR_CLASS_ER:
4032 *pci_error_type = OPAL_EEH_PE_ERROR;
4033 *severity = OPAL_EEH_SEV_PE_ER;
4034
4035 /* No frozen PE ? */
4036 if (*first_frozen_pe == (uint64_t)-1) {
4037 *pci_error_type = OPAL_EEH_NO_ERROR;
4038 *severity = OPAL_EEH_SEV_NO_ERROR;
4039 phb4_set_err_pending(p, false);
4040 }
4041
4042 break;
4043 case PHB4_ERR_CLASS_INF:
4044 *pci_error_type = OPAL_EEH_PHB_ERROR;
4045 *severity = OPAL_EEH_SEV_INF;
4046 break;
4047 default:
4048 *pci_error_type = OPAL_EEH_NO_ERROR;
4049 *severity = OPAL_EEH_SEV_NO_ERROR;
4050 phb4_set_err_pending(p, false);
4051 }
4052
4053 /*
4054 * Unmask all our error interrupts once all pending errors
4055 * have been handled.
4056 */
4057 if (!phb4_err_pending(p))
4058 phb4_int_unmask_all(p);
4059
4060 return OPAL_SUCCESS;
4061 }
4062
4063 static int64_t phb4_err_inject_finalize(struct phb4 *phb, uint64_t addr,
4064 uint64_t mask, uint64_t ctrl,
4065 bool is_write)
4066 {
4067 if (is_write)
4068 ctrl |= PHB_PAPR_ERR_INJ_CTL_WR;
4069 else
4070 ctrl |= PHB_PAPR_ERR_INJ_CTL_RD;
4071
4072 out_be64(phb->regs + PHB_PAPR_ERR_INJ_ADDR, addr);
4073 out_be64(phb->regs + PHB_PAPR_ERR_INJ_MASK, mask);
4074 out_be64(phb->regs + PHB_PAPR_ERR_INJ_CTL, ctrl);
4075
4076 return OPAL_SUCCESS;
4077 }
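/*
 * Usage sketch (illustrative; the bdfn value is assumed): to arm a
 * config-read error against bdfn 0x0100, the cfg handler below would
 * end up doing roughly
 *
 *	phb4_err_inject_finalize(p, 0x0100,
 *				 PHB_PAPR_ERR_INJ_MASK_CFG_ALL,
 *				 PHB_PAPR_ERR_INJ_CTL_CFG, false);
 *
 * i.e. ADDR and MASK select the config-address window to match and
 * CTL selects the error class plus the read/write direction.
 */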
4078
4079 static int64_t phb4_err_inject_mem32(struct phb4 *phb __unused,
4080 uint64_t pe_number __unused,
4081 uint64_t addr __unused,
4082 uint64_t mask __unused,
4083 bool is_write __unused)
4084 {
4085 return OPAL_UNSUPPORTED;
4086 }
4087
4088 static int64_t phb4_err_inject_mem64(struct phb4 *phb __unused,
4089 uint64_t pe_number __unused,
4090 uint64_t addr __unused,
4091 uint64_t mask __unused,
4092 bool is_write __unused)
4093 {
4094 return OPAL_UNSUPPORTED;
4095 }
4096
4097 static int64_t phb4_err_inject_cfg(struct phb4 *phb, uint64_t pe_number,
4098 uint64_t addr, uint64_t mask,
4099 bool is_write)
4100 {
4101 uint64_t a, m, prefer, ctrl;
4102 int bdfn;
4103 bool is_bus_pe = false;
4104
4105 a = 0xffffull;
4106 prefer = 0xffffull;
4107 m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
4108 ctrl = PHB_PAPR_ERR_INJ_CTL_CFG;
4109
4110 for (bdfn = 0; bdfn < RTT_TABLE_ENTRIES; bdfn++) {
4111 if (be16_to_cpu(phb->tbl_rtt[bdfn]) != pe_number)
4112 continue;
4113
4114 /* The PE can be associated with PCI bus or device */
4115 is_bus_pe = false;
4116 if ((bdfn + 8) < RTT_TABLE_ENTRIES &&
4117 be16_to_cpu(phb->tbl_rtt[bdfn + 8]) == pe_number)
4118 is_bus_pe = true;
4119
4120 /* Figure out the PCI config address */
4121 if (prefer == 0xffffull) {
4122 if (is_bus_pe) {
4123 m = PHB_PAPR_ERR_INJ_MASK_CFG;
4124 prefer = SETFIELD(m, 0x0ull, PCI_BUS_NUM(bdfn));
4125 } else {
4126 m = PHB_PAPR_ERR_INJ_MASK_CFG_ALL;
4127 prefer = SETFIELD(m, 0x0ull, bdfn);
4128 }
4129 }
4130
4131 /* Check whether the input address is valid */
4132 if (!is_bus_pe &&
4133 GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG_ALL, addr) == bdfn) {
4134 a = addr;
4135 break;
4136 }
4137
4138 if (is_bus_pe &&
4139 GETFIELD(PHB_PAPR_ERR_INJ_MASK_CFG, addr) == PCI_BUS_NUM(bdfn)) {
4140 a = addr;
4141 break;
4142 }
4143 }
4144
4145 /* Invalid PE number */
4146 if (prefer == 0xffffull)
4147 return OPAL_PARAMETER;
4148
4149 /* Specified address is out of range */
4150 if (a == 0xffffull)
4151 a = prefer;
4152 else
4153 m = mask;
4154
4155 return phb4_err_inject_finalize(phb, a, m, ctrl, is_write);
4156 }
4157
4158 static int64_t phb4_err_inject_dma(struct phb4 *phb __unused,
4159 uint64_t pe_number __unused,
4160 uint64_t addr __unused,
4161 uint64_t mask __unused,
4162 bool is_write __unused,
4163 bool is_64bits __unused)
4164 {
4165 return OPAL_UNSUPPORTED;
4166 }
4167
4168 static int64_t phb4_err_inject_dma32(struct phb4 *phb, uint64_t pe_number,
4169 uint64_t addr, uint64_t mask,
4170 bool is_write)
4171 {
4172 return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, false);
4173 }
4174
4175 static int64_t phb4_err_inject_dma64(struct phb4 *phb, uint64_t pe_number,
4176 uint64_t addr, uint64_t mask,
4177 bool is_write)
4178 {
4179 return phb4_err_inject_dma(phb, pe_number, addr, mask, is_write, true);
4180 }
4181
4182
4183 static int64_t phb4_err_inject(struct phb *phb, uint64_t pe_number,
4184 uint32_t type, uint32_t func,
4185 uint64_t addr, uint64_t mask)
4186 {
4187 struct phb4 *p = phb_to_phb4(phb);
4188 int64_t (*handler)(struct phb4 *p, uint64_t pe_number,
4189 uint64_t addr, uint64_t mask, bool is_write);
4190 bool is_write;
4191
4192 /* We can't inject error to the reserved PE */
4193 if (pe_number == PHB4_RESERVED_PE_NUM(p) || pe_number >= p->num_pes)
4194 return OPAL_PARAMETER;
4195
4196 /* Clear leftover from last time */
4197 out_be64(p->regs + PHB_PAPR_ERR_INJ_CTL, 0x0ul);
4198
4199 switch (func) {
4200 case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR:
4201 case OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_DATA:
4202 is_write = false;
4203 if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4204 handler = phb4_err_inject_mem64;
4205 else
4206 handler = phb4_err_inject_mem32;
4207 break;
4208 case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_ADDR:
4209 case OPAL_ERR_INJECT_FUNC_IOA_ST_MEM_DATA:
4210 is_write = true;
4211 if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4212 handler = phb4_err_inject_mem64;
4213 else
4214 handler = phb4_err_inject_mem32;
4215 break;
4216 case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_ADDR:
4217 case OPAL_ERR_INJECT_FUNC_IOA_LD_CFG_DATA:
4218 is_write = false;
4219 handler = phb4_err_inject_cfg;
4220 break;
4221 case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_ADDR:
4222 case OPAL_ERR_INJECT_FUNC_IOA_ST_CFG_DATA:
4223 is_write = true;
4224 handler = phb4_err_inject_cfg;
4225 break;
4226 case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_ADDR:
4227 case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_DATA:
4228 case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_MASTER:
4229 case OPAL_ERR_INJECT_FUNC_IOA_DMA_RD_TARGET:
4230 is_write = false;
4231 if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4232 handler = phb4_err_inject_dma64;
4233 else
4234 handler = phb4_err_inject_dma32;
4235 break;
4236 case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_ADDR:
4237 case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_DATA:
4238 case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_MASTER:
4239 case OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET:
4240 is_write = true;
4241 if (type == OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64)
4242 handler = phb4_err_inject_dma64;
4243 else
4244 handler = phb4_err_inject_dma32;
4245 break;
4246 default:
4247 return OPAL_PARAMETER;
4248 }
4249
4250 return handler(p, pe_number, addr, mask, is_write);
4251 }
4252
4253 static int64_t phb4_get_diag_data(struct phb *phb,
4254 void *diag_buffer,
4255 uint64_t diag_buffer_len)
4256 {
4257 bool fenced;
4258 struct phb4 *p = phb_to_phb4(phb);
4259 struct OpalIoPhb4ErrorData *data = diag_buffer;
4260
4261 if (diag_buffer_len < sizeof(struct OpalIoPhb4ErrorData))
4262 return OPAL_PARAMETER;
4263 if (p->broken)
4264 return OPAL_HARDWARE;
4265
4266 /*
4267 * Dummy check for fence so that phb4_read_phb_status knows
4268 * whether to use ASB or AIB
4269 */
4270 fenced = phb4_fenced(p);
4271 phb4_read_phb_status(p, data);
4272
4273 if (!fenced)
4274 phb4_eeh_dump_regs(p);
4275
4276 /*
4277 * We probably got here because of errors
4278 * (INF class). In that case, we need to clear the error
4279 * explicitly.
4280 */
4281 if (phb4_err_pending(p) &&
4282 p->err.err_class == PHB4_ERR_CLASS_INF &&
4283 p->err.err_src == PHB4_ERR_SRC_PHB) {
4284 phb4_err_clear(p);
4285 phb4_set_err_pending(p, false);
4286 }
4287
4288 return OPAL_SUCCESS;
4289 }
4290
4291 static uint64_t tve_encode_50b_noxlate(uint64_t start_addr, uint64_t end_addr)
4292 {
4293 uint64_t tve;
4294
4295 /*
4296 * Put start address bits 49:24 into TVE[52:53]||[0:23]
4297 * and end address bits 49:24 into TVE[54:55]||[24:47]
4298 * and set TVE[51]
4299 */
4300 tve = (start_addr << 16) & (0xffffffull << 40);
4301 tve |= (start_addr >> 38) & (3ull << 10);
4302 tve |= (end_addr >> 8) & (0xfffffful << 16);
4303 tve |= (end_addr >> 40) & (3ull << 8);
4304 tve |= PPC_BIT(51) | IODA3_TVT_NON_TRANSLATE_50;
4305 return tve;
4306 }
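/*
 * Worked example (illustrative): for the full 50-bit range used by
 * enable_capi_mode() below, tve_encode_50b_noxlate(0ull,
 * 0x0003ffffffffffffull) yields TVE[0:23] = 0x000000 (start),
 * TVE[24:47] = 0xffffff (end), TVE[52:53] = '00', TVE[54:55] = '11',
 * plus TVE[51] and the non-translate enable, i.e. 0x000000ffffff1380,
 * assuming IODA3_TVT_NON_TRANSLATE_50 is TVE bit 56.
 */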
4307
4308 static bool phb4_is_dd20(struct phb4 *p)
4309 {
4310 struct proc_chip *chip = get_chip(p->chip_id);
4311
4312 if (p->rev == PHB4_REV_NIMBUS_DD20 && ((0xf & chip->ec_level) == 0))
4313 return true;
4314 return false;
4315 }
4316
4317 static int64_t phb4_get_capp_info(int chip_id, struct phb *phb,
4318 struct capp_info *info)
4319 {
4320 struct phb4 *p = phb_to_phb4(phb);
4321 uint32_t offset;
4322
4323 /* Not even supposed to be here on P10, but doesn't hurt */
4324 if (is_phb5())
4325 return OPAL_UNSUPPORTED;
4326
4327 if (chip_id != p->chip_id)
4328 return OPAL_PARAMETER;
4329
4330 /* Check if CAPP is attached to the PHB */
4331 if (p->capp == NULL || p->capp->phb != phb)
4332 return OPAL_PARAMETER;
4333
4334 offset = PHB4_CAPP_REG_OFFSET(p);
4335
4336 if (p->index == CAPP0_PHB_INDEX)
4337 info->capp_index = 0;
4338 if (p->index == CAPP1_PHB_INDEX)
4339 info->capp_index = 1;
4340 info->phb_index = p->index;
4341 info->capp_fir_reg = CAPP_FIR + offset;
4342 info->capp_fir_mask_reg = CAPP_FIR_MASK + offset;
4343 info->capp_fir_action0_reg = CAPP_FIR_ACTION0 + offset;
4344 info->capp_fir_action1_reg = CAPP_FIR_ACTION1 + offset;
4345 info->capp_err_status_ctrl_reg = CAPP_ERR_STATUS_CTRL + offset;
4346
4347 return OPAL_SUCCESS;
4348 }
4349
4350 static void phb4_init_capp_regs(struct phb4 *p, uint32_t capp_eng)
4351 {
4352 uint64_t addr, reg;
4353 uint32_t offset;
4354 uint8_t link_width_x16 = 1;
4355
4356 offset = PHB4_CAPP_REG_OFFSET(p);
4357
4358 /* Determine the PHB link width if the card is attached to PEC2 */
4359 if (p->index == CAPP1_PHB_INDEX) {
4360 /* Check if PEC2 is in x8 or x16 mode.
4361 * PEC0 is always in x16
4362 */
4363 addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
4364 xscom_read(p->chip_id, addr, &reg);
4365 link_width_x16 = ((reg & XPEC_P9_PCI_IOVALID_MASK) ==
4366 XPEC_P9_PCI_IOVALID_X16);
4367 }
4368
4369 /* APC Master PowerBus Control Register */
4370 xscom_read(p->chip_id, APC_MASTER_PB_CTRL + offset, &reg);
4371 reg |= PPC_BIT(0); /* enable cResp exam */
4372 reg |= PPC_BIT(3); /* disable vg not sys */
4373 reg |= PPC_BIT(12);/* HW417025: disable capp virtual machines */
4374 reg |= PPC_BIT(2); /* disable nn rn */
4375 reg |= PPC_BIT(4); /* disable g */
4376 reg |= PPC_BIT(5); /* disable ln */
4377 xscom_write(p->chip_id, APC_MASTER_PB_CTRL + offset, reg);
4378
4379 /* Set PHB mode, HPC Dir State and P9 mode */
4380 xscom_write(p->chip_id, APC_MASTER_CAPI_CTRL + offset,
4381 0x1772000000000000UL);
4382 PHBINF(p, "CAPP: port attached\n");
4383
4384 /* Set snoop ttype decoding, dir size to 512K */
4385 xscom_write(p->chip_id, SNOOP_CAPI_CONFIG + offset, 0x9000000000000000UL);
4386
4387 /* Use Read Epsilon Tier2 for all scopes.
4388 * Set Tier2 Read Epsilon.
4389 */
4390 xscom_read(p->chip_id, SNOOP_CONTROL + offset, &reg);
4391 reg |= PPC_BIT(0);
4392 reg |= PPC_BIT(35);
4393 reg |= PPC_BIT(45);
4394 reg |= PPC_BIT(46);
4395 reg |= PPC_BIT(47);
4396 reg |= PPC_BIT(50);
4397 xscom_write(p->chip_id, SNOOP_CONTROL + offset, reg);
4398
4399 /* Transport Control Register */
4400 xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
4401 if (p->index == CAPP0_PHB_INDEX) {
4402 reg |= PPC_BIT(1); /* Send Packet Timer Value */
4403 reg |= PPC_BITMASK(10, 13); /* Send Packet Timer Value */
4404 reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
4405 reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
4406 if (capp_eng & CAPP_MIN_STQ_ENGINES) {
4407 /* 2 CAPP msg engines */
4408 reg |= PPC_BIT(58);
4409 reg |= PPC_BIT(59);
4410 reg |= PPC_BIT(60);
4411 }
4412 if (capp_eng & CAPP_MAX_STQ_ENGINES) {
4413 /* 14 CAPP msg engines */
4414 reg |= PPC_BIT(60);
4415 }
4416 reg |= PPC_BIT(62);
4417 }
4418 if (p->index == CAPP1_PHB_INDEX) {
4419 reg |= PPC_BIT(4); /* Send Packet Timer Value */
4420 reg &= ~PPC_BIT(10); /* Set CI Store Buffer Threshold=5 */
4421 reg |= PPC_BIT(11); /* Set CI Store Buffer Threshold=5 */
4422 reg &= ~PPC_BIT(12); /* Set CI Store Buffer Threshold=5 */
4423 reg |= PPC_BIT(13); /* Set CI Store Buffer Threshold=5 */
4424 reg &= ~PPC_BITMASK(14, 17); /* Set Max LPC CI store buffer to zeros */
4425 reg &= ~PPC_BITMASK(18, 21); /* Set Max tlbi divider */
4426 if (capp_eng & CAPP_MIN_STQ_ENGINES) {
4427 /* 2 CAPP msg engines */
4428 reg |= PPC_BIT(59);
4429 reg |= PPC_BIT(60);
4430
4431 } else if (capp_eng & CAPP_MAX_STQ_ENGINES) {
4432
4433 if (link_width_x16)
4434 /* 14 CAPP msg engines */
4435 reg |= PPC_BIT(60) | PPC_BIT(62);
4436 else
4437 /* 6 CAPP msg engines */
4438 reg |= PPC_BIT(60);
4439 }
4440 }
4441 xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
4442
4443 /* The transport control register needs to be loaded in two
4444 * steps. Once the register values have been set, we have to
4445 * write bit 63 to a '1', which loads the register values into
4446 * the ci store buffer logic.
4447 */
4448 xscom_read(p->chip_id, TRANSPORT_CONTROL + offset, &reg);
4449 reg |= PPC_BIT(63);
4450 xscom_write(p->chip_id, TRANSPORT_CONTROL + offset, reg);
4451
4452 /* Enable epoch timer */
4453 xscom_write(p->chip_id, EPOCH_RECOVERY_TIMERS_CTRL + offset,
4454 0xC0000000FFF8FFE0UL);
4455
4456 /* Flush SUE State Map Register */
4457 xscom_write(p->chip_id, FLUSH_SUE_STATE_MAP + offset,
4458 0x08020A0000000000UL);
4459
4460 /* Flush SUE uOP1 Register */
4461 xscom_write(p->chip_id, FLUSH_SUE_UOP1 + offset,
4462 0xDCE0280428000000);
4463
4464 /* capp owns PHB read buffers */
4465 if (p->index == CAPP0_PHB_INDEX) {
4466 /* max PHB read buffers 0-47 */
4467 reg = 0xFFFFFFFFFFFF0000UL;
4468 if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
4469 reg = 0xF000000000000000UL;
4470 xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
4471 xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
4472 }
4473 if (p->index == CAPP1_PHB_INDEX) {
4474
4475 if (capp_eng & CAPP_MAX_DMA_READ_ENGINES) {
4476 reg = 0xF000000000000000ULL;
4477 } else if (link_width_x16) {
4478 /* 0-47 (Read machines) are available for
4479 * capp use
4480 */
4481 reg = 0x0000FFFFFFFFFFFFULL;
4482 } else {
4483 /* Set 30 Read machines for CAPP Minus
4484 * 20-27 for DMA
4485 */
4486 reg = 0xFFFFF00E00000000ULL;
4487 }
4488 xscom_write(p->chip_id, APC_FSM_READ_MASK + offset, reg);
4489 xscom_write(p->chip_id, XPT_FSM_RMM + offset, reg);
4490 }
4491
4492 /* CAPP FIR Action 0 */
4493 xscom_write(p->chip_id, CAPP_FIR_ACTION0 + offset, 0x0b1c000104060000UL);
4494
4495 /* CAPP FIR Action 1 */
4496 xscom_write(p->chip_id, CAPP_FIR_ACTION1 + offset, 0x2b9c0001240E0000UL);
4497
4498 /* CAPP FIR MASK */
4499 xscom_write(p->chip_id, CAPP_FIR_MASK + offset, 0x80031f98d8717000UL);
4500
4501 /* Mask the CAPP PSL Credit Timeout Register error */
4502 xscom_write_mask(p->chip_id, CAPP_FIR_MASK + offset,
4503 PPC_BIT(46), PPC_BIT(46));
4504
4505 /* Deassert TLBI_FENCED and tlbi_psl_is_dead */
4506 xscom_write(p->chip_id, CAPP_ERR_STATUS_CTRL + offset, 0);
4507 }
4508
4509 /* override some inits with CAPI defaults */
4510 static void phb4_init_capp_errors(struct phb4 *p)
4511 {
4512 /* Init_77: TXE Error AIB Fence Enable Register */
4513 if (phb4_is_dd20(p))
4514 out_be64(p->regs + 0x0d30, 0xdfffbf0ff7ddfff0ull);
4515 else
4516 out_be64(p->regs + 0x0d30, 0xdff7bf0ff7ddfff0ull);
4517 /* Init_86: RXE_ARB Error AIB Fence Enable Register */
4518 out_be64(p->regs + 0x0db0, 0xfbffd7bbfb7fbfefull);
4519
4520 /* Init_95: RXE_MRG Error AIB Fence Enable Register */
4521 out_be64(p->regs + 0x0e30, 0xfffffeffff7fff57ull);
4522
4523 /* Init_104: RXE_TCE Error AIB Fence Enable Register */
4524 out_be64(p->regs + 0x0eb0, 0xffaeffafffffffffull);
4525
4526 /* Init_113: PHB Error AIB Fence Enable Register */
4527 out_be64(p->regs + 0x0cb0, 0x35777073ff000000ull);
4528 }
4529
4530 /*
4531 * The capi, NBW and ASN indicators are used only on P9 to flag some
4532 * types of incoming traffic for the PHB and have been removed on P10.
4533 *
4534 * The capi indicator is over the 8 most significant bits (and
4535 * not 16). We stay away from bits 59 (TVE select), 60 and 61 (MSI)
4536 *
4537 * For the mask, we keep bit 59 in, as capi messages must hit TVE#0.
4538 * Bit 56 is not part of the mask, so that a NBW message (see below)
4539 * is also considered a capi message.
4540 */
4541 #define CAPIIND 0x0200
4542 #define CAPIMASK 0xFE00
4543
4544 /*
4545 * Non-Blocking Write messages are a subset of capi messages, so the
4546 * indicator is the same as capi + an extra bit (56) to differentiate.
4547 * Mask is the same as capi + the extra bit
4548 */
4549 #define NBWIND 0x0300
4550 #define NBWMASK 0xFF00
4551
4552 /*
4553 * The ASN indicator is used for tunneled operations (as_notify and
4554 * atomics). Tunneled operation messages can be sent in PCI mode as
4555 * well as CAPI mode.
4556 *
4557 * The format of those messages is specific and, for as_notify
4558 * messages, the address field is hijacked to encode the LPID/PID/TID
4559 * of the target thread, so those messages should not go through
4560 * translation. They must hit TVE#1. Therefore bit 59 is part of the
4561 * indicator.
4562 */
4563 #define ASNIND 0x0C00
4564 #define ASNMASK 0xFF00
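/*
 * Classification sketch (illustrative values): with the indicator and
 * mask applied to the 16 most significant PCI address bits,
 *
 *	0x0200 & CAPIMASK == CAPIIND	-> capi message
 *	0x0300 & CAPIMASK == CAPIIND and
 *	0x0300 & NBWMASK  == NBWIND	-> NBW message, and also capi,
 *					   since CAPIMASK ignores bit 56
 *	0x0C00 & ASNMASK  == ASNIND	-> ASN message, and not capi,
 *					   since 0x0C00 & CAPIMASK
 *					   != CAPIIND
 */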
4565
4566 /* Power Bus Common Queue Registers
4567 * All PBCQ and PBAIB registers are accessed via SCOM
4568 * NestBase = 4010C00 for PEC0
4569 * 4011000 for PEC1
4570 * 4011400 for PEC2
4571 * PCIBase = D010800 for PE0
4572 * E010800 for PE1
4573 * F010800 for PE2
4574 *
4575 * Some registers are shared amongst all of the stacks and will only
4576 * have 1 copy. Other registers are implemented one per stack.
4577 * Registers that are duplicated will have an additional offset
4578 * of "StackBase" so that they have a unique address.
4579 * Stackoffset = 00000040 for Stack0
4580 * = 00000080 for Stack1
4581 * = 000000C0 for Stack2
4582 */
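/*
 * Worked example (illustrative; the register number is assumed): a
 * duplicated per-stack register with SCOM number 0x08 on PEC1/Stack1
 * would then live at NestBase + Stackoffset + number, i.e.
 * 0x4011000 + 0x80 + 0x8 = 0x4011088.
 */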
4583 static int64_t enable_capi_mode(struct phb4 *p, uint64_t pe_number,
4584 uint32_t capp_eng)
4585 {
4586 uint64_t addr, reg, start_addr, end_addr, stq_eng, dma_eng;
4587 uint64_t mbt0, mbt1;
4588 int i, window_num = -1;
4589
4590 /* CAPP Control Register */
4591 xscom_read(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, &reg);
4592 if (reg & PPC_BIT(0)) {
4593 PHBDBG(p, "Already in CAPP mode\n");
4594 }
4595
4596 for (i = 0; i < 500000; i++) {
4597 /* PBCQ General Status Register */
4598 xscom_read(p->chip_id,
4599 p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_STAT,
4600 &reg);
4601 if (!(reg & 0xC000000000000000UL))
4602 break;
4603 time_wait_us(10);
4604 }
4605 if (reg & 0xC000000000000000UL) {
4606 PHBERR(p, "CAPP: Timeout waiting for pending transaction\n");
4607 return OPAL_HARDWARE;
4608 }
4609
4610 stq_eng = 0x0000000000000000ULL;
4611 dma_eng = 0x0000000000000000ULL;
4612 if (p->index == CAPP0_PHB_INDEX) {
4613 /* PBCQ is operating as a x16 stack
4614 * - The maximum number of engines given to CAPP will be
4615 * 14 and will be assigned in the order of STQ 15 to 2.
4616 * - 0-47 (Read machines) are available for capp use.
4617 */
4618 stq_eng = 0x000E000000000000ULL; /* 14 CAPP msg engines */
4619 dma_eng = 0x0000FFFFFFFFFFFFULL; /* 48 CAPP Read machines */
4620 }
4621
4622 if (p->index == CAPP1_PHB_INDEX) {
4623 /* Check if PEC is in x8 or x16 mode */
4624 addr = XPEC_P9_PCI_CPLT_CONF1 + 2 * XPEC_PCI_CPLT_OFFSET;
4625 xscom_read(p->chip_id, addr, &reg);
4626 if ((reg & XPEC_P9_PCI_IOVALID_MASK) == XPEC_P9_PCI_IOVALID_X16) {
4627 /* PBCQ is operating as a x16 stack
4628 * - The maximum number of engines given to CAPP will be
4629 * 14 and will be assigned in the order of STQ 15 to 2.
4630 * - 0-47 (Read machines) are available for capp use.
4631 */
4632 stq_eng = 0x000E000000000000ULL;
4633 dma_eng = 0x0000FFFFFFFFFFFFULL;
4634 } else {
4635
4636 /* PBCQ is operating as a x8 stack
4637 * - The maximum number of engines given to CAPP should
4638 * be 6 and will be assigned in the order of 7 to 2.
4639 * - 0-30 (Read machines) are available for capp use.
4640 */
4641 stq_eng = 0x0006000000000000ULL;
4642 /* 30 Read machines for CAPP Minus 20-27 for DMA */
4643 dma_eng = 0x0000FFFFF00E0000ULL;
4644 }
4645 }
4646
4647 if (capp_eng & CAPP_MIN_STQ_ENGINES)
4648 stq_eng = 0x0002000000000000ULL; /* 2 capp msg engines */
4649
4650 /* CAPP Control Register. Enable CAPP Mode */
4651 reg = 0x8000000000000000ULL; /* PEC works in CAPP Mode */
4652 reg |= stq_eng;
4653 if (capp_eng & CAPP_MAX_DMA_READ_ENGINES)
4654 dma_eng = 0x0000F00000000000ULL; /* 4 CAPP Read machines */
4655 reg |= dma_eng;
4656 xscom_write(p->chip_id, p->pe_xscom + XPEC_NEST_CAPP_CNTL, reg);
4657
4658 /* PEC2 has 3 ETUs + 16 PCI lanes that can operate in x16,
4659 * x8+x8 (bifurcated) or x8+x4+x4 (trifurcated) mode. When a
4660 * Mellanox CX5 card is attached to stack0 of this PEC, indicated by a
4661 * request to allocate CAPP_MAX_DMA_READ_ENGINES, we tweak the default
4662 * dma-read engine allocation to maximize DMA read performance
4663 */
4664 if ((p->index == CAPP1_PHB_INDEX) &&
4665 (capp_eng & CAPP_MAX_DMA_READ_ENGINES))
4666 phb4_pec2_dma_engine_realloc(p);
4667
4668 /* PCI to PB data movement ignores the PB init signal. */
4669 xscom_write_mask(p->chip_id, p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
4670 XPEC_NEST_PBCQ_HW_CONFIG_PBINIT,
4671 XPEC_NEST_PBCQ_HW_CONFIG_PBINIT);
4672
4673 /* If pump mode is enabled, don't do nodal broadcasts. */
4675 xscom_read(p->chip_id, PB_CENT_HP_MODE_CURR, &reg);
4676 if (reg & PB_CFG_PUMP_MODE) {
4677 reg = XPEC_NEST_PBCQ_HW_CONFIG_DIS_NODAL;
4678 reg |= XPEC_NEST_PBCQ_HW_CONFIG_DIS_RNNN;
4679 xscom_write_mask(p->chip_id,
4680 p->pe_xscom + XPEC_NEST_PBCQ_HW_CONFIG,
4681 reg, reg);
4682 }
4683
4684 /* PEC Phase 4 (PHB) registers adjustment
4685 * Inbound CAPP traffic: The CAPI can send both CAPP packets and
4686 * I/O packets. A PCIe packet is identified as a CAPP packet in
4687 * the PHB if the PCIe address matches either the CAPI
4688 * Compare/Mask register or its NBW Compare/Mask register.
4689 */
4690
4691 /*
4692 * Bit [0:7] XSL_DSNCTL[capiind]
4693 * Init_26 - CAPI Compare/Mask
4694 */
4695 out_be64(p->regs + PHB_CAPI_CMPM,
4696 ((u64)CAPIIND << 48) |
4697 ((u64)CAPIMASK << 32) | PHB_CAPI_CMPM_ENABLE);
4698
4699 /* PB AIB Hardware Control Register
4700 * Wait 32 PCI clocks for a credit to become available
4701 * before rejecting.
4702 */
4703 xscom_read(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, &reg);
4704 reg |= PPC_BITMASK(40, 42);
4705 if (p->index == CAPP1_PHB_INDEX)
4706 reg |= PPC_BIT(30);
4707 xscom_write(p->chip_id, p->pci_xscom + XPEC_PCI_PBAIB_HW_CONFIG, reg);
4708
4709 /* non-translate/50-bit mode */
4710 out_be64(p->regs + PHB_NXLATE_PREFIX, 0x0000000000000000Ull);
4711
4712 /* Set TVE no-translate mode, allowing the MMIO window */
4713 memset(p->tve_cache, 0x0, sizeof(p->tve_cache));
4714
4715 /*
4716 * In 50-bit non-translate mode, the fields of the TVE are
4717 * used to perform an address range check. In this mode TCE
4718 * Table Size(0) must be a '1' (TVE[51] = 1)
4719 * PCI Addr(49:24) >= TVE[52:53]+TVE[0:23] and
4720 * PCI Addr(49:24) < TVE[54:55]+TVE[24:47]
4721 *
4722 * TVE[51] = 1
4723 * TVE[56] = 1: 50-bit Non-Translate Mode Enable
4724 * TVE[0:23] = 0x000000
4725 * TVE[24:47] = 0xFFFFFF
4726 *
4727 * capi dma mode: CAPP DMA mode needs access to all of memory
4728 * capi mode: Allow address range (bit 14 = 1)
4729 * 0x0002000000000000: 0x0002FFFFFFFFFFFF
4730 * TVE[52:53] = '10' and TVE[54:55] = '10'
4731 */
4732
4733 /* TVT#0: CAPI window + DMA, all memory */
4734 start_addr = 0ull;
4735 end_addr = 0x0003ffffffffffffull;
4736 p->tve_cache[pe_number * 2] =
4737 tve_encode_50b_noxlate(start_addr, end_addr);
4738
4739 /* TVT#1: CAPI window + DMA, all memory, in bypass mode */
4740 start_addr = (1ull << 59);
4741 end_addr = start_addr + 0x0003ffffffffffffull;
4742 p->tve_cache[pe_number * 2 + 1] =
4743 tve_encode_50b_noxlate(start_addr, end_addr);
4744
4745 phb4_ioda_sel(p, IODA3_TBL_TVT, 0, true);
4746 for (i = 0; i < p->tvt_size; i++)
4747 out_be64(p->regs + PHB_IODA_DATA0, p->tve_cache[i]);
4748
4749 /*
4750 * Since TVT#0 is in by-pass mode, disable 32-bit MSI, as a
4751 * DMA write targeting 0x00000000FFFFxxxx would be interpreted
4752 * as a 32-bit MSI
4753 */
4754 reg = in_be64(p->regs + PHB_PHB4_CONFIG);
4755 reg &= ~PHB_PHB4C_32BIT_MSI_EN;
4756 out_be64(p->regs + PHB_PHB4_CONFIG, reg);
4757
4758 /* Set the MBT BAR to pass the CAPI MMIO window and keep the
4759 * other MMIO values
4760 */
4761 mbt0 = IODA3_MBT0_ENABLE | IODA3_MBT0_TYPE_M64 |
4762 SETFIELD(IODA3_MBT0_MODE, 0ull, IODA3_MBT0_MODE_SINGLE_PE) |
4763 SETFIELD(IODA3_MBT0_MDT_COLUMN, 0ull, 0) |
4764 (0x0002000000000000ULL & IODA3_MBT0_BASE_ADDR);
4765
4766 mbt1 = IODA3_MBT1_ENABLE |
4767 (0x00ff000000000000ULL & IODA3_MBT1_MASK) |
4768 SETFIELD(IODA3_MBT1_SINGLE_PE_NUM, 0ull, pe_number);
4769
4770 for (i = 0; i < p->mbt_size; i++) {
4771 /* search if the capi mmio window is already present */
4772 if ((p->mbt_cache[i][0] == mbt0) &&
4773 (p->mbt_cache[i][1] == mbt1))
4774 break;
4775
4776 /* search a free entry */
4777 if ((window_num == -1) &&
4778 ((!(p->mbt_cache[i][0] & IODA3_MBT0_ENABLE)) &&
4779 (!(p->mbt_cache[i][1] & IODA3_MBT1_ENABLE))))
4780 window_num = i;
4781 }
4782
4783 if (window_num >= 0 && i == p->mbt_size) {
4784 /* no capi mmio window found, so add it */
4785 p->mbt_cache[window_num][0] = mbt0;
4786 p->mbt_cache[window_num][1] = mbt1;
4787
4788 phb4_ioda_sel(p, IODA3_TBL_MBT, window_num << 1, true);
4789 out_be64(p->regs + PHB_IODA_DATA0, mbt0);
4790 out_be64(p->regs + PHB_IODA_DATA0, mbt1);
4791 } else if (i == p->mbt_size) {
4792 /* mbt cache full, this case should never happen */
4793 PHBERR(p, "CAPP: Failed to add CAPI mmio window\n");
4794 } else {
4795 /* duplicate entry. Nothing to do */
4796 }
4797
4798 phb4_init_capp_errors(p);
4799
4800 phb4_init_capp_regs(p, capp_eng);
4801
4802 if (!chiptod_capp_timebase_sync(p->chip_id, CAPP_TFMR,
4803 CAPP_TB,
4804 PHB4_CAPP_REG_OFFSET(p)))
4805 PHBERR(p, "CAPP: Failed to sync timebase\n");
4806
4807 /* set callbacks to handle HMI events */
4808 capi_ops.get_capp_info = &phb4_get_capp_info;
4809
4810 return OPAL_SUCCESS;
4811 }
4812
4813
4814 static int64_t phb4_init_capp(struct phb4 *p)
4815 {
4816 struct capp *capp;
4817 int rc;
4818
4819 if (p->index != CAPP0_PHB_INDEX &&
4820 p->index != CAPP1_PHB_INDEX)
4821 return OPAL_UNSUPPORTED;
4822
4823 capp = zalloc(sizeof(struct capp));
4824 if (capp == NULL)
4825 return OPAL_NO_MEM;
4826
4827 if (p->index == CAPP0_PHB_INDEX) {
4828 capp->capp_index = 0;
4829 capp->capp_xscom_offset = 0;
4830
4831 } else if (p->index == CAPP1_PHB_INDEX) {
4832 capp->capp_index = 1;
4833 capp->capp_xscom_offset = CAPP1_REG_OFFSET;
4834 }
4835
4836 capp->attached_pe = phb4_get_reserved_pe_number(&p->phb);
4837 capp->chip_id = p->chip_id;
4838
4839 /* Load capp microcode into the capp unit */
4840 rc = load_capp_ucode(p);
4841
4842 if (rc == OPAL_SUCCESS)
4843 p->capp = capp;
4844 else
4845 free(capp);
4846
4847 return rc;
4848 }
4849
4850 static int64_t phb4_set_capi_mode(struct phb *phb, uint64_t mode,
4851 uint64_t pe_number)
4852 {
4853 struct phb4 *p = phb_to_phb4(phb);
4854 struct proc_chip *chip = get_chip(p->chip_id);
4855 struct capp *capp = p->capp;
4856 uint64_t reg, ret;
4857
4858 /* No CAPI on P10. OpenCAPI only */
4859 if (is_phb5())
4860 return OPAL_UNSUPPORTED;
4861
4862 /* Can't do a mode switch when CAPP is in recovery mode */
4863 ret = capp_xscom_read(capp, CAPP_ERR_STATUS_CTRL, &reg);
4864 if (ret != OPAL_SUCCESS)
4865 return ret;
4866
4867 if ((reg & PPC_BIT(0)) && (!(reg & PPC_BIT(1)))) {
4868 PHBDBG(p, "CAPP: recovery in progress\n");
4869 return OPAL_BUSY;
4870 }
4871
4872
4873 switch (mode) {
4874
4875 case OPAL_PHB_CAPI_MODE_DMA: /* Enabled by default on p9 */
4876 case OPAL_PHB_CAPI_MODE_SNOOP_ON:
4877 /* nothing to do on P9 if CAPP is already enabled */
4878 ret = p->capp->phb ? OPAL_SUCCESS : OPAL_UNSUPPORTED;
4879 break;
4880
4881 case OPAL_PHB_CAPI_MODE_SNOOP_OFF:
4882 ret = p->capp->phb ? OPAL_UNSUPPORTED : OPAL_SUCCESS;
4883 break;
4884
4885 case OPAL_PHB_CAPI_MODE_PCIE:
4886 if (p->flags & PHB4_CAPP_DISABLE) {
4887 /* We are in middle of a CAPP disable */
4888 ret = OPAL_BUSY;
4889
4890 } else if (capp->phb) {
4891 /* Kick start a creset */
4892 p->flags |= PHB4_CAPP_DISABLE;
4893 PHBINF(p, "CAPP: PCIE mode needs a cold-reset\n");
4894 /* Kick off the pci state machine */
4895 ret = phb4_creset(phb->slot);
4896 ret = ret > 0 ? OPAL_BUSY : ret;
4897
4898 } else {
4899 /* PHB already in PCI mode */
4900 ret = OPAL_SUCCESS;
4901 }
4902 break;
4903
4904 case OPAL_PHB_CAPI_MODE_CAPI: /* Fall Through */
4905 case OPAL_PHB_CAPI_MODE_DMA_TVT1:
4906 /* Make sure that PHB is not disabling CAPP */
4907 if (p->flags & PHB4_CAPP_DISABLE) {
4908 PHBERR(p, "CAPP: Disable in progress\n");
4909 ret = OPAL_BUSY;
4910 break;
4911 }
4912
4913 /* Check if ucode is available */
4914 if (!capp_ucode_loaded(chip, p->index)) {
4915 PHBERR(p, "CAPP: ucode not loaded\n");
4916 ret = OPAL_RESOURCE;
4917 break;
4918 }
4919
4920 /*
4921 * Mark the CAPP attached to the PHB right away so that
4922 * if an MCE happens during CAPP init we can handle it.
4923 * In case of an error in CAPP init we remove the PHB
4924 * from the attached_mask later.
4925 */
4926 capp->phb = phb;
4927 capp->attached_pe = pe_number;
4928
4929 if (mode == OPAL_PHB_CAPI_MODE_DMA_TVT1)
4930 ret = enable_capi_mode(p, pe_number,
4931 CAPP_MIN_STQ_ENGINES |
4932 CAPP_MAX_DMA_READ_ENGINES);
4933
4934 else
4935 ret = enable_capi_mode(p, pe_number,
4936 CAPP_MAX_STQ_ENGINES |
4937 CAPP_MIN_DMA_READ_ENGINES);
4938 if (ret == OPAL_SUCCESS) {
4939 /* register notification on system shutdown */
4940 opal_add_host_sync_notifier(&phb4_host_sync_reset, p);
4941
4942 } else {
4943 /* In case of an error mark the PHB detached */
4944 capp->phb = NULL;
4945 capp->attached_pe = phb4_get_reserved_pe_number(phb);
4946 }
4947 break;
4948
4949 default:
4950 ret = OPAL_UNSUPPORTED;
4951 break;
4952 };
4953
4954 return ret;
4955 }
4956
4957 static void phb4_p2p_set_initiator(struct phb4 *p, uint16_t pe_number)
4958 {
4959 uint64_t tve;
4960 uint16_t window_id = (pe_number << 1) + 1;
4961
4962 /*
4963 * Initiator needs access to the MMIO space of the target,
4964 * which is well beyond the 'normal' memory area. Set its TVE
4965 * with no range checking.
4966 */
4967 PHBDBG(p, "Setting TVE#1 for peer-to-peer for pe %d\n", pe_number);
4968 tve = PPC_BIT(51);
4969 phb4_ioda_sel(p, IODA3_TBL_TVT, window_id, false);
4970 out_be64(p->regs + PHB_IODA_DATA0, tve);
4971 p->tve_cache[window_id] = tve;
4972 }
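/*
 * Note (illustrative): each PE owns two consecutive TVT entries, with
 * TVE#n of PE p at index p * 2 + n. PE 5, for example, uses entries 10
 * and 11, and the initiator setup above rewrites only entry 11 (TVE#1).
 */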
4973
4974 static void phb4_p2p_set_target(struct phb4 *p, bool enable)
4975 {
4976 uint64_t val;
4977
4978 /*
4979 * Enabling p2p on a target PHB reserves an outbound (as seen
4980 * from the CPU) store queue for p2p
4981 */
4982 PHBDBG(p, "%s peer-to-peer\n", (enable ? "Enabling" : "Disabling"));
4983 xscom_read(p->chip_id,
4984 p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, &val);
4985 if (enable)
4986 val |= XPEC_NEST_STK_PBCQ_MODE_P2P;
4987 else
4988 val &= ~XPEC_NEST_STK_PBCQ_MODE_P2P;
4989 xscom_write(p->chip_id,
4990 p->pe_stk_xscom + XPEC_NEST_STK_PBCQ_MODE, val);
4991 }
4992
4993 static void phb4_set_p2p(struct phb *phb, uint64_t mode, uint64_t flags,
4994 uint16_t pe_number)
4995 {
4996 struct phb4 *p = phb_to_phb4(phb);
4997
4998 switch (mode) {
4999 case OPAL_PCI_P2P_INITIATOR:
5000 if (flags & OPAL_PCI_P2P_ENABLE)
5001 phb4_p2p_set_initiator(p, pe_number);
5002 /*
5003 * When disabling p2p on the initiator, we should
5004 * reset the TVE to its default bypass setting, but it
5005 * is more easily done from the OS, as it knows
5006 * the start and end addresses and there's already an
5007 * OPAL call for it, so let Linux handle it.
5008 */
5009 break;
5010 case OPAL_PCI_P2P_TARGET:
5011 phb4_p2p_set_target(p, !!(flags & OPAL_PCI_P2P_ENABLE));
5012 break;
5013 default:
5014 assert(0);
5015 }
5016 }
5017
5018 static int64_t phb4_set_capp_recovery(struct phb *phb)
5019 {
5020 struct phb4 *p = phb_to_phb4(phb);
5021
5022 if (p->flags & PHB4_CAPP_RECOVERY)
5023 return 0;
5024
5025 /* set opal event flag to indicate eeh condition */
5026 opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
5027 OPAL_EVENT_PCI_ERROR);
5028
5029 p->flags |= PHB4_CAPP_RECOVERY;
5030
5031 return 0;
5032 }
5033
5034 /*
5035 * Return the address out of a PBCQ Tunnel Bar register.
5036 */
5037 static void phb4_get_tunnel_bar(struct phb *phb, uint64_t *addr)
5038 {
5039 struct phb4 *p = phb_to_phb4(phb);
5040 uint64_t val;
5041
5042 xscom_read(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
5043 &val);
5044 *addr = val >> 8;
5045 }
5046
5047 /*
5048 * Set PBCQ Tunnel Bar register.
5049 * Store addr bits [8:50] in PBCQ Tunnel Bar register bits [0:42].
5050 * Note that addr bits [8:50] must also match PSL_TNR_ADDR[8:50].
5051 * Reset register if val == 0.
5052 *
5053 * This interface is required to let device drivers set the Tunnel Bar
5054 * value of their choice.
5055 *
5056 * Compatibility with older versions of linux, that do not set the
5057 * Tunnel Bar with phb4_set_tunnel_bar(), is ensured by enable_capi_mode(),
5058 * that will set the default value that used to be assumed.
5059 */
5060 static int64_t phb4_set_tunnel_bar(struct phb *phb, uint64_t addr)
5061 {
5062 struct phb4 *p = phb_to_phb4(phb);
5063 uint64_t mask = 0x00FFFFFFFFFFE000ULL;
5064
5065 if (!addr) {
5066 /* Reset register */
5067 xscom_write(p->chip_id,
5068 p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR, addr);
5069 return OPAL_SUCCESS;
5070 }
5071 if ((addr & ~mask))
5072 return OPAL_PARAMETER;
5073 if (!(addr & mask))
5074 return OPAL_PARAMETER;
5075
5076 xscom_write(p->chip_id, p->pe_stk_xscom + XPEC_NEST_STK_TUNNEL_BAR,
5077 (addr & mask) << 8);
5078 return OPAL_SUCCESS;
5079 }
5080
5081 static const struct phb_ops phb4_ops = {
5082 .cfg_read8 = phb4_pcicfg_read8,
5083 .cfg_read16 = phb4_pcicfg_read16,
5084 .cfg_read32 = phb4_pcicfg_read32,
5085 .cfg_write8 = phb4_pcicfg_write8,
5086 .cfg_write16 = phb4_pcicfg_write16,
5087 .cfg_write32 = phb4_pcicfg_write32,
5088 .get_reserved_pe_number = phb4_get_reserved_pe_number,
5089 .device_init = phb4_device_init,
5090 .device_remove = NULL,
5091 .ioda_reset = phb4_ioda_reset,
5092 .papr_errinjct_reset = phb4_papr_errinjct_reset,
5093 .pci_reinit = phb4_pci_reinit,
5094 .set_phb_mem_window = phb4_set_phb_mem_window,
5095 .phb_mmio_enable = phb4_phb_mmio_enable,
5096 .map_pe_mmio_window = phb4_map_pe_mmio_window,
5097 .map_pe_dma_window = phb4_map_pe_dma_window,
5098 .map_pe_dma_window_real = phb4_map_pe_dma_window_real,
5099 .set_option = phb4_set_option,
5100 .get_option = phb4_get_option,
5101 .set_xive_pe = phb4_set_ive_pe,
5102 .get_msi_32 = phb4_get_msi_32,
5103 .get_msi_64 = phb4_get_msi_64,
5104 .set_pe = phb4_set_pe,
5105 .set_peltv = phb4_set_peltv,
5106 .eeh_freeze_status = phb4_eeh_freeze_status,
5107 .eeh_freeze_clear = phb4_eeh_freeze_clear,
5108 .eeh_freeze_set = phb4_eeh_freeze_set,
5109 .next_error = phb4_eeh_next_error,
5110 .err_inject = phb4_err_inject,
5111 .get_diag_data2 = phb4_get_diag_data,
5112 .tce_kill = phb4_tce_kill,
5113 .set_capi_mode = phb4_set_capi_mode,
5114 .set_p2p = phb4_set_p2p,
5115 .set_capp_recovery = phb4_set_capp_recovery,
5116 .get_tunnel_bar = phb4_get_tunnel_bar,
5117 .set_tunnel_bar = phb4_set_tunnel_bar,
5118 };
5119
static void phb4_init_ioda3(struct phb4 *p)
{
	if (is_phb5()) {
		/*
		 * When ABT is on, the MSIs on the PHB use the PQ state bits
		 * of the IC and MSI triggers from the PHB are forwarded
		 * directly to the IC ESB page. However, the LSIs are still
		 * controlled locally on the PHB and LSI triggers use a
		 * special offset for trigger injection.
		 */
		if (phb_abt_mode(p)) {
			uint64_t mmio_base = xive2_get_esb_base(p->base_msi);

			PHBDBG(p, "Using ABT mode. ESB: 0x%016llx\n", mmio_base);

			/* Init_18 - Interrupt Notify Base Address */
			out_be64(p->regs + PHB_INT_NOTIFY_ADDR,
				 PHB_INT_NOTIFY_ADDR_64K | mmio_base);

			/* Interrupt Notify Base Index is unused */
		} else {
			p->irq_port = xive2_get_notify_port(p->chip_id,
						XIVE_HW_SRC_PHBn(p->index));

			PHBDBG(p, "Using IC notif page at 0x%016llx\n",
			       p->irq_port);

			/* Init_18 - Interrupt Notify Base Address */
			out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);

			/* Init_19 - Interrupt Notify Base Index */
			out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
				 xive2_get_notify_base(p->base_msi));
		}

	} else { /* p9 */
		p->irq_port = xive_get_notify_port(p->chip_id,
						   XIVE_HW_SRC_PHBn(p->index));
		/* Init_18 - Interrupt Notify Base Address */
		out_be64(p->regs + PHB_INT_NOTIFY_ADDR, p->irq_port);

		/* Init_19 - Interrupt Notify Base Index */
		out_be64(p->regs + PHB_INT_NOTIFY_INDEX,
			 xive_get_notify_base(p->base_msi));
	}

	/* Init_19x - Not in spec: Initialize source ID */
	PHBDBG(p, "Reset state SRC_ID: %016llx\n",
	       in_be64(p->regs + PHB_LSI_SOURCE_ID));
	out_be64(p->regs + PHB_LSI_SOURCE_ID,
		 SETFIELD(PHB_LSI_SRC_ID, 0ull, (p->num_irqs - 1) >> 3));
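	/*
	 * Worked example (illustrative): with num_irqs = 4096, the field
	 * above is programmed with (4096 - 1) >> 3 = 0x1ff.
	 */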

	/* Init_20 - RTT BAR */
	out_be64(p->regs + PHB_RTT_BAR, (u64) p->tbl_rtt | PHB_RTT_BAR_ENABLE);

	/* Init_21 - PELT-V BAR */
	out_be64(p->regs + PHB_PELTV_BAR,
		 (u64) p->tbl_peltv | PHB_PELTV_BAR_ENABLE);

	/* Init_22 - Setup M32 starting address */
	out_be64(p->regs + PHB_M32_START_ADDR, M32_PCI_START);

	/* Init_23 - Setup PEST BAR */
	out_be64(p->regs + PHB_PEST_BAR,
		 p->tbl_pest | PHB_PEST_BAR_ENABLE);

	/* Init_24 - CRW Base Address Reg */
	/* See enable_capi_mode() */

	if (is_phb4()) {
		/* Init_25 - ASN Compare/Mask - P9 only */
		out_be64(p->regs + PHB_ASN_CMPM, ((u64)ASNIND << 48) |
			 ((u64)ASNMASK << 32) | PHB_ASN_CMPM_ENABLE);
	}

	/* Init_26 - CAPI Compare/Mask */
	/* See enable_capi_mode() */
	/* If CAPP is being disabled then reset the CAPI Compare/Mask Register */
	if (p->flags & PHB4_CAPP_DISABLE)
		out_be64(p->regs + PHB_CAPI_CMPM, 0);

	/* Init_27 - PCIE Outbound upper address */
	out_be64(p->regs + PHB_M64_UPPER_BITS, 0);

	/* Init_28 - PHB4 Configuration */
	out_be64(p->regs + PHB_PHB4_CONFIG,
		 PHB_PHB4C_32BIT_MSI_EN |
		 PHB_PHB4C_64BIT_MSI_EN);

	/* Init_29 - At least 256ns delay according to spec. Do a dummy
	 * read first to flush posted writes
	 */
	in_be64(p->regs + PHB_PHB4_CONFIG);
	time_wait_us(2);

	/* Init_30..41 - On-chip IODA tables init */
	phb4_ioda_reset(&p->phb, false);
}

/* phb4_init_rc_cfg - Initialize the Root Complex config space
 */
static bool phb4_init_rc_cfg(struct phb4 *p)
{
	int64_t ecap, aercap;

	/* XXX Handle errors ? */

	/* Init_46:
	 *
	 * Set primary bus to 0, secondary to 1 and subordinate to 0xff
	 */
	phb4_pcicfg_write32(&p->phb, 0, PCI_CFG_PRIMARY_BUS, 0x00ff0100);

	/* Init_47 - Clear errors */
	/* see phb4_rc_err_clear() called below */

	/* Init_48
	 *
	 * PCIE Device control/status, enable error reporting, disable relaxed
	 * ordering, set MPS to 128 (see note), clear errors.
	 *
	 * Note: The doc recommends setting MPS to 512. This has proved to
	 * have some issues as it requires specific clamping of MRSS on
	 * devices and we've found devices in the field that misbehave when
	 * doing that.
	 *
	 * We currently leave it all at 128 bytes (minimum setting) at init
	 * time. The generic PCIe probing later on might apply a different
	 * value, or the kernel will, but we play it safe at early init
	 */
	if (p->ecap <= 0) {
		ecap = pci_find_cap(&p->phb, 0, PCI_CFG_CAP_ID_EXP);
		if (ecap < 0) {
			PHBERR(p, "Can't locate PCI-E capability\n");
			return false;
		}
		p->ecap = ecap;
	} else {
		ecap = p->ecap;
	}

	phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DEVCTL,
			    PCICAP_EXP_DEVCTL_CE_REPORT |
			    PCICAP_EXP_DEVCTL_NFE_REPORT |
			    PCICAP_EXP_DEVCTL_FE_REPORT |
			    PCICAP_EXP_DEVCTL_UR_REPORT |
			    SETFIELD(PCICAP_EXP_DEVCTL_MPS, 0, PCIE_MPS_128B));

	/* Init_49 - Device Control/Status 2 */
	phb4_pcicfg_write16(&p->phb, 0, ecap + PCICAP_EXP_DCTL2,
			    SETFIELD(PCICAP_EXP_DCTL2_CMPTOUT, 0, 0x5) |
			    PCICAP_EXP_DCTL2_ARI_FWD);

	/* Init_50..54
	 *
	 * AER inits
	 */
	if (p->aercap <= 0) {
		aercap = pci_find_ecap(&p->phb, 0, PCIECAP_ID_AER, NULL);
		if (aercap < 0) {
			PHBERR(p, "Can't locate AER capability\n");
			return false;
		}
		p->aercap = aercap;
	} else {
		aercap = p->aercap;
	}

	/* Disable some error reporting as per the PHB4 spec */
	phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_UE_MASK,
			    PCIECAP_AER_UE_POISON_TLP |
			    PCIECAP_AER_UE_COMPL_TIMEOUT |
			    PCIECAP_AER_UE_COMPL_ABORT);

	/* Enable ECRC generation & checking */
	phb4_pcicfg_write32(&p->phb, 0, aercap + PCIECAP_AER_CAPCTL,
			    PCIECAP_AER_CAPCTL_ECRCG_EN |
			    PCIECAP_AER_CAPCTL_ECRCC_EN);

	phb4_rc_err_clear(p);

	return true;
}

static void phb4_init_errors(struct phb4 *p)
{
	/* Init_55..63 - PBL errors */
	out_be64(p->regs + 0x1900, 0xffffffffffffffffull);
	out_be64(p->regs + 0x1908, 0x0000000000000000ull);
	out_be64(p->regs + 0x1920, 0x000000004d1780f8ull);
	out_be64(p->regs + 0x1928, 0x0000000000000000ull);
	out_be64(p->regs + 0x1930, 0xffffffffb2f87f07ull);
	out_be64(p->regs + 0x1940, 0x0000000000000000ull);
	out_be64(p->regs + 0x1948, 0x0000000000000000ull);
	out_be64(p->regs + 0x1950, 0x0000000000000000ull);
	out_be64(p->regs + 0x1958, 0x0000000000000000ull);

	/* Init_64..72 - REGB errors */
	out_be64(p->regs + 0x1c00, 0xffffffffffffffffull);
	out_be64(p->regs + 0x1c08, 0x0000000000000000ull);
	/* Enable/disable error status indicators that trigger irqs */
	if (p->has_link) {
		out_be64(p->regs + 0x1c20, 0x2130006efca8bc00ull);
		out_be64(p->regs + 0x1c30, 0xde1fff91035743ffull);
	} else {
		out_be64(p->regs + 0x1c20, 0x0000000000000000ull);
		out_be64(p->regs + 0x1c30, 0x0000000000000000ull);
	}
	out_be64(p->regs + 0x1c28, 0x0080000000000000ull);
	out_be64(p->regs + 0x1c40, 0x0000000000000000ull);
	out_be64(p->regs + 0x1c48, 0x0000000000000000ull);
	out_be64(p->regs + 0x1c50, 0x0000000000000000ull);
	out_be64(p->regs + 0x1c58, 0x0040000000000000ull);

	/* Init_73..81 - TXE errors */
	out_be64(p->regs + 0x0d08, 0x0000000000000000ull);

	/* Errata: Clear bit 17, otherwise a CFG write UR/CA will incorrectly
	 * freeze a "random" PE (whatever last PE did an MMIO)
	 */
	if (is_phb5()) {
		out_be64(p->regs + 0x0d28, 0x0000500a00000000ull);
		out_be64(p->regs + 0x0d00, 0xffffffffffffffffull);
		out_be64(p->regs + 0x0d18, 0xffffff0fffffffffull);
		out_be64(p->regs + 0x0d30, 0xdff7af41f7ddffdfull);
	} else {
		out_be64(p->regs + 0x0d28, 0x0000000a00000000ull);
		if (phb4_is_dd20(p)) {
			out_be64(p->regs + 0x0d00, 0xf3acff0ff7ddfff0ull);
			out_be64(p->regs + 0x0d18, 0xf3acff0ff7ddfff0ull);
			out_be64(p->regs + 0x0d30, 0xdfffbd05f7ddfff0ull); /* XXX CAPI has diff. value */
		} else {
			out_be64(p->regs + 0x0d00, 0xffffffffffffffffull);
			out_be64(p->regs + 0x0d18, 0xffffff0fffffffffull);
			out_be64(p->regs + 0x0d30, 0xdff7bd05f7ddfff0ull);
		}
	}

	out_be64(p->regs + 0x0d40, 0x0000000000000000ull);
	out_be64(p->regs + 0x0d48, 0x0000000000000000ull);
	out_be64(p->regs + 0x0d50, 0x0000000000000000ull);
	out_be64(p->regs + 0x0d58, 0x0000000000000000ull);

	/* Init_82..90 - RXE_ARB errors */
	out_be64(p->regs + 0x0d80, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0d88, 0x0000000000000000ull);
	out_be64(p->regs + 0x0d98, 0xfffffffffbffffffull);
	out_be64(p->regs + 0x0da8, 0xc00018b801000060ull);
	/*
	 * Errata ER20161123 says we should set the top two bits in
	 * 0x0db0 but this causes config space accesses which don't
	 * get a response to fence the PHB. This breaks probing,
	 * hence we don't set them here.
	 */
	out_be64(p->regs + 0x0db0, 0x3bffd703fa7fbf8full); /* XXX CAPI has diff. value */
	out_be64(p->regs + 0x0dc0, 0x0000000000000000ull);
	out_be64(p->regs + 0x0dc8, 0x0000000000000000ull);
	out_be64(p->regs + 0x0dd0, 0x0000000000000000ull);
	out_be64(p->regs + 0x0dd8, 0x0000000004000000ull);

	/* Init_91..99 - RXE_MRG errors */
	out_be64(p->regs + 0x0e00, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0e08, 0x0000000000000000ull);
	out_be64(p->regs + 0x0e18, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0e28, 0x0000600000000000ull);
	out_be64(p->regs + 0x0e30, 0xfffffeffff7fff57ull);
	out_be64(p->regs + 0x0e40, 0x0000000000000000ull);
	out_be64(p->regs + 0x0e48, 0x0000000000000000ull);
	out_be64(p->regs + 0x0e50, 0x0000000000000000ull);
	out_be64(p->regs + 0x0e58, 0x0000000000000000ull);

	/* Init_100..108 - RXE_TCE errors */
	out_be64(p->regs + 0x0e80, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0e88, 0x0000000000000000ull);
	out_be64(p->regs + 0x0e98, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0ea8, 0x60000000c0000000ull);
	out_be64(p->regs + 0x0eb0, 0x9faeffaf3fffffffull); /* XXX CAPI has diff. value */
	out_be64(p->regs + 0x0ec0, 0x0000000000000000ull);
	out_be64(p->regs + 0x0ec8, 0x0000000000000000ull);
	out_be64(p->regs + 0x0ed0, 0x0000000000000000ull);
	out_be64(p->regs + 0x0ed8, 0x0000000000000000ull);

	/* Init_109..117 - RXPHB errors */
	out_be64(p->regs + 0x0c80, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0c88, 0x0000000000000000ull);
	out_be64(p->regs + 0x0c98, 0xffffffffffffffffull);
	out_be64(p->regs + 0x0ca8, 0x0000004000000000ull);
	out_be64(p->regs + 0x0cb0, 0x35777033ff000000ull); /* XXX CAPI has diff. value */
	out_be64(p->regs + 0x0cc0, 0x0000000000000000ull);
	out_be64(p->regs + 0x0cc8, 0x0000000000000000ull);
	out_be64(p->regs + 0x0cd0, 0x0000000000000000ull);
	out_be64(p->regs + 0x0cd8, 0x0000000000000000ull);

	/* Init_118..121 - LEM */
	out_be64(p->regs + 0x0c00, 0x0000000000000000ull);
	if (phb4_is_dd20(p)) {
		out_be64(p->regs + 0x0c30, 0xf3ffffffffffffffull);
		out_be64(p->regs + 0x0c38, 0xf3ffffffffffffffull);
	} else {
		out_be64(p->regs + 0x0c30, 0xffffffffffffffffull);
		out_be64(p->regs + 0x0c38, 0xffffffffffffffffull);
	}
	out_be64(p->regs + 0x0c40, 0x0000000000000000ull);
}

static bool phb4_wait_dlp_reset(struct phb4 *p)
{
	unsigned int i;
	uint64_t val;

	/*
	 * Firmware cannot access the UTL core regs or PCI config space
	 * until the cores are out of DL_PGRESET.
	 * DL_PGRESET should be polled until it is inactive with a value
	 * of '0'. The recommended polling frequency is once every 1ms.
	 * Firmware should poll at least 200 attempts before giving up.
	 * MMIO Stores to the link are silently dropped by the UTL core if
	 * the link is down.
	 * MMIO Loads to the link will be dropped by the UTL core and will
	 * eventually time-out and will return an all ones response if the
	 * link is down.
	 */
#define DLP_RESET_ATTEMPTS 200
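	/*
	 * 200 attempts at the recommended 1ms poll rate bounds the total
	 * wait below at roughly 200ms before we declare a timeout.
	 */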

	PHBDBG(p, "Waiting for DLP PG reset to complete...\n");
	for (i = 0; i < DLP_RESET_ATTEMPTS; i++) {
		val = in_be64(p->regs + PHB_PCIE_DLP_TRAIN_CTL);
		if (!(val & PHB_PCIE_DLP_DL_PGRESET))
			break;
		time_wait_ms(1);
	}
	if (val & PHB_PCIE_DLP_DL_PGRESET) {
		PHBERR(p, "Timeout waiting for DLP PG reset !\n");
		return false;
	}
	return true;
}

static void phb4_init_hw(struct phb4 *p)
{
	uint64_t val, creset;

	PHBDBG(p, "Initializing PHB...\n");

	/* Init_1 - Sync reset
	 *
	 * At this point we assume the PHB has already been reset.
	 */

	/* Init_2 - Mask FIRs */
	out_be64(p->regs + PHB_LEM_ERROR_MASK, 0xffffffffffffffffull);

	/* Init_3 - TCE tag enable */
	out_be64(p->regs + PHB_TCE_TAG_ENABLE, 0xffffffffffffffffull);

	/* Init_4 - PCIE System Configuration Register
	 *
	 * Adjust max speed based on system config
	 */
	val = in_be64(p->regs + PHB_PCIE_SCR);
	PHBDBG(p, "Default system config: 0x%016llx\n", val);
	val = SETFIELD(PHB_PCIE_SCR_MAXLINKSPEED, val, p->max_link_speed);
	out_be64(p->regs + PHB_PCIE_SCR, val);
	PHBDBG(p, "New system config : 0x%016llx\n",
	       in_be64(p->regs + PHB_PCIE_SCR));

	/* Init_5 - deassert CFG reset */
	creset = in_be64(p->regs + PHB_PCIE_CRESET);
	PHBDBG(p, "Initial PHB CRESET is 0x%016llx\n", creset);
	creset &= ~PHB_PCIE_CRESET_CFG_CORE;
	out_be64(p->regs + PHB_PCIE_CRESET, creset);

	/* Init_6..13 - PCIE DLP Lane EQ control */
	if (p->lane_eq) {
		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL0, be64_to_cpu(p->lane_eq[0]));
		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL1, be64_to_cpu(p->lane_eq[1]));
		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL2, be64_to_cpu(p->lane_eq[2]));
		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL3, be64_to_cpu(p->lane_eq[3]));
		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL40, be64_to_cpu(p->lane_eq[4]));
		out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL41, be64_to_cpu(p->lane_eq[5]));
		if (is_phb5()) {
			out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL50, be64_to_cpu(p->lane_eq[6]));
			out_be64(p->regs + PHB_PCIE_LANE_EQ_CNTL51, be64_to_cpu(p->lane_eq[7]));
		}
	}
	if (!p->lane_eq_en) {
		/* Read-modify-write to set the two bypass bits */
		PHBDBG(p, "LINK: Disabling Lane EQ\n");
		val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
		val |= PHB_PCIE_DLP_CTL_BYPASS_PH2 | PHB_PCIE_DLP_CTL_BYPASS_PH3;
		out_be64(p->regs + PHB_PCIE_DLP_CTL, val);
	}

	if (is_phb5()) {
		/* disable scaled flow control for now. SW527785 */
		PHBDBG(p, "LINK: Disabling scaled flow control\n");
		val = in_be64(p->regs + PHB_PCIE_DLP_CTL);
		val |= PHB_PCIE_DLP_CTL_SFC_DISABLE;
		out_be64(p->regs + PHB_PCIE_DLP_CTL, val);

		/* lane equalization settings need to be tuned on P10 */
		out_be64(p->regs + PHB_PCIE_PDL_PHY_EQ_CNTL,
			 0x80F4FFFFFF0F9C00);
	}

	/* Init_14 - Clear link training */
	phb4_pcicfg_write32(&p->phb, 0, 0x78,
			    0x07FE0000 | p->max_link_speed);

	/* Init_15 - deassert cores reset */
	/*
	 * Lift the PHB resets but not PERST, this will be lifted
	 * later by the initial PERST state machine
	 */
	creset &= ~(PHB_PCIE_CRESET_TLDLP | PHB_PCIE_CRESET_PBL);
	creset |= PHB_PCIE_CRESET_PIPE_N;
	out_be64(p->regs + PHB_PCIE_CRESET, creset);

	/* Init_16 - Wait for DLP PGRESET to clear */
	if (!phb4_wait_dlp_reset(p))
		goto failed;

	/* Init_17 - PHB Control */
	val = PHB_CTRLR_IRQ_PGSZ_64K;
	val |= PHB_CTRLR_TCE_CLB_DISABLE; // HW557787 circumvention
	val |= SETFIELD(PHB_CTRLR_TVT_ADDR_SEL, 0ull, TVT_2_PER_PE);
	if (phb_pq_disable(p))
		val |= PHB_CTRLR_IRQ_PQ_DISABLE;
	if (phb_abt_mode(p))
		val |= PHB_CTRLR_IRQ_ABT_MODE;
	if (phb_can_store_eoi(p)) {
		val |= PHB_CTRLR_IRQ_STORE_EOI;
		PHBDBG(p, "store EOI is enabled\n");
	}

	if (!pci_eeh_mmio)
		val |= PHB_CTRLR_MMIO_EEH_DISABLE;

	out_be64(p->regs + PHB_CTRLR, val);

	/* Init_18..41 - Architected IODA3 inits */
	phb4_init_ioda3(p);

	/* Init_42..45 - Clear DLP error logs */
	out_be64(p->regs + 0x1aa0, 0xffffffffffffffffull);
	out_be64(p->regs + 0x1aa8, 0xffffffffffffffffull);
	out_be64(p->regs + 0x1ab0, 0xffffffffffffffffull);
	out_be64(p->regs + 0x1ab8, 0x0);

	/* Init_46..54 : Init root complex config space */
	if (!phb4_init_rc_cfg(p))
		goto failed;

	/* Init_55..121 : Setup error registers */
	phb4_init_errors(p);

	/* Init_122..123 : Wait for link
	 * NOTE: At this point the spec waits for the link to come up. We
	 * don't bother as we are doing a PERST soon.
	 */

	/* Init_124 : NBW. XXX TODO */
	/* See enable_capi_mode() */

	/* Init_125 : Setup PCI command/status on root complex
	 * I don't know why the spec does this now and not earlier, so
	 * to be sure to get it right we might want to move it to the freset
	 * state machine, though the generic PCI layer will probably do
	 * this anyway (ie, enable MEM, etc... in the RC)
	 */
	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_CMD,
			    PCI_CFG_CMD_MEM_EN |
			    PCI_CFG_CMD_BUS_MASTER_EN);

	/* Clear errors */
	phb4_pcicfg_write16(&p->phb, 0, PCI_CFG_STAT,
			    PCI_CFG_STAT_SENT_TABORT |
			    PCI_CFG_STAT_RECV_TABORT |
			    PCI_CFG_STAT_RECV_MABORT |
			    PCI_CFG_STAT_SENT_SERR |
			    PCI_CFG_STAT_RECV_PERR);

	/* Init_126..130 - Re-enable error interrupts */
	phb4_int_unmask_all(p);

	/* Init_131 - Re-enable LEM error mask */
	out_be64(p->regs + PHB_LEM_ERROR_MASK, 0x0000000000000000ull);

	/* Init_132 - Enable DMA address speculation */
	out_be64(p->regs + PHB_TCE_SPEC_CTL, 0x0000000000000000ull);

	/* Init_133 - Timeout Control Register 1 */
	out_be64(p->regs + PHB_TIMEOUT_CTRL1, 0x0015150000150000ull);

	/* Init_134 - Timeout Control Register 2 */
	out_be64(p->regs + PHB_TIMEOUT_CTRL2, 0x0000151500000000ull);

	/* Init_135 - PBL Timeout Control Register */
	out_be64(p->regs + PHB_PBL_TIMEOUT_CTRL, 0x2013000000000000ull);

	/* Mark the PHB as functional which enables all the various sequences */
	p->broken = false;

	PHBDBG(p, "Initialization complete\n");

	return;

failed:
	PHBERR(p, "Initialization failed\n");
	p->broken = true;
}

/* FIXME: Use scoms rather than MMIO in case we are fenced */
static bool phb4_read_capabilities(struct phb4 *p)
{
	uint64_t val;

	/* XXX Should make sure ETU is out of reset ! */

	/* Grab version and fit it in an int */
	val = phb4_read_reg_asb(p, PHB_VERSION);
	if (val == 0 || val == 0xffffffffffffffffUL) {
		PHBERR(p, "Failed to read version, PHB appears broken\n");
		return false;
	}

	p->rev = ((val >> 16) & 0x00ff0000) | (val & 0xffff);
	PHBDBG(p, "Core revision 0x%x\n", p->rev);
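	/*
	 * Illustrative example (assuming the usual register layout): a
	 * PHB_VERSION of 0x000000a400000002 folds down to rev 0x00a40002,
	 * i.e. major byte 0xa4 and minor 0x0002.
	 */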

	/* Read EEH capabilities */
	val = in_be64(p->regs + PHB_PHB4_EEH_CAP);
	if (val == 0xffffffffffffffffUL) {
		PHBERR(p, "Failed to read EEH cap, PHB appears broken\n");
		return false;
	}
	p->max_num_pes = val >> 52;
	if (p->max_num_pes >= 512) {
		p->mrt_size = 16;
		p->mbt_size = 32;
		p->tvt_size = 1024;
	} else {
		p->mrt_size = 8;
		p->mbt_size = 16;
		p->tvt_size = 512;
	}
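	/*
	 * Sketch of the geometry choice above (illustrative): a PHB
	 * reporting 512 PEs takes the first branch and gets 1024 TVEs,
	 * i.e. two per PE, matching the TVT_2_PER_PE selection made in
	 * phb4_init_hw().
	 */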

	val = in_be64(p->regs + PHB_PHB4_IRQ_CAP);
	if (val == 0xffffffffffffffffUL) {
		PHBERR(p, "Failed to read IRQ cap, PHB appears broken\n");
		return false;
	}
	p->num_irqs = val & 0xffff;

	/* This works for 512 PEs. FIXME: calculate for any hardware
	 * size returned above
	 */
	p->tbl_peltv_size = PELTV_TABLE_SIZE_MAX;

	p->tbl_pest_size = p->max_num_pes * 16;

	PHBDBG(p, "Found %d max PEs and %d IRQs\n",
	       p->max_num_pes, p->num_irqs);

	return true;
}

static void phb4_allocate_tables(struct phb4 *p)
{
	uint32_t i;

	/* XXX Our current memalign implementation sucks,
	 *
	 * It will do the job, however it doesn't support freeing
	 * the memory and wastes space by always allocating twice
	 * as much as requested (size + alignment)
	 */
	p->tbl_rtt = local_alloc(p->chip_id, RTT_TABLE_SIZE, RTT_TABLE_SIZE);
	assert(p->tbl_rtt);
	for (i = 0; i < RTT_TABLE_ENTRIES; i++)
		p->tbl_rtt[i] = cpu_to_be16(PHB4_RESERVED_PE_NUM(p));

	p->tbl_peltv = local_alloc(p->chip_id, p->tbl_peltv_size, p->tbl_peltv_size);
	assert(p->tbl_peltv);
	memset(p->tbl_peltv, 0, p->tbl_peltv_size);

	p->tbl_pest = (uint64_t)local_alloc(p->chip_id, p->tbl_pest_size, p->tbl_pest_size);
	assert(p->tbl_pest);
	memset((void *)p->tbl_pest, 0, p->tbl_pest_size);
}

static void phb4_add_properties(struct phb4 *p)
{
	struct dt_node *np = p->phb.dt_node;
	uint32_t lsibase, icsp = get_ics_phandle();
	uint64_t m32b, m64b, m64s;

	/* Add various properties that HB doesn't have to
	 * add, some of them simply because they result from
	 * policy decisions made in skiboot rather than in HB
	 * such as the MMIO windows going to PCI, interrupts,
	 * etc...
	 */
	dt_add_property_cells(np, "#address-cells", 3);
	dt_add_property_cells(np, "#size-cells", 2);
	dt_add_property_cells(np, "#interrupt-cells", 1);
	dt_add_property_cells(np, "bus-range", 0, 0xff);
	dt_add_property_cells(np, "clock-frequency", 0x200, 0); /* ??? */

	dt_add_property_cells(np, "interrupt-parent", icsp);

	/* XXX FIXME: add slot-name */
	//dt_property_cell("bus-width", 8); /* Figure it out from VPD ? */

	/* "ranges", we only expose M32 (PHB4 doesn't do IO)
	 *
	 * Note: The kernel expects us to have chopped off 64k from the
	 * M32 size (for the 32-bit MSIs). If we don't do that, it will
	 * get confused (OPAL does it)
	 */
	m32b = cleanup_addr(p->mm1_base);
	m64b = cleanup_addr(p->mm0_base);
	m64s = p->mm0_size;
	dt_add_property_cells(np, "ranges",
			      /* M32 space */
			      0x02000000, 0x00000000, M32_PCI_START,
			      hi32(m32b), lo32(m32b), 0, M32_PCI_SIZE - 0x10000);

	/* XXX FIXME: add opal-memwin32, dmawins, etc... */
	dt_add_property_u64s(np, "ibm,opal-m64-window", m64b, m64b, m64s);
	dt_add_property(np, "ibm,opal-single-pe", NULL, 0);
	dt_add_property_cells(np, "ibm,opal-num-pes", p->num_pes);
	dt_add_property_cells(np, "ibm,opal-reserved-pe",
			      PHB4_RESERVED_PE_NUM(p));
	dt_add_property_cells(np, "ibm,opal-msi-ranges",
			      p->base_msi, p->num_irqs - 8);
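	/*
	 * The top 8 interrupts of the block are the LSIs (base_lsi is set
	 * to irq_base + num_irqs - 8 in phb4_create()), hence only
	 * num_irqs - 8 MSIs are advertised above.
	 */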
	/* M64 ranges start at 1 as MBT0 is used for M32 */
	dt_add_property_cells(np, "ibm,opal-available-m64-ranges",
			      1, p->mbt_size - 1);
	dt_add_property_cells(np, "ibm,supported-tce-sizes",
			      12, // 4K
			      16, // 64K
			      21, // 2M
			      30); // 1G

	/* Tell Linux about alignment limits for segment splits.
	 *
	 * XXX We currently only expose splits of 1 and "num PEs".
	 */
	dt_add_property_cells(np, "ibm,opal-m64-segment-splits",
			      /* Full split, number of segments: */
			      p->num_pes,
			      /* Encoding passed to the enable call */
			      OPAL_ENABLE_M64_SPLIT,
			      /* Alignment/size restriction in #bits */
			      /* XXX VERIFY VALUE */
			      12,
			      /* Unused */
			      0,
			      /* single PE, number of segments: */
			      1,
			      /* Encoding passed to the enable call */
			      OPAL_ENABLE_M64_NON_SPLIT,
			      /* Alignment/size restriction in #bits */
			      /* XXX VERIFY VALUE */
			      12,
			      /* Unused */
			      0);

	/* The interrupt maps will be generated in the RC node by the
	 * PCI code based on the content of this structure:
	 */
	lsibase = p->base_lsi;
	p->phb.lstate.int_size = 2;
	p->phb.lstate.int_val[0][0] = lsibase + PHB4_LSI_PCIE_INTA;
	p->phb.lstate.int_val[0][1] = 1;
	p->phb.lstate.int_val[1][0] = lsibase + PHB4_LSI_PCIE_INTB;
	p->phb.lstate.int_val[1][1] = 1;
	p->phb.lstate.int_val[2][0] = lsibase + PHB4_LSI_PCIE_INTC;
	p->phb.lstate.int_val[2][1] = 1;
	p->phb.lstate.int_val[3][0] = lsibase + PHB4_LSI_PCIE_INTD;
	p->phb.lstate.int_val[3][1] = 1;
	p->phb.lstate.int_parent[0] = icsp;
	p->phb.lstate.int_parent[1] = icsp;
	p->phb.lstate.int_parent[2] = icsp;
	p->phb.lstate.int_parent[3] = icsp;

	/* Indicators for variable tables */
	dt_add_property_cells(np, "ibm,opal-rtt-table",
			      hi32((u64) p->tbl_rtt), lo32((u64) p->tbl_rtt), RTT_TABLE_SIZE);

	dt_add_property_cells(np, "ibm,opal-peltv-table",
			      hi32((u64) p->tbl_peltv), lo32((u64) p->tbl_peltv),
			      p->tbl_peltv_size);

	dt_add_property_cells(np, "ibm,opal-pest-table",
			      hi32(p->tbl_pest), lo32(p->tbl_pest), p->tbl_pest_size);

	dt_add_property_cells(np, "ibm,phb-diag-data-size",
			      sizeof(struct OpalIoPhb4ErrorData));

	/* Indicate to Linux that CAPP timebase sync is supported */
	dt_add_property_string(np, "ibm,capp-timebase-sync", NULL);

	/* Tell Linux Compare/Mask indication values */
	dt_add_property_cells(np, "ibm,phb-indications", CAPIIND, ASNIND,
			      NBWIND);
}

static bool phb4_calculate_windows(struct phb4 *p)
{
	const struct dt_property *prop;

	/* Get PBCQ MMIO windows from device-tree */
	prop = dt_require_property(p->phb.dt_node,
				   "ibm,mmio-windows", -1);
	assert(prop->len >= (2 * sizeof(uint64_t)));

	p->mm0_base = dt_property_get_u64(prop, 0);
	p->mm0_size = dt_property_get_u64(prop, 1);
	if (prop->len > 16) {
		p->mm1_base = dt_property_get_u64(prop, 2);
		p->mm1_size = dt_property_get_u64(prop, 3);
	}

	/* Sort them so that 0 is big and 1 is small */
	if (p->mm1_size && p->mm1_size > p->mm0_size) {
		uint64_t b = p->mm0_base;
		uint64_t s = p->mm0_size;
		p->mm0_base = p->mm1_base;
		p->mm0_size = p->mm1_size;
		p->mm1_base = b;
		p->mm1_size = s;
	}

	/* If 1 is too small, ditch it */
	if (p->mm1_size < M32_PCI_SIZE)
		p->mm1_size = 0;

	/* If 1 doesn't exist, carve it out of 0 */
	if (p->mm1_size == 0) {
		p->mm0_size /= 2;
		p->mm1_base = p->mm0_base + p->mm0_size;
		p->mm1_size = p->mm0_size;
	}
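	/*
	 * Numeric sketch (hypothetical sizes): given a single 128GB window
	 * from the device-tree, the carve above yields mm0 = 64GB and
	 * mm1 = 64GB, and mm1 is then cropped to M32_PCI_SIZE below.
	 */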

	/* Crop mm1 to our desired size */
	if (p->mm1_size > M32_PCI_SIZE)
		p->mm1_size = M32_PCI_SIZE;

	return true;
}

static void phb4_err_interrupt(struct irq_source *is, uint32_t isn)
{
	struct phb4 *p = is->data;

	PHBDBG(p, "Got interrupt 0x%08x\n", isn);

	/* mask the interrupt conditions to prevent it from re-firing */
	phb4_int_mask_active(p);

	/* Update pending event */
	opal_update_pending_evt(OPAL_EVENT_PCI_ERROR,
				OPAL_EVENT_PCI_ERROR);

	/* If the PHB is broken, go away */
	if (p->broken)
		return;

	/*
	 * Mark the PHB as having a pending error so that the OS
	 * can handle it at a later point.
	 */
	phb4_set_err_pending(p, true);
}

static uint64_t phb4_lsi_attributes(struct irq_source *is __unused,
				    uint32_t isn __unused)
{
#ifndef DISABLE_ERR_INTS
	struct phb4 *p = is->data;
	uint32_t idx = isn - p->base_lsi;

	if (idx == PHB4_LSI_PCIE_INF || idx == PHB4_LSI_PCIE_ER)
		return IRQ_ATTR_TARGET_OPAL | IRQ_ATTR_TARGET_RARE | IRQ_ATTR_TYPE_LSI;
#endif
	return IRQ_ATTR_TARGET_LINUX;
}

static char *phb4_lsi_name(struct irq_source *is, uint32_t isn)
{
	struct phb4 *p = is->data;
	uint32_t idx = isn - p->base_lsi;
	char buf[32];

	if (idx == PHB4_LSI_PCIE_INF)
		snprintf(buf, 32, "phb#%04x-inf", p->phb.opal_id);
	else if (idx == PHB4_LSI_PCIE_ER)
		snprintf(buf, 32, "phb#%04x-err", p->phb.opal_id);
	else
		assert(0); /* PCIe LSIs should never be directed to OPAL */

	return strdup(buf);
}

static const struct irq_source_ops phb4_lsi_ops = {
	.interrupt	= phb4_err_interrupt,
	.attributes	= phb4_lsi_attributes,
	.name		= phb4_lsi_name,
};

static __be64 lane_eq_default[8] = {
	CPU_TO_BE64(0x5454545454545454UL), CPU_TO_BE64(0x5454545454545454UL),
	CPU_TO_BE64(0x5454545454545454UL), CPU_TO_BE64(0x5454545454545454UL),
	CPU_TO_BE64(0x7777777777777777UL), CPU_TO_BE64(0x7777777777777777UL),
	CPU_TO_BE64(0x7777777777777777UL), CPU_TO_BE64(0x7777777777777777UL),
};

static __be64 lane_eq_phb5_default[8] = {
	CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
	CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
	CPU_TO_BE64(0x4444444444444444UL), CPU_TO_BE64(0x4444444444444444UL),
	CPU_TO_BE64(0x9999999999999999UL), CPU_TO_BE64(0x9999999999999999UL),
};

static void phb4_create(struct dt_node *np)
{
	const struct dt_property *prop;
	struct phb4 *p;
	struct pci_slot *slot;
	size_t lane_eq_len, lane_eq_len_req;
	struct dt_node *iplp;
	char *path;
	uint32_t irq_base, irq_flags;
	int i, eq_reg_count;
	int chip_id;

	chip_id = dt_prop_get_u32(np, "ibm,chip-id");
	p = local_alloc(chip_id, sizeof(struct phb4), 8);
	assert(p);
	memset(p, 0x0, sizeof(struct phb4));

	/* Populate base stuff */
	p->index = dt_prop_get_u32(np, "ibm,phb-index");
	p->chip_id = chip_id;
	p->pec = dt_prop_get_u32(np, "ibm,phb-pec-index");
	p->regs = (void *)dt_get_address(np, 0, NULL);
	p->int_mmio = (void *)dt_get_address(np, 1, NULL);
	p->phb.dt_node = np;
	p->phb.ops = &phb4_ops;
	p->phb.phb_type = phb_type_pcie_v4;
	p->phb.scan_map = 0x1; /* Only device 0 to scan */

	if (!phb4_calculate_windows(p))
		return;

	/* Get the various XSCOM register bases from the device-tree */
	prop = dt_require_property(np, "ibm,xscom-bases", 5 * sizeof(uint32_t));
	p->pe_xscom = dt_property_get_cell(prop, 0);
	p->pe_stk_xscom = dt_property_get_cell(prop, 1);
	p->pci_xscom = dt_property_get_cell(prop, 2);
	p->pci_stk_xscom = dt_property_get_cell(prop, 3);
	p->etu_xscom = dt_property_get_cell(prop, 4);

	/*
	 * We skip the initial PERST assertion requested by the generic code
	 * when doing a cold boot because we are coming out of cold boot already
	 * so we save boot time that way. The PERST state machine will still
	 * handle waiting for the link to come up, it will just avoid actually
	 * asserting & deasserting the PERST output
	 *
	 * For a hot IPL, we still do a PERST
	 *
	 * Note: In absence of property (ie, FSP-less), we stick to the old
	 * behaviour and set skip_perst to true
	 */
	p->skip_perst = true; /* Default */

	iplp = dt_find_by_path(dt_root, "ipl-params/ipl-params");
	if (iplp) {
		const char *ipl_type = dt_prop_get_def(iplp, "cec-major-type", NULL);
		if (ipl_type && (!strcmp(ipl_type, "hot")))
			p->skip_perst = false;
	}

	/* By default link is assumed down */
	p->has_link = false;

	/* We register the PHB before we initialize it so we
	 * get a useful OPAL ID for it
	 */
	pci_register_phb(&p->phb, phb4_get_opal_id(p->chip_id, p->index));

	/* Create slot structure */
	slot = phb4_slot_create(&p->phb);
	if (!slot)
		PHBERR(p, "Cannot create PHB slot\n");

	/* Hello ! */
	path = dt_get_path(np);
	PHBINF(p, "Found %s @%p\n", path, p->regs);
	PHBINF(p, " M32 [0x%016llx..0x%016llx]\n",
	       p->mm1_base, p->mm1_base + p->mm1_size - 1);
	PHBINF(p, " M64 [0x%016llx..0x%016llx]\n",
	       p->mm0_base, p->mm0_base + p->mm0_size - 1);
	free(path);

	/* Find base location code from root node */
	p->phb.base_loc_code = dt_prop_get_def(dt_root,
					       "ibm,io-base-loc-code", NULL);
	if (!p->phb.base_loc_code)
		PHBDBG(p, "Base location code not found !\n");

	/*
	 * Grab CEC IO VPD load info from the root of the device-tree,
	 * on P8 there's a single such VPD for the whole machine
	 */
	prop = dt_find_property(dt_root, "ibm,io-vpd");
	if (!prop) {
		/* LX VPD Lid not already loaded */
		if (platform.vpd_iohub_load)
			platform.vpd_iohub_load(dt_root);
	}

	/* Obtain information about the PHB from the hardware directly */
	if (!phb4_read_capabilities(p))
		goto failed;

	p->max_link_speed = phb4_get_max_link_speed(p, np);
	p->max_link_width = phb4_get_max_link_width(p);
	PHBINF(p, "Max link speed: GEN%i, max link width %i\n",
	       p->max_link_speed, p->max_link_width);

	/* Check for lane equalization values from HB or HDAT */
	p->lane_eq_en = true;
	p->lane_eq = dt_prop_get_def_size(np, "ibm,lane-eq", NULL, &lane_eq_len);
	if (is_phb5())
		eq_reg_count = 8;
	else
		eq_reg_count = 6;
	lane_eq_len_req = eq_reg_count * 8;
	if (p->lane_eq) {
		if (lane_eq_len < lane_eq_len_req) {
			PHBERR(p, "Device-tree has ibm,lane-eq too short: %ld"
			       " (want %ld)\n", lane_eq_len, lane_eq_len_req);
			p->lane_eq = NULL;
		}
	} else {
		PHBDBG(p, "Using default lane equalization settings\n");
		if (is_phb5())
			p->lane_eq = lane_eq_phb5_default;
		else
			p->lane_eq = lane_eq_default;
	}
	if (p->lane_eq) {
		PHBDBG(p, "Override lane equalization settings:\n");
		for (i = 0; i < lane_eq_len_req / (8 * 2); i++)
			PHBDBG(p, " 0x%016llx 0x%016llx\n",
			       be64_to_cpu(p->lane_eq[2 * i]),
			       be64_to_cpu(p->lane_eq[2 * i + 1]));
	}

	/* Allocate a block of interrupts. We need to know if it needs
	 * 2K or 4K interrupts ... for now we just use 4K but that
	 * needs to be fixed
	 */
	if (is_phb5())
		irq_base = xive2_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
	else
		irq_base = xive_alloc_hw_irqs(p->chip_id, p->num_irqs, p->num_irqs);
	if (irq_base == XIVE_IRQ_ERROR) {
		PHBERR(p, "Failed to allocate %d interrupt sources\n",
		       p->num_irqs);
		goto failed;
	}
	p->base_msi = irq_base;
	p->base_lsi = irq_base + p->num_irqs - 8;
	p->num_pes = p->max_num_pes;

	/* Allocate the SkiBoot internal in-memory tables for the PHB */
	phb4_allocate_tables(p);

	phb4_add_properties(p);

	/* Clear IODA3 cache */
	phb4_init_ioda_cache(p);

	/* Get the HW up and running */
	phb4_init_hw(p);

	/* init capp that might get attached to the phb */
	if (is_phb4())
		phb4_init_capp(p);

	/* Compute XIVE source flags depending on PHB revision */
	irq_flags = 0;
	if (phb_can_store_eoi(p))
		irq_flags |= XIVE_SRC_STORE_EOI;
	else
		irq_flags |= XIVE_SRC_TRIGGER_PAGE;

	if (is_phb5()) {
		/*
		 * Register sources with XIVE. If offloading is on, use the
		 * ESB pages of the XIVE IC for the MSI sources instead of the
		 * ESB pages of the PHB.
		 */
		if (phb_pq_disable(p) || phb_abt_mode(p)) {
			xive2_register_esb_source(p->base_msi, p->num_irqs - 8);
		} else {
			xive2_register_hw_source(p->base_msi,
						 p->num_irqs - 8, 16,
						 p->int_mmio, irq_flags,
						 NULL, NULL);
		}

		/*
		 * LSI sources always use the ESB pages of the PHB.
		 */
		xive2_register_hw_source(p->base_lsi, 8, 16,
					 p->int_mmio + ((p->num_irqs - 8) << 16),
					 XIVE_SRC_LSI | irq_flags, p, &phb4_lsi_ops);
	} else {
		/* Register all interrupt sources with XIVE */
		xive_register_hw_source(p->base_msi, p->num_irqs - 8, 16,
					p->int_mmio, irq_flags, NULL, NULL);

		xive_register_hw_source(p->base_lsi, 8, 16,
					p->int_mmio + ((p->num_irqs - 8) << 16),
					XIVE_SRC_LSI, p, &phb4_lsi_ops);
	}

	/* Platform additional setup */
	if (platform.pci_setup_phb)
		platform.pci_setup_phb(&p->phb, p->index);

	dt_add_property_string(np, "status", "okay");

	return;

failed:
	p->broken = true;

	/* Tell Linux it's broken */
	dt_add_property_string(np, "status", "error");
}

static void phb4_probe_stack(struct dt_node *stk_node, uint32_t pec_index,
			     uint32_t nest_base, uint32_t pci_base)
{
	enum phys_map_type phys_mmio64, phys_mmio32, phys_xive_esb, phys_reg_spc;
	uint32_t pci_stack, nest_stack, etu_base, gcid, phb_num, stk_index;
	uint64_t val, phb_bar = 0, irq_bar = 0, bar_en;
	uint64_t mmio0_bar = 0, mmio0_bmask, mmio0_sz;
	uint64_t mmio1_bar = 0, mmio1_bmask, mmio1_sz;
	void *foo;
	__be64 mmio_win[4];
	unsigned int mmio_win_sz;
	struct dt_node *np;
	char *path;
	uint64_t capp_ucode_base;
	unsigned int max_link_speed;
	int rc;

	assert(is_phb5() || is_phb4()); /* Sanity check */

	gcid = dt_get_chip_id(stk_node);
	stk_index = dt_prop_get_u32(stk_node, "reg");
	phb_num = dt_prop_get_u32(stk_node, "ibm,phb-index");
	path = dt_get_path(stk_node);
	if (is_phb5()) {
		phys_mmio64 = PHB5_64BIT_MMIO;
		phys_mmio32 = PHB5_32BIT_MMIO;
		phys_xive_esb = PHB5_XIVE_ESB;
		phys_reg_spc = PHB5_REG_SPC;
		prlog(PR_INFO, "PHB: Chip %d Found PHB5 PBCQ%d Stack %d at %s\n",
		      gcid, pec_index, stk_index, path);
	} else {
		phys_mmio64 = PHB4_64BIT_MMIO;
		phys_mmio32 = PHB4_32BIT_MMIO;
		phys_xive_esb = PHB4_XIVE_ESB;
		phys_reg_spc = PHB4_REG_SPC;
		prlog(PR_INFO, "PHB: Chip %d Found PHB4 PBCQ%d Stack %d at %s\n",
		      gcid, pec_index, stk_index, path);
	}
	free(path);

	pci_stack = pci_base + 0x40 * (stk_index + 1);
	nest_stack = nest_base + 0x40 * (stk_index + 1);
	etu_base = pci_base + 0x100 + 0x40 * stk_index;
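	/*
	 * Offset example (illustrative): stack 0 gives pci_stack =
	 * pci_base + 0x40, nest_stack = nest_base + 0x40 and etu_base =
	 * pci_base + 0x100; stack 1 lands at +0x80 / +0x80 / +0x140.
	 */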

	prlog(PR_DEBUG, "PHB[%d:%d] X[PE]=0x%08x/0x%08x X[PCI]=0x%08x/0x%08x X[ETU]=0x%08x\n",
	      gcid, phb_num, nest_base, nest_stack, pci_base, pci_stack, etu_base);

	/* Default BAR enables */
	bar_en = 0;

	/* Initialize PHB register BAR */
	phys_map_get(gcid, phys_reg_spc, phb_num, &phb_bar, NULL);
	rc = xscom_write(gcid, nest_stack + XPEC_NEST_STK_PHB_REG_BAR,
			 phb_bar << 8);

	/* A scom error here probably indicates a defective/garded PHB */
	if (rc != OPAL_SUCCESS) {
		prerror("PHB[%d:%d] Unable to set PHB BAR. Error=%d\n",
			gcid, phb_num, rc);
		return;
	}

	bar_en |= XPEC_NEST_STK_BAR_EN_PHB;

	/* Same with INT BAR (ESB) */
	phys_map_get(gcid, phys_xive_esb, phb_num, &irq_bar, NULL);
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_IRQ_BAR, irq_bar << 8);
	bar_en |= XPEC_NEST_STK_BAR_EN_INT;

	/* Same with MMIO windows */
	phys_map_get(gcid, phys_mmio64, phb_num, &mmio0_bar, &mmio0_sz);
	mmio0_bmask = (~(mmio0_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0, mmio0_bar << 8);
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR0_MASK, mmio0_bmask << 8);

	phys_map_get(gcid, phys_mmio32, phb_num, &mmio1_bar, &mmio1_sz);
	mmio1_bmask = (~(mmio1_sz - 1)) & 0x00FFFFFFFFFFFFFFULL;
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1, mmio1_bar << 8);
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_MMIO_BAR1_MASK, mmio1_bmask << 8);
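	/*
	 * Mask example (hypothetical window size): a 64GB MMIO0 window
	 * gives mmio0_bmask = ~(0x1000000000 - 1) & 0x00FFFFFFFFFFFFFF
	 * = 0x00FFFFF000000000, written left-shifted by 8 like the BARs
	 * themselves.
	 */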

	/* Build MMIO windows list */
	mmio_win_sz = 0;
	if (mmio0_bar) {
		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio0_bar);
		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio0_sz);
		bar_en |= XPEC_NEST_STK_BAR_EN_MMIO0;
	}
	if (mmio1_bar) {
		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio1_bar);
		mmio_win[mmio_win_sz++] = cpu_to_be64(mmio1_sz);
		bar_en |= XPEC_NEST_STK_BAR_EN_MMIO1;
	}

	/* Set the appropriate enables */
	xscom_read(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, &val);
	val |= bar_en;
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_BAR_EN, val);

	/* No MMIO windows ? Barf ! */
	if (mmio_win_sz == 0) {
		prerror("PHB[%d:%d] No MMIO windows enabled !\n", gcid, phb_num);
		return;
	}

	/* Clear errors in PFIR and NFIR */
	xscom_write(gcid, pci_stack + XPEC_PCI_STK_PCI_FIR, 0);
	xscom_write(gcid, nest_stack + XPEC_NEST_STK_PCI_NFIR, 0);

	/* Check ETU reset */
	xscom_read(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, &val);
	prlog_once(PR_DEBUG, "ETU reset: %llx\n", val);
	xscom_write(gcid, pci_stack + XPEC_PCI_STK_ETU_RESET, 0);
	time_wait_ms(1);

	/* Show we can read PHB MMIO space */
	foo = (void *)(phb_bar + 0x800); /* PHB version register */
	prlog_once(PR_DEBUG, "Version reg: 0x%016llx\n", in_be64(foo));

	/* Create PHB node */
	np = dt_new_addr(dt_root, "pciex", phb_bar);
	if (!np)
		return;

	if (is_phb5())
		dt_add_property_strings(np, "compatible", "ibm,power10-pciex", "ibm,ioda3-phb");
	else
		dt_add_property_strings(np, "compatible", "ibm,power9-pciex", "ibm,ioda3-phb");
	dt_add_property_strings(np, "device_type", "pciex");
	dt_add_property_u64s(np, "reg",
			     phb_bar, 0x1000,
			     irq_bar, 0x10000000);

	/* Everything else is handled later by skiboot, we just
	 * stick a few hints here
	 */
	dt_add_property_cells(np, "ibm,xscom-bases",
			      nest_base, nest_stack, pci_base, pci_stack, etu_base);
	dt_add_property(np, "ibm,mmio-windows", mmio_win, 8 * mmio_win_sz);
	dt_add_property_cells(np, "ibm,phb-index", phb_num);
	dt_add_property_cells(np, "ibm,phb-pec-index", pec_index);
	dt_add_property_cells(np, "ibm,phb-stack", stk_node->phandle);
	dt_add_property_cells(np, "ibm,phb-stack-index", stk_index);
	dt_add_property_cells(np, "ibm,chip-id", gcid);

	/* Read the hub-id out of the pbcq node */
	if (dt_has_node_property(stk_node->parent, "ibm,hub-id", NULL)) {
		uint32_t hub_id;

		hub_id = dt_prop_get_u32(stk_node->parent, "ibm,hub-id");
		dt_add_property_cells(np, "ibm,hub-id", hub_id);
	}

	if (dt_has_node_property(stk_node->parent, "ibm,loc-code", NULL)) {
		const char *lc = dt_prop_get(stk_node->parent, "ibm,loc-code");
		dt_add_property_string(np, "ibm,loc-code", lc);
	}
	if (dt_has_node_property(stk_node, "ibm,lane-eq", NULL)) {
		size_t leq_size;
		const void *leq = dt_prop_get_def_size(stk_node, "ibm,lane-eq",
						       NULL, &leq_size);
		if (leq != NULL && leq_size >= 6 * 8)
			dt_add_property(np, "ibm,lane-eq", leq, leq_size);
	}
	if (dt_has_node_property(stk_node, "ibm,capp-ucode", NULL)) {
		capp_ucode_base = dt_prop_get_u32(stk_node, "ibm,capp-ucode");
		dt_add_property_cells(np, "ibm,capp-ucode", capp_ucode_base);
	}
	if (dt_has_node_property(stk_node, "ibm,max-link-speed", NULL)) {
		max_link_speed = dt_prop_get_u32(stk_node, "ibm,max-link-speed");
		dt_add_property_cells(np, "ibm,max-link-speed", max_link_speed);
	}
	dt_add_property_cells(np, "ibm,capi-flags",
			      OPAL_PHB_CAPI_FLAG_SNOOP_CONTROL);

	add_chip_dev_associativity(np);
}

static void phb4_probe_pbcq(struct dt_node *pbcq)
{
	uint32_t nest_base, pci_base, pec_index;
	struct dt_node *stk;

	/* REMOVEME: force this for now until we stabilise PCIe */
	verbose_eeh = 1;

	nest_base = dt_get_address(pbcq, 0, NULL);
	pci_base = dt_get_address(pbcq, 1, NULL);
	pec_index = dt_prop_get_u32(pbcq, "ibm,pec-index");

	dt_for_each_child(pbcq, stk) {
		if (dt_node_is_enabled(stk))
			phb4_probe_stack(stk, pec_index, nest_base, pci_base);
	}
}

void probe_phb4(void)
{
	struct dt_node *np;
	const char *s;

	pci_eeh_mmio = !nvram_query_eq_dangerous("pci-eeh-mmio", "disabled");
	pci_retry_all = nvram_query_eq_dangerous("pci-retry-all", "true");
	s = nvram_query_dangerous("phb-rx-err-max");
	if (s) {
		rx_err_max = atoi(s);

		/* Clip to uint8_t used by hardware */
		rx_err_max = MAX(rx_err_max, 0);
		rx_err_max = MIN(rx_err_max, 255);
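		/*
		 * e.g. phb-rx-err-max=300 in NVRAM is clamped to 255 above,
		 * and a negative value is raised to 0, since the hardware
		 * field is a u8.
		 */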
	}

	if (is_phb5()) {
		prlog(PR_DEBUG, "PHB5: Maximum RX errors during training: %d\n", rx_err_max);
		/* Look for PBCQ XSCOM nodes */
		dt_for_each_compatible(dt_root, np, "ibm,power10-pbcq")
			phb4_probe_pbcq(np);

		/* Look for newly created PHB nodes */
		dt_for_each_compatible(dt_root, np, "ibm,power10-pciex")
			phb4_create(np);
	} else {
		prlog(PR_DEBUG, "PHB4: Maximum RX errors during training: %d\n", rx_err_max);
		/* Look for PBCQ XSCOM nodes */
		dt_for_each_compatible(dt_root, np, "ibm,power9-pbcq")
			phb4_probe_pbcq(np);

		/* Look for newly created PHB nodes */
		dt_for_each_compatible(dt_root, np, "ibm,power9-pciex")
			phb4_create(np);
	}
}