1 /* $NetBSD: dec_kn8ae.c,v 1.39 2009/03/14 15:35:59 dsl Exp $ */ 2 3 /* 4 * Copyright (c) 1997 by Matthew Jacob 5 * NASA AMES Research Center. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 34 35 __KERNEL_RCSID(0, "$NetBSD: dec_kn8ae.c,v 1.39 2009/03/14 15:35:59 dsl Exp $"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/device.h> 40 #include <sys/termios.h> 41 #include <sys/conf.h> 42 #include <dev/cons.h> 43 44 #include <machine/rpb.h> 45 #include <machine/autoconf.h> 46 #include <machine/cpuconf.h> 47 #include <machine/frame.h> 48 #include <machine/alpha.h> 49 #include <machine/logout.h> 50 51 #include <dev/ic/comreg.h> 52 #include <dev/ic/comvar.h> 53 54 #include <dev/isa/isavar.h> 55 #include <dev/pci/pcireg.h> 56 #include <dev/pci/pcivar.h> 57 58 #include <dev/scsipi/scsi_all.h> 59 #include <dev/scsipi/scsipi_all.h> 60 #include <dev/scsipi/scsiconf.h> 61 62 #include <alpha/tlsb/tlsbreg.h> 63 #include <alpha/tlsb/tlsbvar.h> 64 #include <alpha/tlsb/kftxxreg.h> 65 #include <alpha/tlsb/kftxxvar.h> 66 #define KV(_addr) ((void *)ALPHA_PHYS_TO_K0SEG((_addr))) 67 68 69 void dec_kn8ae_init(void); 70 void dec_kn8ae_cons_init(void); 71 static void dec_kn8ae_device_register(struct device *, void *); 72 73 static void dec_kn8ae_mcheck_handler 74 (unsigned long, struct trapframe *, unsigned long, unsigned long); 75 76 const struct alpha_variation_table dec_kn8ae_variations[] = { 77 { 0, "AlphaServer 8400" }, 78 { 0, NULL }, 79 }; 80 81 void 82 dec_kn8ae_init() 83 { 84 u_int64_t variation; 85 86 platform.family = "AlphaServer 8400"; 87 88 if ((platform.model = alpha_dsr_sysname()) == NULL) { 89 variation = hwrpb->rpb_variation & SV_ST_MASK; 90 if ((platform.model = alpha_variation_name(variation, 91 dec_kn8ae_variations)) == NULL) 92 platform.model = alpha_unknown_sysname(); 93 } 94 95 platform.iobus = "tlsb"; 96 platform.cons_init = dec_kn8ae_cons_init; 97 platform.device_register = dec_kn8ae_device_register; 98 platform.mcheck_handler = dec_kn8ae_mcheck_handler; 99 } 100 101 void 102 dec_kn8ae_cons_init() 103 { 104 105 /* 106 * Info to retain: 107 * 108 * The AXP 8X00 seems to encode the 109 * type of console in the ctb_type field, 110 * not the ctb_term_type field. 111 * 112 * XXX Not Type 4 CTB? 113 */ 114 } 115 116 /* #define BDEBUG 1 */ 117 static void 118 dec_kn8ae_device_register(struct device *dev, void *aux) 119 { 120 static int found, initted, diskboot, netboot; 121 static struct device *primarydev, *pcidev, *ctrlrdev; 122 struct bootdev_data *b = bootdev_data; 123 struct device *parent = device_parent(dev); 124 125 if (found) 126 return; 127 128 if (!initted) { 129 diskboot = (strcasecmp(b->protocol, "SCSI") == 0); 130 netboot = (strcasecmp(b->protocol, "BOOTP") == 0) || 131 (strcasecmp(b->protocol, "MOP") == 0); 132 #if BDEBUG 133 printf("proto:%s bus:%d slot:%d chan:%d", b->protocol, 134 b->bus, b->slot, b->channel); 135 if (b->remote_address) 136 printf(" remote_addr:%s", b->remote_address); 137 printf(" un:%d bdt:%d", b->unit, b->boot_dev_type); 138 if (b->ctrl_dev_type) 139 printf(" cdt:%s\n", b->ctrl_dev_type); 140 else 141 printf("\n"); 142 printf("diskboot = %d, netboot = %d\n", diskboot, netboot); 143 #endif 144 initted = 1; 145 } 146 147 if (primarydev == NULL) { 148 if (!device_is_a(dev, "dwlpx")) 149 return; 150 else { 151 struct kft_dev_attach_args *ka = aux; 152 153 if (b->bus != ka->ka_hosenum) 154 return; 155 primarydev = dev; 156 #ifdef BDEBUG 157 printf("\nprimarydev = %s\n", dev->dv_xname); 158 #endif 159 return; 160 } 161 } 162 163 if (pcidev == NULL) { 164 if (!device_is_a(dev, "pci")) 165 return; 166 /* 167 * Try to find primarydev anywhere in the ancestry. This is 168 * necessary if the PCI bus is hidden behind a bridge. 169 */ 170 while (parent) { 171 if (parent == primarydev) 172 break; 173 parent = device_parent(parent); 174 } 175 if (!parent) 176 return; 177 else { 178 struct pcibus_attach_args *pba = aux; 179 180 if ((b->slot / 1000) != pba->pba_bus) 181 return; 182 183 pcidev = dev; 184 #if BDEBUG 185 printf("\npcidev = %s\n", dev->dv_xname); 186 #endif 187 return; 188 } 189 } 190 191 if (ctrlrdev == NULL) { 192 if (parent != pcidev) 193 return; 194 else { 195 struct pci_attach_args *pa = aux; 196 int slot; 197 198 slot = pa->pa_bus * 1000 + pa->pa_function * 100 + 199 pa->pa_device; 200 if (b->slot != slot) 201 return; 202 203 if (netboot) { 204 booted_device = dev; 205 #ifdef BDEBUG 206 printf("\nbooted_device = %s\n", dev->dv_xname); 207 #endif 208 found = 1; 209 } else { 210 ctrlrdev = dev; 211 #if BDEBUG 212 printf("\nctrlrdev = %s\n", dev->dv_xname); 213 #endif 214 } 215 return; 216 } 217 } 218 219 if (!diskboot) 220 return; 221 222 if (device_is_a(dev, "sd") || 223 device_is_a(dev, "st") || 224 device_is_a(dev, "cd")) { 225 struct scsipibus_attach_args *sa = aux; 226 struct scsipi_periph *periph = sa->sa_periph; 227 int unit; 228 229 if (device_parent(parent) != ctrlrdev) 230 return; 231 232 unit = periph->periph_target * 100 + periph->periph_lun; 233 if (b->unit != unit) 234 return; 235 if (b->channel != periph->periph_channel->chan_channel) 236 return; 237 238 /* we've found it! */ 239 booted_device = dev; 240 #if BDEBUG 241 printf("\nbooted_device = %s\n", dev->dv_xname); 242 #endif 243 found = 1; 244 } 245 } 246 247 /* 248 * KN8AE Machine Check Handlers. 249 */ 250 void kn8ae_harderr(unsigned long, unsigned long, 251 unsigned long, struct trapframe *); 252 253 static void kn8ae_softerr(unsigned long, unsigned long, 254 unsigned long, struct trapframe *); 255 256 void kn8ae_mcheck(unsigned long, unsigned long, 257 unsigned long, struct trapframe *); 258 259 /* 260 * Support routine for clearing errors 261 */ 262 static void clear_tlsb_ebits(int); 263 264 static void 265 clear_tlsb_ebits(int cpuonly) 266 { 267 int node; 268 u_int32_t tldev; 269 270 for (node = 0; node <= TLSB_NODE_MAX; ++node) { 271 if ((tlsb_found & (1 << node)) == 0) 272 continue; 273 tldev = TLSB_GET_NODEREG(node, TLDEV); 274 if (tldev == 0) { 275 /* "cannot happen" */ 276 continue; 277 } 278 /* 279 * Registers to clear for all nodes. 280 */ 281 if (TLSB_GET_NODEREG(node, TLBER) & 282 (TLBER_UDE|TLBER_CWDE|TLBER_CRDE)) { 283 TLSB_PUT_NODEREG(node, TLESR0, 284 TLSB_GET_NODEREG(node, TLESR0)); 285 TLSB_PUT_NODEREG(node, TLESR1, 286 TLSB_GET_NODEREG(node, TLESR1)); 287 TLSB_PUT_NODEREG(node, TLESR2, 288 TLSB_GET_NODEREG(node, TLESR2)); 289 TLSB_PUT_NODEREG(node, TLESR3, 290 TLSB_GET_NODEREG(node, TLESR3)); 291 } 292 TLSB_PUT_NODEREG(node, TLBER, 293 TLSB_GET_NODEREG(node, TLBER)); 294 TLSB_PUT_NODEREG(node, TLFADR0, 295 TLSB_GET_NODEREG(node, TLFADR0)); 296 TLSB_PUT_NODEREG(node, TLFADR1, 297 TLSB_GET_NODEREG(node, TLFADR1)); 298 299 if (TLDEV_ISCPU(tldev)) { 300 TLSB_PUT_NODEREG(node, TLEPAERR, 301 TLSB_GET_NODEREG(node, TLEPAERR)); 302 TLSB_PUT_NODEREG(node, TLEPDERR, 303 TLSB_GET_NODEREG(node, TLEPDERR)); 304 TLSB_PUT_NODEREG(node, TLEPMERR, 305 TLSB_GET_NODEREG(node, TLEPMERR)); 306 continue; 307 } 308 /* 309 * If we're only doing CPU nodes, or this was a memory 310 * node, we're done. Onwards. 311 */ 312 if (cpuonly || TLDEV_ISMEM(tldev)) { 313 continue; 314 } 315 316 TLSB_PUT_NODEREG(node, KFT_ICCNSE, 317 TLSB_GET_NODEREG(node, KFT_ICCNSE)); 318 TLSB_PUT_NODEREG(node, KFT_IDPNSE0, 319 TLSB_GET_NODEREG(node, KFT_IDPNSE0)); 320 TLSB_PUT_NODEREG(node, KFT_IDPNSE1, 321 TLSB_GET_NODEREG(node, KFT_IDPNSE1)); 322 if (TLDEV_DTYPE(tldev) == TLDEV_DTYPE_KFTHA) { 323 TLSB_PUT_NODEREG(node, KFT_IDPNSE2, 324 TLSB_GET_NODEREG(node, KFT_IDPNSE2)); 325 TLSB_PUT_NODEREG(node, KFT_IDPNSE3, 326 TLSB_GET_NODEREG(node, KFT_IDPNSE3)); 327 } 328 /* 329 * Digital Unix cleares the Mailbox Transaction Register 330 * here. I don't think we should because we aren't using 331 * mailboxes yet, and the tech manual makes dire warnings 332 * about *not* rewriting this register. 333 */ 334 } 335 } 336 337 /* 338 * System Corrected Errors. 339 */ 340 static const char *fmt1 = " %-25s = 0x%l016x\n"; 341 342 void 343 kn8ae_harderr(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep) 344 { 345 int whami, cpuwerr, dof_cnt; 346 mc_hdr_ev5 *hdr; 347 mc_cc_ev5 *mptr; 348 struct tlsb_mchk_fatal *ptr; 349 350 hdr = (mc_hdr_ev5 *) logout; 351 mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr)); 352 ptr = (struct tlsb_mchk_fatal *) 353 (logout + sizeof (*hdr) + sizeof (*mptr)); 354 whami = alpha_pal_whami(); 355 356 printf("kn8ae: CPU ID %d system correctable error\n", whami); 357 358 printf(" Machine Check Code 0x%lx\n", hdr->mcheck_code); 359 printf(fmt1, "EI Status", mptr->ei_stat); 360 printf(fmt1, "EI Address", mptr->ei_addr); 361 printf(fmt1, "Fill Syndrome", mptr->fill_syndrome); 362 printf(fmt1, "Interrupt Status Reg.", mptr->isr); 363 printf("\n"); 364 dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32; 365 cpuwerr = ptr->rsvdheader & 0xffff; 366 367 printf(fmt1, "CPU W/Error.", cpuwerr); 368 printf(fmt1, "DOF Count.", dof_cnt); 369 printf(fmt1, "TLDEV", ptr->tldev); 370 printf(fmt1, "TLSB Bus Error", ptr->tlber); 371 printf(fmt1, "TLSB CNR", ptr->tlcnr); 372 printf(fmt1, "TLSB VID", ptr->tlvid); 373 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0); 374 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1); 375 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2); 376 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3); 377 printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr); 378 printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig); 379 printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr); 380 printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr); 381 printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0); 382 printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1); 383 printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0); 384 printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1); 385 printf(fmt1, "TLSB VMG", ptr->tlep_vmg); 386 387 /* CLEAN UP */ 388 /* 389 * Here's what Digital Unix says to do- 390 * 391 * 1. Log the ECC error that got us here 392 * 393 * 2. Turn off error reporting 394 * 395 * 3. Attempt to have CPU read bad memory location (specified by the 396 * tlfadr reg of the TIOP or TMEM (depending on type of error, 397 * see upcoming code branches) and write data back to location. 398 * 399 * 4. When the CPU attempts to read the location, another 620 interrupt 400 * should occur for the CPU at which instant PAL will scrub the 401 * location. Then the o.s. scrub routine finishes. If the PAL scrubs 402 * the location then the scrubbed flag should be 0 (this is what we 403 * expect). 404 * 405 * If it's a 1 then the alpha_scrub_long routine did the scrub. 406 * 407 * 5. We renable correctable error logging and continue 408 */ 409 printf("WARNING THIS IS NOT DONE YET YOU MAY GET DATA CORRUPTION"); 410 clear_tlsb_ebits(0); 411 /* 412 * Clear error by rewriting register. 413 */ 414 alpha_pal_wrmces(mces); 415 } 416 417 /* 418 * Processor Corrected Errors- BCACHE ECC errors. 419 */ 420 421 static void 422 kn8ae_softerr(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep) 423 { 424 int whami, cpuwerr, dof_cnt; 425 mc_hdr_ev5 *hdr; 426 mc_cc_ev5 *mptr; 427 struct tlsb_mchk_soft *ptr; 428 429 hdr = (mc_hdr_ev5 *) logout; 430 mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr)); 431 ptr = (struct tlsb_mchk_soft *) 432 (logout + sizeof (*hdr) + sizeof (*mptr)); 433 whami = alpha_pal_whami(); 434 435 printf("kn8ae: CPU ID %d processor correctable error\n", whami); 436 printf(" Machine Check Code 0x%lx\n", hdr->mcheck_code); 437 printf(fmt1, "EI Status", mptr->ei_stat); 438 printf(fmt1, "EI Address", mptr->ei_addr); 439 printf(fmt1, "Fill Syndrome", mptr->fill_syndrome); 440 printf(fmt1, "Interrupt Status Reg.", mptr->isr); 441 printf("\n"); 442 dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32; 443 cpuwerr = ptr->rsvdheader & 0xffff; 444 445 printf(fmt1, "CPU W/Error.", cpuwerr); 446 printf(fmt1, "DOF Count.", dof_cnt); 447 printf(fmt1, "TLDEV", ptr->tldev); 448 printf(fmt1, "TLSB Bus Error", ptr->tlber); 449 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0); 450 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1); 451 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2); 452 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3); 453 454 /* 455 * Clear TLSB bits on all CPU TLSB nodes. 456 */ 457 clear_tlsb_ebits(1); 458 459 /* 460 * Clear error by rewriting register. 461 */ 462 alpha_pal_wrmces(mces); 463 } 464 465 /* 466 * KN8AE specific machine check handler 467 */ 468 469 void 470 kn8ae_mcheck(unsigned long mces, unsigned long type, unsigned long logout, struct trapframe *framep) 471 { 472 struct mchkinfo *mcp; 473 int get_dwlpx_regs; 474 struct tlsb_mchk_fatal mcs[TLSB_NODE_MAX+1], *ptr; 475 mc_hdr_ev5 *hdr; 476 mc_uc_ev5 *mptr; 477 478 /* 479 * If we expected a machine check, just go handle it in common code. 480 */ 481 mcp = &curcpu()->ci_mcinfo; 482 if (mcp->mc_expected) { 483 machine_check(mces, framep, type, logout); 484 return; 485 } 486 487 get_dwlpx_regs = 0; 488 ptr = NULL; 489 memset(mcs, 0, sizeof (mcs)); 490 491 hdr = (mc_hdr_ev5 *) logout; 492 mptr = (mc_uc_ev5 *) (logout + sizeof (*hdr)); 493 494 /* 495 * If detected by the system, we print out some TLASER registers. 496 */ 497 if (type == ALPHA_SYS_MCHECK) { 498 #if 0 499 int get_lsb_regs = 0; 500 int get_dwlpx_regs = 0; 501 #endif 502 503 ptr = (struct tlsb_mchk_fatal *) 504 (logout + sizeof (*hdr) + sizeof (*mptr)); 505 506 #if 0 507 if (ptr->tlepaerr & TLEPAERR_WSPC_RD) { 508 get_dwlpx_regs++; 509 } 510 if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) && 511 (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) && 512 (ptr->tlepderr & TLEPDERR_GBTMO)) { 513 get_dwlpx_regs++; 514 } 515 #endif 516 } else { 517 /* 518 * We have a processor machine check- which doesn't 519 * have information with it about any TLSB related 520 * failures. 521 */ 522 } 523 524 /* 525 * Now we can finally print some stuff... 526 */ 527 ev5_logout_print(hdr, mptr); 528 if (type == ALPHA_SYS_MCHECK) { 529 if (ptr->tlepaerr & TLEPAERR_WSPC_RD) { 530 printf("\tWSPC READ error\n"); 531 } 532 if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) && 533 (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) && 534 (ptr->tlepderr & TLEPDERR_GBTMO)) { 535 printf ("\tWSPC IBOX timeout detected\n"); 536 } 537 #ifdef DIAGNOSTIC 538 printf(fmt1, "TLDEV", ptr->tldev); 539 printf(fmt1, "TLSB Bus Error", ptr->tlber); 540 printf(fmt1, "TLSB CNR", ptr->tlcnr); 541 printf(fmt1, "TLSB VID", ptr->tlvid); 542 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0); 543 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1); 544 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2); 545 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3); 546 printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr); 547 printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig); 548 printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr); 549 printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr); 550 printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0); 551 printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1); 552 printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0); 553 printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1); 554 printf(fmt1, "TLSB VMG", ptr->tlep_vmg); 555 #endif 556 } else { 557 } 558 559 /* 560 * Now that we've printed all sorts of useful information 561 * and have decided that we really can't do any more to 562 * respond to the error, go on to the common code for 563 * final disposition. Usually this means that we die. 564 */ 565 clear_tlsb_ebits(0); 566 567 machine_check(mces, framep, type, logout); 568 } 569 570 static void 571 dec_kn8ae_mcheck_handler(unsigned long mces, struct trapframe *framep, unsigned long vector, unsigned long param) 572 { 573 switch (vector) { 574 case ALPHA_SYS_ERROR: 575 kn8ae_harderr(mces, vector, param, framep); 576 break; 577 578 case ALPHA_PROC_ERROR: 579 kn8ae_softerr(mces, vector, param, framep); 580 break; 581 582 case ALPHA_SYS_MCHECK: 583 case ALPHA_PROC_MCHECK: 584 kn8ae_mcheck(mces, vector, param, framep); 585 break; 586 default: 587 printf("KN8AE_MCHECK: unknown check vector 0x%lx\n", vector); 588 machine_check(mces, framep, vector, param); 589 break; 590 } 591 } 592