1 /* $NetBSD: dec_kn8ae.c,v 1.30 2002/09/27 02:24:08 thorpej Exp $ */ 2 3 /* 4 * Copyright (c) 1997 by Matthew Jacob 5 * NASA AMES Research Center. 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice immediately at the beginning of the file, without modification, 13 * this list of conditions, and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission. 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 23 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR 24 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 30 * SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> /* RCS ID & Copyright macro defns */ 34 35 __KERNEL_RCSID(0, "$NetBSD: dec_kn8ae.c,v 1.30 2002/09/27 02:24:08 thorpej Exp $"); 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/device.h> 40 #include <sys/termios.h> 41 #include <sys/conf.h> 42 #include <dev/cons.h> 43 44 #include <machine/rpb.h> 45 #include <machine/autoconf.h> 46 #include <machine/cpuconf.h> 47 #include <machine/frame.h> 48 #include <machine/alpha.h> 49 #include <machine/cpuconf.h> 50 #include <machine/logout.h> 51 52 #include <dev/ic/comreg.h> 53 #include <dev/ic/comvar.h> 54 55 #include <dev/isa/isavar.h> 56 #include <dev/pci/pcireg.h> 57 #include <dev/pci/pcivar.h> 58 59 #include <dev/scsipi/scsi_all.h> 60 #include <dev/scsipi/scsipi_all.h> 61 #include <dev/scsipi/scsiconf.h> 62 63 #include <alpha/tlsb/tlsbreg.h> 64 #include <alpha/tlsb/tlsbvar.h> 65 #include <alpha/tlsb/kftxxreg.h> 66 #define KV(_addr) ((caddr_t)ALPHA_PHYS_TO_K0SEG((_addr))) 67 68 69 void dec_kn8ae_init __P((void)); 70 void dec_kn8ae_cons_init __P((void)); 71 static void dec_kn8ae_device_register __P((struct device *, void *)); 72 73 static void dec_kn8ae_mcheck_handler 74 __P((unsigned long, struct trapframe *, unsigned long, unsigned long)); 75 76 const struct alpha_variation_table dec_kn8ae_variations[] = { 77 { 0, "AlphaServer 8400" }, 78 { 0, NULL }, 79 }; 80 81 void 82 dec_kn8ae_init() 83 { 84 u_int64_t variation; 85 86 platform.family = "AlphaServer 8400"; 87 88 if ((platform.model = alpha_dsr_sysname()) == NULL) { 89 variation = hwrpb->rpb_variation & SV_ST_MASK; 90 if ((platform.model = alpha_variation_name(variation, 91 dec_kn8ae_variations)) == NULL) 92 platform.model = alpha_unknown_sysname(); 93 } 94 95 platform.iobus = "tlsb"; 96 platform.cons_init = dec_kn8ae_cons_init; 97 platform.device_register = dec_kn8ae_device_register; 98 platform.mcheck_handler = dec_kn8ae_mcheck_handler; 99 } 100 101 void 102 dec_kn8ae_cons_init() 103 { 104 105 /* 106 * Info to retain: 107 * 108 * The AXP 8X00 seems to encode the 109 * type of console in the ctb_type field, 110 * not the ctb_term_type field. 111 * 112 * XXX Not Type 4 CTB? 113 */ 114 } 115 116 /* #define BDEBUG 1 */ 117 static void 118 dec_kn8ae_device_register(dev, aux) 119 struct device *dev; 120 void *aux; 121 { 122 static int found, initted, scsiboot, netboot; 123 static struct device *pcidev, *scsidev; 124 struct bootdev_data *b = bootdev_data; 125 struct device *parent = dev->dv_parent; 126 struct cfdata *cf = dev->dv_cfdata; 127 const char *name = cf->cf_name; 128 129 if (found) 130 return; 131 132 if (!initted) { 133 scsiboot = (strcmp(b->protocol, "scsi") == 0); 134 netboot = (strcmp(b->protocol, "bootp") == 0) || 135 (strcmp(b->protocol, "mop") == 0); 136 #if BDEBUG 137 printf("proto:%s bus:%d slot:%d chan:%d", b->protocol, 138 b->bus, b->slot, b->channel); 139 if (b->remote_address) 140 printf(" remote_addr:%s", b->remote_address); 141 printf(" un:%d bdt:%d", b->unit, b->boot_dev_type); 142 if (b->ctrl_dev_type) 143 printf(" cdt:%s\n", b->ctrl_dev_type); 144 else 145 printf("\n"); 146 printf("scsiboot = %d, netboot = %d\n", scsiboot, netboot); 147 #endif 148 initted = 1; 149 } 150 151 if (pcidev == NULL) { 152 if (strcmp(name, "pci")) 153 return; 154 else { 155 struct pcibus_attach_args *pba = aux; 156 157 if ((b->slot / 1000) != pba->pba_bus) 158 return; 159 160 pcidev = dev; 161 #if BDEBUG 162 printf("\npcidev = %s\n", pcidev->dv_xname); 163 #endif 164 return; 165 } 166 } 167 168 if (scsiboot && (scsidev == NULL)) { 169 if (parent != pcidev) 170 return; 171 else { 172 struct pci_attach_args *pa = aux; 173 174 if ((b->slot % 1000) != pa->pa_device) 175 return; 176 177 /* XXX function? */ 178 179 scsidev = dev; 180 #if BDEBUG 181 printf("\nscsidev = %s\n", scsidev->dv_xname); 182 #endif 183 184 return; 185 } 186 } 187 188 if (scsiboot && 189 (!strcmp(name, "sd") || 190 !strcmp(name, "st") || 191 !strcmp(name, "cd"))) { 192 struct scsipibus_attach_args *sa = aux; 193 194 if (parent->dv_parent != scsidev) 195 return; 196 197 if (b->unit / 100 != sa->sa_periph->periph_target) 198 return; 199 200 /* XXX LUN! */ 201 202 /* 203 * the value in boot_dev_type is some weird number 204 * XXX: Only support SD booting for now. 205 */ 206 if (strcmp(name, "sd") && 207 strcmp(name, "cd") && 208 strcmp(name, "st")) 209 return; 210 211 /* we've found it! */ 212 booted_device = dev; 213 #if BDEBUG 214 printf("\nbooted_device = %s\n", booted_device->dv_xname); 215 #endif 216 found = 1; 217 } 218 219 if (netboot) { 220 if (parent != pcidev) 221 return; 222 else { 223 struct pci_attach_args *pa = aux; 224 225 if ((b->slot % 1000) != pa->pa_device) 226 return; 227 228 /* XXX function? */ 229 230 booted_device = dev; 231 #if BDEBUG 232 printf("\nbooted_device = %s\n", booted_device->dv_xname); 233 #endif 234 found = 1; 235 return; 236 } 237 } 238 } 239 240 /* 241 * KN8AE Machine Check Handlers. 242 */ 243 void kn8ae_harderr __P((unsigned long, unsigned long, 244 unsigned long, struct trapframe *)); 245 246 static void kn8ae_softerr __P((unsigned long, unsigned long, 247 unsigned long, struct trapframe *)); 248 249 void kn8ae_mcheck __P((unsigned long, unsigned long, 250 unsigned long, struct trapframe *)); 251 252 /* 253 * Support routine for clearing errors 254 */ 255 static void clear_tlsb_ebits __P((int)); 256 257 static void 258 clear_tlsb_ebits(cpuonly) 259 int cpuonly; 260 { 261 int node; 262 u_int32_t tldev; 263 264 for (node = 0; node <= TLSB_NODE_MAX; ++node) { 265 if ((tlsb_found & (1 << node)) == 0) 266 continue; 267 tldev = TLSB_GET_NODEREG(node, TLDEV); 268 if (tldev == 0) { 269 /* "cannot happen" */ 270 continue; 271 } 272 /* 273 * Registers to clear for all nodes. 274 */ 275 if (TLSB_GET_NODEREG(node, TLBER) & 276 (TLBER_UDE|TLBER_CWDE|TLBER_CRDE)) { 277 TLSB_PUT_NODEREG(node, TLESR0, 278 TLSB_GET_NODEREG(node, TLESR0)); 279 TLSB_PUT_NODEREG(node, TLESR1, 280 TLSB_GET_NODEREG(node, TLESR1)); 281 TLSB_PUT_NODEREG(node, TLESR2, 282 TLSB_GET_NODEREG(node, TLESR2)); 283 TLSB_PUT_NODEREG(node, TLESR3, 284 TLSB_GET_NODEREG(node, TLESR3)); 285 } 286 TLSB_PUT_NODEREG(node, TLBER, 287 TLSB_GET_NODEREG(node, TLBER)); 288 TLSB_PUT_NODEREG(node, TLFADR0, 289 TLSB_GET_NODEREG(node, TLFADR0)); 290 TLSB_PUT_NODEREG(node, TLFADR1, 291 TLSB_GET_NODEREG(node, TLFADR1)); 292 293 if (TLDEV_ISCPU(tldev)) { 294 TLSB_PUT_NODEREG(node, TLEPAERR, 295 TLSB_GET_NODEREG(node, TLEPAERR)); 296 TLSB_PUT_NODEREG(node, TLEPDERR, 297 TLSB_GET_NODEREG(node, TLEPDERR)); 298 TLSB_PUT_NODEREG(node, TLEPMERR, 299 TLSB_GET_NODEREG(node, TLEPMERR)); 300 continue; 301 } 302 /* 303 * If we're only doing CPU nodes, or this was a memory 304 * node, we're done. Onwards. 305 */ 306 if (cpuonly || TLDEV_ISMEM(tldev)) { 307 continue; 308 } 309 310 TLSB_PUT_NODEREG(node, KFT_ICCNSE, 311 TLSB_GET_NODEREG(node, KFT_ICCNSE)); 312 TLSB_PUT_NODEREG(node, KFT_IDPNSE0, 313 TLSB_GET_NODEREG(node, KFT_IDPNSE0)); 314 TLSB_PUT_NODEREG(node, KFT_IDPNSE1, 315 TLSB_GET_NODEREG(node, KFT_IDPNSE1)); 316 if (TLDEV_DTYPE(tldev) == TLDEV_DTYPE_KFTHA) { 317 TLSB_PUT_NODEREG(node, KFT_IDPNSE2, 318 TLSB_GET_NODEREG(node, KFT_IDPNSE2)); 319 TLSB_PUT_NODEREG(node, KFT_IDPNSE3, 320 TLSB_GET_NODEREG(node, KFT_IDPNSE3)); 321 } 322 /* 323 * Digital Unix cleares the Mailbox Transaction Register 324 * here. I don't think we should because we aren't using 325 * mailboxes yet, and the tech manual makes dire warnings 326 * about *not* rewriting this register. 327 */ 328 } 329 } 330 331 /* 332 * System Corrected Errors. 333 */ 334 static const char *fmt1 = " %-25s = 0x%l016x\n"; 335 336 void 337 kn8ae_harderr(mces, type, logout, framep) 338 unsigned long mces; 339 unsigned long type; 340 unsigned long logout; 341 struct trapframe *framep; 342 { 343 int whami, cpuwerr, dof_cnt; 344 mc_hdr_ev5 *hdr; 345 mc_cc_ev5 *mptr; 346 struct tlsb_mchk_fatal *ptr; 347 348 hdr = (mc_hdr_ev5 *) logout; 349 mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr)); 350 ptr = (struct tlsb_mchk_fatal *) 351 (logout + sizeof (*hdr) + sizeof (*mptr)); 352 whami = alpha_pal_whami(); 353 354 printf("kn8ae: CPU ID %d system correctable error\n", whami); 355 356 printf(" Machine Check Code 0x%lx\n", hdr->mcheck_code); 357 printf(fmt1, "EI Status", mptr->ei_stat); 358 printf(fmt1, "EI Address", mptr->ei_addr); 359 printf(fmt1, "Fill Syndrome", mptr->fill_syndrome); 360 printf(fmt1, "Interrupt Status Reg.", mptr->isr); 361 printf("\n"); 362 dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32; 363 cpuwerr = ptr->rsvdheader & 0xffff; 364 365 printf(fmt1, "CPU W/Error.", cpuwerr); 366 printf(fmt1, "DOF Count.", dof_cnt); 367 printf(fmt1, "TLDEV", ptr->tldev); 368 printf(fmt1, "TLSB Bus Error", ptr->tlber); 369 printf(fmt1, "TLSB CNR", ptr->tlcnr); 370 printf(fmt1, "TLSB VID", ptr->tlvid); 371 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0); 372 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1); 373 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2); 374 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3); 375 printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr); 376 printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig); 377 printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr); 378 printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr); 379 printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0); 380 printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1); 381 printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0); 382 printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1); 383 printf(fmt1, "TLSB VMG", ptr->tlep_vmg); 384 385 /* CLEAN UP */ 386 /* 387 * Here's what Digital Unix says to do- 388 * 389 * 1. Log the ECC error that got us here 390 * 391 * 2. Turn off error reporting 392 * 393 * 3. Attempt to have CPU read bad memory location (specified by the 394 * tlfadr reg of the TIOP or TMEM (depending on type of error, 395 * see upcoming code branches) and write data back to location. 396 * 397 * 4. When the CPU attempts to read the location, another 620 interrupt 398 * should occur for the cpu at which instant PAL will scrub the 399 * location. Then the o.s. scrub routine finishes. If the PAL scrubs 400 * the location then the scrubbed flag should be 0 (this is what we 401 * expect). 402 * 403 * If it's a 1 then the alpha_scrub_long routine did the scrub. 404 * 405 * 5. We renable correctable error logging and continue 406 */ 407 printf("WARNING THIS IS NOT DONE YET YOU MAY GET DATA CORRUPTION"); 408 clear_tlsb_ebits(0); 409 /* 410 * Clear error by rewriting register. 411 */ 412 alpha_pal_wrmces(mces); 413 } 414 415 /* 416 * Processor Corrected Errors- BCACHE ECC errors. 417 */ 418 419 static void 420 kn8ae_softerr(mces, type, logout, framep) 421 unsigned long mces; 422 unsigned long type; 423 unsigned long logout; 424 struct trapframe *framep; 425 { 426 int whami, cpuwerr, dof_cnt; 427 mc_hdr_ev5 *hdr; 428 mc_cc_ev5 *mptr; 429 struct tlsb_mchk_soft *ptr; 430 431 hdr = (mc_hdr_ev5 *) logout; 432 mptr = (mc_cc_ev5 *) (logout + sizeof (*hdr)); 433 ptr = (struct tlsb_mchk_soft *) 434 (logout + sizeof (*hdr) + sizeof (*mptr)); 435 whami = alpha_pal_whami(); 436 437 printf("kn8ae: CPU ID %d processor correctable error\n", whami); 438 printf(" Machine Check Code 0x%lx\n", hdr->mcheck_code); 439 printf(fmt1, "EI Status", mptr->ei_stat); 440 printf(fmt1, "EI Address", mptr->ei_addr); 441 printf(fmt1, "Fill Syndrome", mptr->fill_syndrome); 442 printf(fmt1, "Interrupt Status Reg.", mptr->isr); 443 printf("\n"); 444 dof_cnt = (ptr->rsvdheader & 0xffffffff00000000) >> 32; 445 cpuwerr = ptr->rsvdheader & 0xffff; 446 447 printf(fmt1, "CPU W/Error.", cpuwerr); 448 printf(fmt1, "DOF Count.", dof_cnt); 449 printf(fmt1, "TLDEV", ptr->tldev); 450 printf(fmt1, "TLSB Bus Error", ptr->tlber); 451 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0); 452 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1); 453 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2); 454 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3); 455 456 /* 457 * Clear TLSB bits on all CPU TLSB nodes. 458 */ 459 clear_tlsb_ebits(1); 460 461 /* 462 * Clear error by rewriting register. 463 */ 464 alpha_pal_wrmces(mces); 465 } 466 467 /* 468 * KN8AE specific machine check handler 469 */ 470 471 void 472 kn8ae_mcheck(mces, type, logout, framep) 473 unsigned long mces; 474 unsigned long type; 475 unsigned long logout; 476 struct trapframe *framep; 477 { 478 struct mchkinfo *mcp; 479 int get_dwlpx_regs; 480 struct tlsb_mchk_fatal mcs[TLSB_NODE_MAX+1], *ptr; 481 mc_hdr_ev5 *hdr; 482 mc_uc_ev5 *mptr; 483 484 /* 485 * If we expected a machine check, just go handle it in common code. 486 */ 487 mcp = &curcpu()->ci_mcinfo; 488 if (mcp->mc_expected) { 489 machine_check(mces, framep, type, logout); 490 return; 491 } 492 493 get_dwlpx_regs = 0; 494 ptr = NULL; 495 memset(mcs, 0, sizeof (mcs)); 496 497 hdr = (mc_hdr_ev5 *) logout; 498 mptr = (mc_uc_ev5 *) (logout + sizeof (*hdr)); 499 500 /* 501 * If detected by the system, we print out some TLASER registers. 502 */ 503 if (type == ALPHA_SYS_MCHECK) { 504 #if 0 505 int get_lsb_regs = 0; 506 int get_dwlpx_regs = 0; 507 #endif 508 509 ptr = (struct tlsb_mchk_fatal *) 510 (logout + sizeof (*hdr) + sizeof (*mptr)); 511 512 #if 0 513 if (ptr->tlepaerr & TLEPAERR_WSPC_RD) { 514 get_dwlpx_regs++; 515 } 516 if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) && 517 (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) && 518 (ptr->tlepderr & TLEPDERR_GBTMO)) { 519 get_dwlpx_regs++; 520 } 521 #endif 522 } else { 523 /* 524 * We have a processor machine check- which doesn't 525 * have information with it about any TLSB related 526 * failures. 527 */ 528 } 529 530 /* 531 * Now we can finally print some stuff... 532 */ 533 ev5_logout_print(hdr, mptr); 534 if (type == ALPHA_SYS_MCHECK) { 535 if (ptr->tlepaerr & TLEPAERR_WSPC_RD) { 536 printf("\tWSPC READ error\n"); 537 } 538 if ((ptr->tlepaerr & TLEPAERR_IBOX_TMO) && 539 (mptr->ic_perr_stat & EV5_IC_PERR_IBOXTMO) && 540 (ptr->tlepderr & TLEPDERR_GBTMO)) { 541 printf ("\tWSPC IBOX timeout detected\n"); 542 } 543 #ifdef DIAGNOSTIC 544 printf(fmt1, "TLDEV", ptr->tldev); 545 printf(fmt1, "TLSB Bus Error", ptr->tlber); 546 printf(fmt1, "TLSB CNR", ptr->tlcnr); 547 printf(fmt1, "TLSB VID", ptr->tlvid); 548 printf(fmt1, "TLSB Error Syndrome 0", ptr->tlesr0); 549 printf(fmt1, "TLSB Error Syndrome 1", ptr->tlesr1); 550 printf(fmt1, "TLSB Error Syndrome 2", ptr->tlesr2); 551 printf(fmt1, "TLSB Error Syndrome 3", ptr->tlesr3); 552 printf(fmt1, "TLSB LEP_AERR", ptr->tlepaerr); 553 printf(fmt1, "TLSB MODCONF", ptr->tlmodconfig); 554 printf(fmt1, "TLSB LEP_MERR", ptr->tlepmerr); 555 printf(fmt1, "TLSB LEP_DERR", ptr->tlepderr); 556 printf(fmt1, "TLSB INTRMASK0", ptr->tlintrmask0); 557 printf(fmt1, "TLSB INTRMASK1", ptr->tlintrmask1); 558 printf(fmt1, "TLSB INTRSUM0", ptr->tlintrsum0); 559 printf(fmt1, "TLSB INTRSUM1", ptr->tlintrsum1); 560 printf(fmt1, "TLSB VMG", ptr->tlep_vmg); 561 #endif 562 } else { 563 } 564 565 /* 566 * Now that we've printed all sorts of useful information 567 * and have decided that we really can't do any more to 568 * respond to the error, go on to the common code for 569 * final disposition. Usually this means that we die. 570 */ 571 clear_tlsb_ebits(0); 572 573 machine_check(mces, framep, type, logout); 574 } 575 576 static void 577 dec_kn8ae_mcheck_handler(mces, framep, vector, param) 578 unsigned long mces; 579 struct trapframe *framep; 580 unsigned long vector; 581 unsigned long param; 582 { 583 switch (vector) { 584 case ALPHA_SYS_ERROR: 585 kn8ae_harderr(mces, vector, param, framep); 586 break; 587 588 case ALPHA_PROC_ERROR: 589 kn8ae_softerr(mces, vector, param, framep); 590 break; 591 592 case ALPHA_SYS_MCHECK: 593 case ALPHA_PROC_MCHECK: 594 kn8ae_mcheck(mces, vector, param, framep); 595 break; 596 default: 597 printf("KN8AE_MCHECK: unknown check vector 0x%lx\n", vector); 598 machine_check(mces, framep, vector, param); 599 break; 600 } 601 } 602