/* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
 * - Multithreading support by Arne Welzel
 * - Native Command Queuing support by Raja Appuswamy
 */
/*
 * This driver is based on the following specifications:
 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
 * - Serial ATA Revision 2.6
 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
 *
 * The driver supports device hot-plug, active device status tracking,
 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
 * sector-unaligned reads, native command queuing and parallel requests to
 * different devices.
 *
 * It does not implement transparent failure recovery, power management, or
 * port multiplier support.
 */
/*
 * An AHCI controller exposes a number of ports (up to 32), each of which may
 * or may not have one device attached (port multipliers are not supported).
 * Each port is maintained independently.
 *
 * The following figure depicts the possible transitions between port states.
 * The NO_PORT state is not included; no transitions can be made from or to it.
 *
 *   +----------+                      +----------+
 *   | SPIN_UP  | ------+      +-----> | BAD_DEV  | ------------------+
 *   +----------+       |      |       +----------+                   |
 *        |             |      |            ^                         |
 *        v             v      |            |                         |
 *   +----------+     +----------+     +----------+     +----------+  |
 *   |  NO_DEV  | --> | WAIT_DEV | --> | WAIT_ID  | --> | GOOD_DEV |  |
 *   +----------+     +----------+     +----------+     +----------+  |
 *        ^                |                |                |        |
 *        +----------------+----------------+----------------+--------+
 *
 * At driver startup, all physically present ports are put in SPIN_UP state.
 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
 * until either the spin-up timer expires, or a device has been identified on
 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
 * startup time if the device has not yet been able to announce its presence.
 *
 * If a device is detected, either at startup time or after hot-plug, its
 * signature is checked and it is identified, after which it may be determined
 * to be a usable ("good") device, which means that the device is considered
 * to be in a working state. If these steps fail, the device is marked as
 * unusable ("bad"). At any point in time, the device may be disconnected; the
 * port is then put back into NO_DEV state.
 *
 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
 * devices are assumed to be fixed; all ATAPI devices are assumed to have
 * removable media. To prevent erroneous access to switched devices and media,
 * the driver makes devices inaccessible until they are fully closed (the open
 * count is zero) when a device (hot-plug) or medium change is detected.
 * For hot-plug changes, access is prevented by setting the BARRIER flag until
 * the device is fully closed and then reopened. For medium changes, access is
 * prevented by not acknowledging the medium change until the device is fully
 * closed and reopened. Removable media are not locked in the drive while
 * opened, because the driver author is uncomfortable with that concept.
 *
 * Ports may leave the group of states where a device is connected (that is,
 * WAIT_ID, GOOD_DEV, and BAD_DEV) in two ways: either due to a hot-unplug
 * event, or due to a hard reset after a serious failure. For simplicity, we
 * perform a hard reset after a hot-unplug event as well, so that the link
 * to the device is broken. Thus, in both cases, a transition to NO_DEV is
 * made, after which the link to the device may or may not be reestablished.
 * In both cases, ongoing requests are cancelled and the BARRIER flag is set.
 *
 * The following table lists for each state, whether the port is started
 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
 * set to, and what BDEV_OPEN calls on this port should return.
 *
 *   State       Started     Timer       PxIE        BDEV_OPEN
 *   ---------   ---------   ---------   ---------   ---------
 *   NO_PORT     no          no          (none)      ENXIO
 *   SPIN_UP     no          yes         PCE         (wait)
 *   NO_DEV      no          no          PCE         ENXIO
 *   WAIT_DEV    no          yes         PCE         (wait)
 *   BAD_DEV     no          no          PRCE        ENXIO
 *   WAIT_ID     yes         yes         PRCE+       (wait)
 *   GOOD_DEV    yes         per-command PRCE+       OK
 *
 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
 * when changing from SPIN_UP to any state but WAIT_DEV, when changing from
 * WAIT_DEV to any state but WAIT_ID, and when changing from WAIT_ID to any
 * other state.
 */
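/*
 * As an illustration of the rule above, a hypothetical state-change helper
 * (a sketch only, not part of this driver) could centralize the BUSY-flag
 * bookkeeping that the actual code performs at each transition site:
 */
#if 0
static void port_change_state(struct port_state *ps, int state)
{
	int old_state = ps->state;

	ps->state = state;

	/* Resume deferred BDEV_OPEN calls when leaving the wait chain. */
	if ((old_state == STATE_SPIN_UP && state != STATE_WAIT_DEV) ||
	    (old_state == STATE_WAIT_DEV && state != STATE_WAIT_ID) ||
	    old_state == STATE_WAIT_ID)
		ps->flags &= ~FLAG_BUSY;
}
#endif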
/*
 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
 * to 4MB. This limit has been chosen for a number of reasons:
 * - The size that can be specified in a Physical Region Descriptor (PRD) is
 *   limited to 4MB for AHCI. Limiting the total transfer size to at most this
 *   size implies that no I/O vector element needs to be split up across PRDs.
 *   This means that the maximum number of needed PRDs can be predetermined.
 * - The limit is below what can be transferred in a single ATA request, namely
 *   64k sectors (i.e., at least 32MB). This means that transfer requests need
 *   never be split up into smaller chunks, reducing implementation complexity.
 * - A single, static timeout can be used for transfers. Very large transfers
 *   can legitimately take up to several minutes -- well beyond the appropriate
 *   timeout range for small transfers. The limit obviates the need for a
 *   timeout scheme that takes into account the transfer size.
 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
 *   clients to keep the driver busy for a long time with a single request.
 * - The limit is high enough for all practical purposes. The transfer setup
 *   overhead is already relatively negligible at this size, and even larger
 *   requests will not help maximize throughput. As NR_IOREQS is currently set
 *   to 64, the limit still allows file systems to perform I/O requests with
 *   vectors completely filled with 64KB-blocks.
 */
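/*
 * To make the first two points above concrete (illustrative arithmetic only,
 * assuming 512-byte sectors; the authoritative bounds live in ahci.h):
 */
#if 0
assert(MAX_TRANSFER <= 4UL * 1024 * 1024);	/* fits in a single PRD entry */
assert(MAX_TRANSFER / 512 <= 65536);		/* fits in one ATA sector count */
assert(MAX_TRANSFER == 64 * 65536);		/* NR_IOREQS x 64KB vector fits */
#endif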
#include <minix/drivers.h>
#include <minix/blockdriver_mt.h>
#include <minix/drvlib.h>
#include <machine/pci.h>
#include <sys/ioc_disk.h>
#include <sys/mman.h>
#include <assert.h>

#include "ahci.h"

/* Host Bus Adapter (HBA) state. */
static struct {
	volatile u32_t *base;	/* base address of memory-mapped registers */
	size_t size;		/* size of memory-mapped register area */

	int nr_ports;		/* addressable number of ports (1..NR_PORTS) */
	int nr_cmds;		/* maximum number of commands per port */
	int has_ncq;		/* NCQ support flag */
	int has_clo;		/* CLO support flag */

	int irq;		/* IRQ number */
	int hook_id;		/* IRQ hook ID */
} hba_state;

#define hba_read(r)		(hba_state.base[r])
#define hba_write(r, v)		(hba_state.base[r] = (v))

/* Port state. */
static struct port_state {
	int state;		/* port state */
	unsigned int flags;	/* port flags */

	volatile u32_t *reg;	/* memory-mapped port registers */

	u8_t *mem_base;		/* primary memory buffer virtual address */
	phys_bytes mem_phys;	/* primary memory buffer physical address */
	vir_bytes mem_size;	/* primary memory buffer size */

	/* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
	u32_t *fis_base;	/* FIS receive buffer virtual address */
	phys_bytes fis_phys;	/* FIS receive buffer physical address */
	u32_t *cl_base;		/* command list buffer virtual address */
	phys_bytes cl_phys;	/* command list buffer physical address */
	u8_t *ct_base[NR_CMDS];	/* command table virtual address */
	phys_bytes ct_phys[NR_CMDS];	/* command table physical address */
	u8_t *tmp_base;		/* temporary storage buffer virtual address */
	phys_bytes tmp_phys;	/* temporary storage buffer physical address */

	u8_t *pad_base;		/* sector padding buffer virtual address */
	phys_bytes pad_phys;	/* sector padding buffer physical address */
	vir_bytes pad_size;	/* sector padding buffer size */

	u64_t lba_count;	/* number of valid Logical Block Addresses */
	u32_t sector_size;	/* medium sector size in bytes */

	int open_count;		/* number of times this port is opened */

	int device;		/* associated device number, or NO_DEVICE */
	struct device part[DEV_PER_DRIVE];	/* partition bases and sizes */
	struct device subpart[SUB_PER_DRIVE];	/* same for subpartitions */

	minix_timer_t timer;	/* port-specific timeout timer */
	int left;		/* number of tries left before giving up */
				/* (only used for signature probing) */

	int queue_depth;	/* NCQ queue depth */
	u32_t pend_mask;	/* commands not yet complete */
	struct {
		thread_id_t tid;	/* ID of the worker thread */
		minix_timer_t timer;	/* timer associated with each request */
		int result;	/* success/failure result of the commands */
	} cmd_info[NR_CMDS];
} port_state[NR_PORTS];

#define port_read(ps, r)	((ps)->reg[r])
#define port_write(ps, r, v)	((ps)->reg[r] = (v))

static int ahci_instance;			/* driver instance number */

static int ahci_verbose;			/* verbosity level (0..4) */

/* Timeout-related values. */
static clock_t ahci_spinup_timeout;
static clock_t ahci_device_timeout;
static clock_t ahci_device_delay;
static unsigned int ahci_device_checks;
static clock_t ahci_command_timeout;
static clock_t ahci_transfer_timeout;
static clock_t ahci_flush_timeout;

/* Timeout environment variable names and default values. */
static struct {
	char *name;				/* environment variable name */
	u32_t default_ms;			/* default in milliseconds */
	clock_t *ptr;				/* clock ticks value pointer */
} ahci_timevar[] = {
	{ "ahci_init_timeout",   SPINUP_TIMEOUT,   &ahci_spinup_timeout   },
	{ "ahci_device_timeout", DEVICE_TIMEOUT,   &ahci_device_timeout   },
	{ "ahci_cmd_timeout",    COMMAND_TIMEOUT,  &ahci_command_timeout  },
	{ "ahci_io_timeout",     TRANSFER_TIMEOUT, &ahci_transfer_timeout },
	{ "ahci_flush_timeout",  FLUSH_TIMEOUT,    &ahci_flush_timeout    }
};

static int ahci_map[MAX_DRIVES];		/* device-to-port mapping */

static int ahci_exiting = FALSE;		/* exit after last close? */

#define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
#define GET_PORT(arg)		((arg) >> 8)
#define GET_TAG(arg)		((arg) & 0xFF)

#define dprintf(v,s) do {		\
	if (ahci_verbose >= (v))	\
		printf s;		\
} while (0)

/* Convert milliseconds to clock ticks. Round up. */
#define millis_to_hz(ms)	(((ms) * sys_hz() + 999) / 1000)
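/*
 * A small usage sketch (illustrative only): a per-command timer argument
 * encodes a (port, tag) pair that port_timeout() later decodes again.
 */
#if 0
minix_timer_t timer;
int arg = BUILD_ARG(3, 12);	/* port 3, command tag 12 */

assert(GET_PORT(arg) == 3 && GET_TAG(arg) == 12);
set_timer(&timer, millis_to_hz(5000), port_timeout, arg);	/* 5 seconds */
#endif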
static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write);
static void port_issue(struct port_state *ps, int cmd, clock_t timeout);
static int port_exec(struct port_state *ps, int cmd, clock_t timeout);
static void port_timeout(int arg);
static void port_disconnect(struct port_state *ps);

static char *ahci_portname(struct port_state *ps);
static int ahci_open(devminor_t minor, int access);
static int ahci_close(devminor_t minor);
static ssize_t ahci_transfer(devminor_t minor, int do_write, u64_t position,
	endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags);
static struct device *ahci_part(devminor_t minor);
static void ahci_alarm(clock_t stamp);
static int ahci_ioctl(devminor_t minor, unsigned long request,
	endpoint_t endpt, cp_grant_id_t grant, endpoint_t user_endpt);
static void ahci_intr(unsigned int mask);
static int ahci_device(devminor_t minor, device_id_t *id);
static struct port_state *ahci_get_port(devminor_t minor);

/* AHCI driver table. */
static struct blockdriver ahci_dtab = {
	.bdr_type	= BLOCKDRIVER_TYPE_DISK,
	.bdr_open	= ahci_open,
	.bdr_close	= ahci_close,
	.bdr_transfer	= ahci_transfer,
	.bdr_ioctl	= ahci_ioctl,
	.bdr_part	= ahci_part,
	.bdr_intr	= ahci_intr,
	.bdr_alarm	= ahci_alarm,
	.bdr_device	= ahci_device
};

/*===========================================================================*
 *				atapi_exec				     *
 *===========================================================================*/
static int atapi_exec(struct port_state *ps, int cmd,
	u8_t packet[ATAPI_PACKET_SIZE], size_t size, int write)
{
	/* Execute an ATAPI command. Return OK or error.
	 */
	cmd_fis_t fis;
	prd_t prd[1];
	int nr_prds = 0;

	assert(size <= AHCI_TMP_SIZE);

	/* Fill in the command table with a FIS, a packet, and if a data
	 * transfer is requested, also a PRD.
	 */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_PACKET;

	if (size > 0) {
		fis.cf_feat = ATA_FEAT_PACKET_DMA;
		if (!write && (ps->flags & FLAG_USE_DMADIR))
			fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;

		prd[0].vp_addr = ps->tmp_phys;
		prd[0].vp_size = size;
		nr_prds++;
	}

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, packet, prd, nr_prds, write);

	return port_exec(ps, cmd, ahci_command_timeout);
}

/*===========================================================================*
 *				atapi_test_unit				     *
 *===========================================================================*/
static int atapi_test_unit(struct port_state *ps, int cmd)
{
	/* Test whether the ATAPI device and medium are ready.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_TEST_UNIT;

	return atapi_exec(ps, cmd, packet, 0, FALSE);
}

/*===========================================================================*
 *				atapi_request_sense			     *
 *===========================================================================*/
static int atapi_request_sense(struct port_state *ps, int cmd, int *sense)
{
	/* Request error (sense) information from an ATAPI device, and return
	 * the sense key. The additional sense codes are not used at this time.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];
	int r;

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_REQUEST_SENSE;
	packet[4] = ATAPI_REQUEST_SENSE_LEN;

	r = atapi_exec(ps, cmd, packet, ATAPI_REQUEST_SENSE_LEN, FALSE);

	if (r != OK)
		return r;

	dprintf(V_REQ, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
		ahci_portname(ps), ps->tmp_base[2] & 0xF, ps->tmp_base[12],
		ps->tmp_base[13]));

	*sense = ps->tmp_base[2] & 0xF;

	return OK;
}
/*===========================================================================*
 *				atapi_load_eject			     *
 *===========================================================================*/
static int atapi_load_eject(struct port_state *ps, int cmd, int load)
{
	/* Load or eject a medium in an ATAPI device.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_START_STOP;
	packet[4] = load ? ATAPI_START_STOP_LOAD : ATAPI_START_STOP_EJECT;

	return atapi_exec(ps, cmd, packet, 0, FALSE);
}

/*===========================================================================*
 *				atapi_read_capacity			     *
 *===========================================================================*/
static int atapi_read_capacity(struct port_state *ps, int cmd)
{
	/* Retrieve the LBA count and sector size of an ATAPI medium.
	 */
	u8_t packet[ATAPI_PACKET_SIZE], *buf;
	int r;

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_READ_CAPACITY;

	r = atapi_exec(ps, cmd, packet, ATAPI_READ_CAPACITY_LEN, FALSE);
	if (r != OK)
		return r;

	/* Store the number of LBA blocks and sector size. The fields are
	 * big-endian; cast the first byte to avoid sign extension.
	 */
	buf = ps->tmp_base;
	ps->lba_count = (u64_t) (((u32_t) buf[0] << 24) | (buf[1] << 16) |
		(buf[2] << 8) | buf[3]) + 1;
	ps->sector_size =
		((u32_t) buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) |
		buf[7];

	if (ps->sector_size == 0 || (ps->sector_size & 1)) {
		dprintf(V_ERR, ("%s: invalid medium sector size %u\n",
			ahci_portname(ps), ps->sector_size));

		return EINVAL;
	}

	dprintf(V_INFO,
		("%s: medium detected (%u byte sectors, %llu MB size)\n",
		ahci_portname(ps), ps->sector_size,
		ps->lba_count * ps->sector_size / (1024*1024)));

	return OK;
}

/*===========================================================================*
 *				atapi_check_medium			     *
 *===========================================================================*/
static int atapi_check_medium(struct port_state *ps, int cmd)
{
	/* Check whether a medium is present in a removable-media ATAPI
	 * device. If a new medium is detected, get its total and sector size.
	 * Return OK only if a usable medium is present, and an error
	 * otherwise.
	 */
	int sense;

	/* Perform a readiness check. */
	if (atapi_test_unit(ps, cmd) != OK) {
		ps->flags &= ~FLAG_HAS_MEDIUM;

		/* If the check failed due to a unit attention condition,
		 * retry reading the medium capacity. Otherwise, assume that
		 * there is no medium available.
		 */
		if (atapi_request_sense(ps, cmd, &sense) != OK ||
				sense != ATAPI_SENSE_UNIT_ATT)
			return ENXIO;
	}

	/* If a medium is newly detected, try reading its capacity now. */
	if (!(ps->flags & FLAG_HAS_MEDIUM)) {
		if (atapi_read_capacity(ps, cmd) != OK)
			return EIO;

		ps->flags |= FLAG_HAS_MEDIUM;
	}

	return OK;
}
/*===========================================================================*
 *				atapi_id_check				     *
 *===========================================================================*/
static int atapi_id_check(struct port_state *ps, u16_t *buf)
{
	/* Determine whether we support this ATAPI device based on the
	 * identification data it returned, and store some of its properties.
	 */

	/* The device must be an ATAPI device; it must have removable media;
	 * it must support DMA without DMADIR, or DMADIR for DMA.
	 */
	if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATAPI_MASK |
		ATA_ID_GCAP_REMOVABLE | ATA_ID_GCAP_INCOMPLETE)) !=
		(ATA_ID_GCAP_ATAPI | ATA_ID_GCAP_REMOVABLE) ||
		((buf[ATA_ID_CAP] & ATA_ID_CAP_DMA) != ATA_ID_CAP_DMA &&
		(buf[ATA_ID_DMADIR] & (ATA_ID_DMADIR_DMADIR |
		ATA_ID_DMADIR_DMA)) != (ATA_ID_DMADIR_DMADIR |
		ATA_ID_DMADIR_DMA))) {

		dprintf(V_ERR, ("%s: unsupported ATAPI device\n",
			ahci_portname(ps)));

		dprintf(V_DEV, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
			ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
			buf[ATA_ID_DMADIR]));

		return FALSE;
	}

	/* Remember whether to use the DMADIR flag when appropriate. */
	if (buf[ATA_ID_DMADIR] & ATA_ID_DMADIR_DMADIR)
		ps->flags |= FLAG_USE_DMADIR;

	/* ATAPI CD-ROM devices are considered read-only. */
	if (((buf[ATA_ID_GCAP] & ATA_ID_GCAP_TYPE_MASK) >>
		ATA_ID_GCAP_TYPE_SHIFT) == ATAPI_TYPE_CDROM)
		ps->flags |= FLAG_READONLY;

	if ((buf[ATA_ID_SUP1] & ATA_ID_SUP1_VALID_MASK) == ATA_ID_SUP1_VALID &&
		!(ps->flags & FLAG_READONLY)) {
		/* Save write cache related capabilities of the device. It is
		 * possible, although unlikely, that a device has support for
		 * either of these but not both.
		 */
		if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
			ps->flags |= FLAG_HAS_WCACHE;

		if (buf[ATA_ID_SUP1] & ATA_ID_SUP1_FLUSH)
			ps->flags |= FLAG_HAS_FLUSH;
	}

	return TRUE;
}

/*===========================================================================*
 *				atapi_transfer				     *
 *===========================================================================*/
static int atapi_transfer(struct port_state *ps, int cmd, u64_t start_lba,
	unsigned int count, int write, prd_t *prdt, int nr_prds)
{
	/* Perform data transfer from or to an ATAPI device.
	 */
	cmd_fis_t fis;
	u8_t packet[ATAPI_PACKET_SIZE];

	/* Fill in a Register Host to Device FIS. */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_PACKET;
	fis.cf_feat = ATA_FEAT_PACKET_DMA;
	if (!write && (ps->flags & FLAG_USE_DMADIR))
		fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;

	/* Fill in a packet. */
	memset(packet, 0, sizeof(packet));
	packet[0] = write ? ATAPI_CMD_WRITE : ATAPI_CMD_READ;
	packet[2] = (start_lba >> 24) & 0xFF;
	packet[3] = (start_lba >> 16) & 0xFF;
	packet[4] = (start_lba >> 8) & 0xFF;
	packet[5] = start_lba & 0xFF;
	packet[6] = (count >> 24) & 0xFF;
	packet[7] = (count >> 16) & 0xFF;
	packet[8] = (count >> 8) & 0xFF;
	packet[9] = count & 0xFF;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, packet, prdt, nr_prds, write);

	return port_exec(ps, cmd, ahci_transfer_timeout);
}
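/*
 * For reference (not stated in the code above, but following from the
 * SFF-8070/SCSI command set it implements): the packet built here matches the
 * 12-byte READ(12)/WRITE(12) CDB form, with the opcode in byte 0, the
 * big-endian starting LBA in bytes 2-5, and the big-endian sector count in
 * bytes 6-9. As a worked example, reading 16 sectors starting at LBA 0x12345
 * yields bytes 2-9 of 00 01 23 45 00 00 00 10.
 */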
/*===========================================================================*
 *				ata_id_check				     *
 *===========================================================================*/
static int ata_id_check(struct port_state *ps, u16_t *buf)
{
	/* Determine whether we support this ATA device based on the
	 * identification data it returned, and store some of its properties.
	 */

	/* This must be an ATA device; it must not have removable media;
	 * it must support LBA and DMA; it must support the FLUSH CACHE
	 * command; it must support 48-bit addressing.
	 */
	if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATA_MASK | ATA_ID_GCAP_REMOVABLE |
		ATA_ID_GCAP_INCOMPLETE)) != ATA_ID_GCAP_ATA ||
		(buf[ATA_ID_CAP] & (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA)) !=
		(ATA_ID_CAP_LBA | ATA_ID_CAP_DMA) ||
		(buf[ATA_ID_SUP1] & (ATA_ID_SUP1_VALID_MASK |
		ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) !=
		(ATA_ID_SUP1_VALID | ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) {

		dprintf(V_ERR, ("%s: unsupported ATA device\n",
			ahci_portname(ps)));

		dprintf(V_DEV, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
			ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
			buf[ATA_ID_SUP1]));

		return FALSE;
	}

	/* Get number of LBA blocks, and sector size. */
	ps->lba_count = ((u64_t) buf[ATA_ID_LBA3] << 48) |
			((u64_t) buf[ATA_ID_LBA2] << 32) |
			((u64_t) buf[ATA_ID_LBA1] << 16) |
			 (u64_t) buf[ATA_ID_LBA0];

	/* Determine the queue depth of the device. */
	if (hba_state.has_ncq &&
			(buf[ATA_ID_SATA_CAP] & ATA_ID_SATA_CAP_NCQ)) {
		ps->flags |= FLAG_HAS_NCQ;
		ps->queue_depth =
			(buf[ATA_ID_QDEPTH] & ATA_ID_QDEPTH_MASK) + 1;
		if (ps->queue_depth > hba_state.nr_cmds)
			ps->queue_depth = hba_state.nr_cmds;
	}

	/* For now, we only support long logical sectors. Long physical sector
	 * support may be added later. Note that the given value is in words.
	 */
	if ((buf[ATA_ID_PLSS] & (ATA_ID_PLSS_VALID_MASK | ATA_ID_PLSS_LLS)) ==
		(ATA_ID_PLSS_VALID | ATA_ID_PLSS_LLS))
		ps->sector_size =
			((buf[ATA_ID_LSS1] << 16) | buf[ATA_ID_LSS0]) << 1;
	else
		ps->sector_size = ATA_SECTOR_SIZE;

	if (ps->sector_size < ATA_SECTOR_SIZE) {
		dprintf(V_ERR, ("%s: invalid sector size %u\n",
			ahci_portname(ps), ps->sector_size));

		return FALSE;
	}

	ps->flags |= FLAG_HAS_MEDIUM | FLAG_HAS_FLUSH;

	/* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
	if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
		ps->flags |= FLAG_HAS_WCACHE;

	/* Check Force Unit Access capability of the device. */
	if ((buf[ATA_ID_ENA2] & (ATA_ID_ENA2_VALID_MASK | ATA_ID_ENA2_FUA)) ==
		(ATA_ID_ENA2_VALID | ATA_ID_ENA2_FUA))
		ps->flags |= FLAG_HAS_FUA;

	return TRUE;
}
/*===========================================================================*
 *				ata_transfer				     *
 *===========================================================================*/
static int ata_transfer(struct port_state *ps, int cmd, u64_t start_lba,
	unsigned int count, int write, int force, prd_t *prdt, int nr_prds)
{
	/* Perform data transfer from or to an ATA device.
	 */
	cmd_fis_t fis;

	assert(count <= ATA_MAX_SECTORS);

	/* Special case for sector counts: 65536 is specified as 0. */
	if (count == ATA_MAX_SECTORS)
		count = 0;

	memset(&fis, 0, sizeof(fis));
	fis.cf_dev = ATA_DEV_LBA;
	if (ps->flags & FLAG_HAS_NCQ) {
		if (write) {
			if (force && (ps->flags & FLAG_HAS_FUA))
				fis.cf_dev |= ATA_DEV_FUA;

			fis.cf_cmd = ATA_CMD_WRITE_FPDMA_QUEUED;
		} else {
			fis.cf_cmd = ATA_CMD_READ_FPDMA_QUEUED;
		}
	}
	else {
		if (write) {
			if (force && (ps->flags & FLAG_HAS_FUA))
				fis.cf_cmd = ATA_CMD_WRITE_DMA_FUA_EXT;
			else
				fis.cf_cmd = ATA_CMD_WRITE_DMA_EXT;
		}
		else {
			fis.cf_cmd = ATA_CMD_READ_DMA_EXT;
		}
	}
	fis.cf_lba = start_lba & 0x00FFFFFFUL;
	fis.cf_lba_exp = (start_lba >> 24) & 0x00FFFFFFUL;
	fis.cf_sec = count & 0xFF;
	fis.cf_sec_exp = (count >> 8) & 0xFF;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, NULL /*packet*/, prdt, nr_prds, write);

	return port_exec(ps, cmd, ahci_transfer_timeout);
}
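/*
 * A quick worked example of the 48-bit LBA split above (illustrative only):
 * for start_lba = 0x0123456789AB, cf_lba becomes 0x6789AB (bits 0-23) and
 * cf_lba_exp becomes 0x012345 (bits 24-47); ct_set_fis() below then spreads
 * these two values over the six LBA bytes of the command FIS.
 */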
/*===========================================================================*
 *				gen_identify				     *
 *===========================================================================*/
static int gen_identify(struct port_state *ps, int blocking)
{
	/* Identify an ATA or ATAPI device. If the blocking flag is set, block
	 * until the command has completed; otherwise return immediately.
	 */
	cmd_fis_t fis;
	prd_t prd;

	/* Set up a command, and a single PRD for the result. */
	memset(&fis, 0, sizeof(fis));

	if (ps->flags & FLAG_ATAPI)
		fis.cf_cmd = ATA_CMD_IDENTIFY_PACKET;
	else
		fis.cf_cmd = ATA_CMD_IDENTIFY;

	prd.vp_addr = ps->tmp_phys;
	prd.vp_size = ATA_ID_SIZE;

	/* Start the command, and possibly wait for the result. */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, &prd, 1, FALSE /*write*/);

	if (blocking)
		return port_exec(ps, 0, ahci_command_timeout);

	port_issue(ps, 0, ahci_command_timeout);

	return OK;
}

/*===========================================================================*
 *				gen_flush_wcache			     *
 *===========================================================================*/
static int gen_flush_wcache(struct port_state *ps)
{
	/* Flush the device's write cache.
	 */
	cmd_fis_t fis;

	/* The FLUSH CACHE command may not be supported by all (writable
	 * ATAPI) devices.
	 */
	if (!(ps->flags & FLAG_HAS_FLUSH))
		return EINVAL;

	/* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
	 * interested in the disk location of a failure, so there is no reason
	 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
	 * may indeed fail due to a disk error, in which case it should be
	 * repeated. For now, we shift this responsibility onto the caller.
	 */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_FLUSH_CACHE;

	/* Start the command, and wait for it to complete or fail.
	 * The flush command may take longer than regular I/O commands.
	 */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
		FALSE /*write*/);

	return port_exec(ps, 0, ahci_flush_timeout);
}

/*===========================================================================*
 *				gen_get_wcache				     *
 *===========================================================================*/
static int gen_get_wcache(struct port_state *ps, int *val)
{
	/* Retrieve the status of the device's write cache.
	 */
	int r;

	/* Write caches are not mandatory. */
	if (!(ps->flags & FLAG_HAS_WCACHE))
		return EINVAL;

	/* Retrieve information about the device. */
	if ((r = gen_identify(ps, TRUE /*blocking*/)) != OK)
		return r;

	/* Return the current setting. */
	*val = !!(((u16_t *) ps->tmp_base)[ATA_ID_ENA0] & ATA_ID_ENA0_WCACHE);

	return OK;
}

/*===========================================================================*
 *				gen_set_wcache				     *
 *===========================================================================*/
static int gen_set_wcache(struct port_state *ps, int enable)
{
	/* Enable or disable the device's write cache.
	 */
	cmd_fis_t fis;
	clock_t timeout;

	/* Write caches are not mandatory. */
	if (!(ps->flags & FLAG_HAS_WCACHE))
		return EINVAL;

	/* Disabling the write cache causes a (blocking) cache flush. Cache
	 * flushes may take much longer than regular commands.
	 */
	timeout = enable ? ahci_command_timeout : ahci_flush_timeout;

	/* Set up a command. */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_SET_FEATURES;
	fis.cf_feat = enable ? ATA_SF_EN_WCACHE : ATA_SF_DI_WCACHE;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
		FALSE /*write*/);

	return port_exec(ps, 0, timeout);
}

/*===========================================================================*
 *				ct_set_fis				     *
 *===========================================================================*/
static vir_bytes ct_set_fis(u8_t *ct, cmd_fis_t *fis, unsigned int tag)
{
	/* Fill in the Frame Information Structure part of a command table,
	 * and return the resulting FIS size (in bytes). We only support the
	 * command Register - Host to Device FIS type.
	 */

	memset(ct, 0, ATA_H2D_SIZE);
	ct[ATA_FIS_TYPE] = ATA_FIS_TYPE_H2D;
	ct[ATA_H2D_FLAGS] = ATA_H2D_FLAGS_C;
	ct[ATA_H2D_CMD] = fis->cf_cmd;
	ct[ATA_H2D_LBA_LOW] = fis->cf_lba & 0xFF;
	ct[ATA_H2D_LBA_MID] = (fis->cf_lba >> 8) & 0xFF;
	ct[ATA_H2D_LBA_HIGH] = (fis->cf_lba >> 16) & 0xFF;
	ct[ATA_H2D_DEV] = fis->cf_dev;
	ct[ATA_H2D_LBA_LOW_EXP] = fis->cf_lba_exp & 0xFF;
	ct[ATA_H2D_LBA_MID_EXP] = (fis->cf_lba_exp >> 8) & 0xFF;
	ct[ATA_H2D_LBA_HIGH_EXP] = (fis->cf_lba_exp >> 16) & 0xFF;
	ct[ATA_H2D_CTL] = fis->cf_ctl;

	if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
		ct[ATA_H2D_FEAT] = fis->cf_sec;
		ct[ATA_H2D_FEAT_EXP] = fis->cf_sec_exp;
		ct[ATA_H2D_SEC] = tag << ATA_SEC_TAG_SHIFT;
		ct[ATA_H2D_SEC_EXP] = 0;
	} else {
		ct[ATA_H2D_FEAT] = fis->cf_feat;
		ct[ATA_H2D_FEAT_EXP] = fis->cf_feat_exp;
		ct[ATA_H2D_SEC] = fis->cf_sec;
		ct[ATA_H2D_SEC_EXP] = fis->cf_sec_exp;
	}

	return ATA_H2D_SIZE;
}
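/*
 * Note on the FPDMA branch above: under the Serial ATA NCQ protocol, the
 * READ/WRITE FPDMA QUEUED commands carry the sector count in the FEATURES
 * field pair and the command tag in the upper bits of the SECTOR COUNT field
 * (hence the ATA_SEC_TAG_SHIFT), which is why the queued and regular cases
 * fill in these FIS bytes differently.
 */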
/*===========================================================================*
 *				ct_set_packet				     *
 *===========================================================================*/
static void ct_set_packet(u8_t *ct, u8_t packet[ATAPI_PACKET_SIZE])
{
	/* Fill in the packet part of a command table.
	 */

	memcpy(&ct[AHCI_CT_PACKET_OFF], packet, ATAPI_PACKET_SIZE);
}

/*===========================================================================*
 *				ct_set_prdt				     *
 *===========================================================================*/
static void ct_set_prdt(u8_t *ct, prd_t *prdt, int nr_prds)
{
	/* Fill in the PRDT part of a command table.
	 */
	u32_t *p;
	int i;

	p = (u32_t *) &ct[AHCI_CT_PRDT_OFF];

	for (i = 0; i < nr_prds; i++, prdt++) {
		*p++ = prdt->vp_addr;		/* data base address (low) */
		*p++ = 0;			/* data base address (high) */
		*p++ = 0;			/* reserved */
		*p++ = prdt->vp_size - 1;	/* byte count (zero-based) */
	}
}

/*===========================================================================*
 *				port_set_cmd				     *
 *===========================================================================*/
static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write)
{
	/* Prepare the given command for execution, by constructing a command
	 * table and setting up a command list entry pointing to the table.
	 */
	u8_t *ct;
	u32_t *cl;
	vir_bytes size;

	/* Set a port-specific flag that tells us if the command being
	 * processed is a NCQ command or not.
	 */
	if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
		ps->flags |= FLAG_NCQ_MODE;
	} else {
		assert(!ps->pend_mask);
		ps->flags &= ~FLAG_NCQ_MODE;
	}

	/* Construct a command table, consisting of a command FIS, optionally
	 * a packet, and optionally a number of PRDs (making up the actual PRD
	 * table).
	 */
	ct = ps->ct_base[cmd];

	assert(ct != NULL);
	assert(nr_prds <= NR_PRDS);

	size = ct_set_fis(ct, fis, cmd);

	if (packet != NULL)
		ct_set_packet(ct, packet);

	ct_set_prdt(ct, prdt, nr_prds);

	/* Construct a command list entry, pointing to the command's table.
	 * Current assumptions: callers always provide a Register - Host to
	 * Device type FIS, and all non-NCQ commands are prefetchable.
	 */
	cl = &ps->cl_base[cmd * AHCI_CL_ENTRY_DWORDS];

	memset(cl, 0, AHCI_CL_ENTRY_SIZE);
	cl[0] = (nr_prds << AHCI_CL_PRDTL_SHIFT) |
		((!ATA_IS_FPDMA_CMD(fis->cf_cmd) &&
		(nr_prds > 0 || packet != NULL)) ? AHCI_CL_PREFETCHABLE : 0) |
		(write ? AHCI_CL_WRITE : 0) |
		((packet != NULL) ? AHCI_CL_ATAPI : 0) |
		((size / sizeof(u32_t)) << AHCI_CL_CFL_SHIFT);
	cl[2] = ps->ct_phys[cmd];
}
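/*
 * For example (illustrative only): a non-NCQ read with two PRDs uses a
 * 20-byte Register H2D FIS, so the entry above gets PRDTL = 2, the
 * prefetchable bit set, the write and ATAPI bits clear, and a command FIS
 * length (CFL) of 20 / 4 = 5 dwords.
 */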
/*===========================================================================*
 *				port_finish_cmd				     *
 *===========================================================================*/
static void port_finish_cmd(struct port_state *ps, int cmd, int result)
{
	/* Finish a command that has either succeeded or failed.
	 */

	assert(cmd < ps->queue_depth);

	dprintf(V_REQ, ("%s: command %d %s\n", ahci_portname(ps),
		cmd, (result == RESULT_SUCCESS) ? "succeeded" : "failed"));

	/* Update the command result, and clear it from the pending list. */
	ps->cmd_info[cmd].result = result;

	assert(ps->pend_mask & (1 << cmd));
	ps->pend_mask &= ~(1 << cmd);

	/* Wake up the thread, unless it is the main thread. This can happen
	 * during initialization, as the gen_identify function is called by the
	 * main thread itself.
	 */
	if (ps->state != STATE_WAIT_ID)
		blockdriver_mt_wakeup(ps->cmd_info[cmd].tid);
}

/*===========================================================================*
 *				port_fail_cmds				     *
 *===========================================================================*/
static void port_fail_cmds(struct port_state *ps)
{
	/* Fail all ongoing commands for a device.
	 */
	int i;

	for (i = 0; ps->pend_mask != 0 && i < ps->queue_depth; i++)
		if (ps->pend_mask & (1 << i))
			port_finish_cmd(ps, i, RESULT_FAILURE);
}

/*===========================================================================*
 *				port_check_cmds				     *
 *===========================================================================*/
static void port_check_cmds(struct port_state *ps)
{
	/* Check what commands have completed, and finish them.
	 */
	u32_t mask, done;
	int i;

	/* See which commands have completed. */
	if (ps->flags & FLAG_NCQ_MODE)
		mask = port_read(ps, AHCI_PORT_SACT);
	else
		mask = port_read(ps, AHCI_PORT_CI);

	/* Wake up threads corresponding to completed commands. */
	done = ps->pend_mask & ~mask;

	for (i = 0; i < ps->queue_depth; i++)
		if (done & (1 << i))
			port_finish_cmd(ps, i, RESULT_SUCCESS);
}

/*===========================================================================*
 *				port_find_cmd				     *
 *===========================================================================*/
static int port_find_cmd(struct port_state *ps)
{
	/* Find a free command tag to queue the current request.
	 */
	int i;

	for (i = 0; i < ps->queue_depth; i++)
		if (!(ps->pend_mask & (1 << i)))
			break;

	/* We should always be able to find a free slot, since a thread only
	 * runs when a slot is available for it to use.
	 */
	assert(i < ps->queue_depth);

	return i;
}

/*===========================================================================*
 *				port_get_padbuf				     *
 *===========================================================================*/
static int port_get_padbuf(struct port_state *ps, size_t size)
{
	/* Make available a temporary buffer for use by this port. Enlarge the
	 * previous buffer if applicable and necessary, potentially changing
	 * its physical address.
	 */

	if (ps->pad_base != NULL && ps->pad_size >= size)
		return OK;

	if (ps->pad_base != NULL)
		free_contig(ps->pad_base, ps->pad_size);

	ps->pad_size = size;
	ps->pad_base = alloc_contig(ps->pad_size, 0, &ps->pad_phys);

	if (ps->pad_base == NULL) {
		dprintf(V_ERR, ("%s: unable to allocate a padding buffer of "
			"size %lu\n", ahci_portname(ps),
			(unsigned long) size));

		return ENOMEM;
	}

	dprintf(V_INFO, ("%s: allocated padding buffer of size %lu\n",
		ahci_portname(ps), (unsigned long) size));

	return OK;
}
/*===========================================================================*
 *				sum_iovec				     *
 *===========================================================================*/
static int sum_iovec(struct port_state *ps, endpoint_t endpt,
	iovec_s_t *iovec, int nr_req, vir_bytes *total)
{
	/* Retrieve the total size of the given I/O vector. Check for alignment
	 * requirements along the way. Return OK (and the total request size)
	 * or an error.
	 */
	vir_bytes size, bytes;
	int i;

	bytes = 0;

	for (i = 0; i < nr_req; i++) {
		size = iovec[i].iov_size;

		if (size == 0 || (size & 1) || size > LONG_MAX) {
			dprintf(V_ERR, ("%s: bad size %lu in iovec from %d\n",
				ahci_portname(ps), size, endpt));
			return EINVAL;
		}

		bytes += size;

		if (bytes > LONG_MAX) {
			dprintf(V_ERR, ("%s: iovec size overflow from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}
	}

	*total = bytes;
	return OK;
}

/*===========================================================================*
 *				setup_prdt				     *
 *===========================================================================*/
static int setup_prdt(struct port_state *ps, endpoint_t endpt,
	iovec_s_t *iovec, int nr_req, vir_bytes size, vir_bytes lead,
	int write, prd_t *prdt)
{
	/* Convert (the first part of) an I/O vector to an array of Physical
	 * Region Descriptors that can later be used to set the command's real
	 * PRDT. The resulting table as a whole should be sector-aligned;
	 * leading and trailing local buffers may have to be used for padding
	 * as appropriate. Return the number of PRD entries, or a negative
	 * error code.
	 */
	struct vumap_vir vvec[NR_PRDS];
	size_t bytes, trail;
	int i, r, pcount, nr_prds = 0;

	if (lead > 0) {
		/* Allocate a buffer for the data we don't want. */
		if ((r = port_get_padbuf(ps, ps->sector_size)) != OK)
			return r;

		prdt[nr_prds].vp_addr = ps->pad_phys;
		prdt[nr_prds].vp_size = lead;
		nr_prds++;
	}

	/* The sum of lead, size, trail has to be sector-aligned. */
	trail = (ps->sector_size - (lead + size)) % ps->sector_size;

	/* Get the physical addresses of the given buffers. */
	for (i = 0; i < nr_req && size > 0; i++) {
		bytes = MIN(iovec[i].iov_size, size);

		if (endpt == SELF)
			vvec[i].vv_addr = (vir_bytes) iovec[i].iov_grant;
		else
			vvec[i].vv_grant = iovec[i].iov_grant;

		vvec[i].vv_size = bytes;

		size -= bytes;
	}

	pcount = i;

	if ((r = sys_vumap(endpt, vvec, i, 0, write ? VUA_READ : VUA_WRITE,
			&prdt[nr_prds], &pcount)) != OK) {
		dprintf(V_ERR, ("%s: unable to map memory from %d (%d)\n",
			ahci_portname(ps), endpt, r));
		return r;
	}

	assert(pcount > 0 && pcount <= i);

	/* Make sure all buffers are physically contiguous and word-aligned. */
	for (i = 0; i < pcount; i++) {
		if (vvec[i].vv_size != prdt[nr_prds].vp_size) {
			dprintf(V_ERR, ("%s: non-contiguous memory from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}

		if (prdt[nr_prds].vp_addr & 1) {
			dprintf(V_ERR, ("%s: bad physical address from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}

		nr_prds++;
	}

	if (trail > 0) {
		assert(nr_prds < NR_PRDS);
		prdt[nr_prds].vp_addr = ps->pad_phys + lead;
		prdt[nr_prds].vp_size = trail;
		nr_prds++;
	}

	return nr_prds;
}
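/*
 * A worked example of the padding logic above (illustrative only): a
 * 1024-byte read starting at byte offset 700 on a 512-byte-sector device has
 * lead = 700 % 512 = 188, so a 188-byte PRD pointing at the padding buffer
 * precedes the caller's buffers, and trail = 324, so a final 324-byte PRD
 * rounds the transfer up to exactly three sectors (188 + 1024 + 324 = 1536).
 */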
/*===========================================================================*
 *				port_transfer				     *
 *===========================================================================*/
static ssize_t port_transfer(struct port_state *ps, u64_t pos, u64_t eof,
	endpoint_t endpt, iovec_s_t *iovec, int nr_req, int write, int flags)
{
	/* Perform an I/O transfer on a port.
	 */
	prd_t prdt[NR_PRDS];
	vir_bytes size, lead;
	unsigned int count, nr_prds;
	u64_t start_lba;
	int r, cmd;

	/* Get the total request size from the I/O vector. */
	if ((r = sum_iovec(ps, endpt, iovec, nr_req, &size)) != OK)
		return r;

	dprintf(V_REQ, ("%s: %s for %lu bytes at pos %llx\n",
		ahci_portname(ps), write ? "write" : "read", size, pos));

	assert(ps->state == STATE_GOOD_DEV);
	assert(ps->flags & FLAG_HAS_MEDIUM);
	assert(ps->sector_size > 0);

	/* Limit the maximum size of a single transfer.
	 * See the comments at the top of this file for details.
	 */
	if (size > MAX_TRANSFER)
		size = MAX_TRANSFER;

	/* If necessary, reduce the request size so that the request does not
	 * extend beyond the end of the partition. The caller already
	 * guarantees that the starting position lies within the partition.
	 */
	if (pos + size > eof)
		size = (vir_bytes) (eof - pos);

	start_lba = pos / ps->sector_size;
	lead = (vir_bytes) (pos % ps->sector_size);
	count = (lead + size + ps->sector_size - 1) / ps->sector_size;

	/* Position must be word-aligned for read requests, and sector-aligned
	 * for write requests. We do not support read-modify-write for writes.
	 */
	if ((lead & 1) || (write && lead != 0)) {
		dprintf(V_ERR, ("%s: unaligned position from %d\n",
			ahci_portname(ps), endpt));
		return EINVAL;
	}

	/* Write requests must be sector-aligned. Word alignment of the size
	 * is already guaranteed by sum_iovec().
	 */
	if (write && (size % ps->sector_size) != 0) {
		dprintf(V_ERR, ("%s: unaligned size %lu from %d\n",
			ahci_portname(ps), size, endpt));
		return EINVAL;
	}

	/* Create a vector of physical addresses and sizes for the transfer. */
	nr_prds = r = setup_prdt(ps, endpt, iovec, nr_req, size, lead, write,
		prdt);

	if (r < 0) return r;

	/* Perform the actual transfer. */
	cmd = port_find_cmd(ps);

	if (ps->flags & FLAG_ATAPI)
		r = atapi_transfer(ps, cmd, start_lba, count, write, prdt,
			nr_prds);
	else
		r = ata_transfer(ps, cmd, start_lba, count, write,
			!!(flags & BDEV_FORCEWRITE), prdt, nr_prds);

	if (r != OK) return r;

	return size;
}

/*===========================================================================*
 *				port_hardreset				     *
 *===========================================================================*/
static void port_hardreset(struct port_state *ps)
{
	/* Perform a port-level (hard) reset on the given port.
	 */

	port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_INIT);

	micro_delay(COMRESET_DELAY * 1000);	/* COMRESET_DELAY is in ms */

	port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_NONE);
}
/*===========================================================================*
 *				port_override				     *
 *===========================================================================*/
static void port_override(struct port_state *ps)
{
	/* Override the port's BSY and/or DRQ flags. This may only be done
	 * prior to starting the port.
	 */
	u32_t cmd;

	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_CLO);

	SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CLO),
		PORTREG_DELAY);

	dprintf(V_INFO, ("%s: overridden\n", ahci_portname(ps)));
}

/*===========================================================================*
 *				port_start				     *
 *===========================================================================*/
static void port_start(struct port_state *ps)
{
	/* Start the given port, allowing for the execution of commands and
	 * the transfer of data on that port.
	 */
	u32_t cmd;

	/* Reset status registers. */
	port_write(ps, AHCI_PORT_SERR, ~0);
	port_write(ps, AHCI_PORT_IS, ~0);

	/* Start the port. */
	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_ST);

	dprintf(V_INFO, ("%s: started\n", ahci_portname(ps)));
}

/*===========================================================================*
 *				port_stop				     *
 *===========================================================================*/
static void port_stop(struct port_state *ps)
{
	/* Stop the given port, if not already stopped.
	 */
	u32_t cmd;

	cmd = port_read(ps, AHCI_PORT_CMD);

	if (cmd & (AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST)) {
		port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_ST);

		SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CR),
			PORTREG_DELAY);

		dprintf(V_INFO, ("%s: stopped\n", ahci_portname(ps)));
	}
}

/*===========================================================================*
 *				port_restart				     *
 *===========================================================================*/
static void port_restart(struct port_state *ps)
{
	/* Restart a port after a fatal error has occurred.
	 */

	/* Fail all outstanding commands. */
	port_fail_cmds(ps);

	/* Stop the port. */
	port_stop(ps);

	/* If the BSY and/or DRQ flags are set, reset the port. */
	if (port_read(ps, AHCI_PORT_TFD) &
		(AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ)) {

		dprintf(V_ERR, ("%s: port reset\n", ahci_portname(ps)));

		/* To keep this driver simple, we do not transparently recover
		 * ongoing requests. Instead, we mark the failing device as
		 * disconnected, and reset it. If the reset succeeds, the
		 * device (or, perhaps, eventually, another device) will come
		 * back up. Any current and future requests to this port will
		 * be failed until the port is fully closed and reopened.
		 */
		port_disconnect(ps);

		/* Trigger a port reset. */
		port_hardreset(ps);

		return;
	}

	/* Start the port. */
	port_start(ps);
}

/*===========================================================================*
 *				print_string				     *
 *===========================================================================*/
static void print_string(u16_t *buf, int start, int end)
{
	/* Print a string that is stored as little-endian words and padded
	 * with trailing spaces.
	 */
	int i, last = 0;

	while (end >= start && buf[end] == 0x2020) end--;

	if (end >= start && (buf[end] & 0xFF) == 0x20) end--, last++;

	for (i = start; i <= end; i++)
		printf("%c%c", buf[i] >> 8, buf[i] & 0xFF);

	if (last)
		printf("%c", buf[i] >> 8);
}
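/*
 * Example (illustrative only): each identification word stores two ASCII
 * characters, high byte first, and the logic above strips the trailing space
 * padding, including a single padding character within the last word. With
 * buf[] = { 0x4142, 0x4320, 0x2020 }, print_string(buf, 0, 2) prints "ABC".
 */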
/*===========================================================================*
 *				port_id_check				     *
 *===========================================================================*/
static void port_id_check(struct port_state *ps, int success)
{
	/* The device identification command has either completed or timed
	 * out. Decide whether this device is usable or not, and store some of
	 * its properties.
	 */
	u16_t *buf;

	assert(ps->state == STATE_WAIT_ID);

	ps->flags &= ~FLAG_BUSY;
	cancel_timer(&ps->cmd_info[0].timer);

	if (!success) {
		if (!(ps->flags & FLAG_ATAPI) &&
				port_read(ps, AHCI_PORT_SIG) != ATA_SIG_ATA) {
			dprintf(V_INFO, ("%s: may not be ATA, trying ATAPI\n",
				ahci_portname(ps)));

			ps->flags |= FLAG_ATAPI;

			(void) gen_identify(ps, FALSE /*blocking*/);
			return;
		}

		dprintf(V_ERR,
			("%s: unable to identify\n", ahci_portname(ps)));
	}

	/* If the identify command itself succeeded, check the results and
	 * store some properties.
	 */
	if (success) {
		buf = (u16_t *) ps->tmp_base;

		if (ps->flags & FLAG_ATAPI)
			success = atapi_id_check(ps, buf);
		else
			success = ata_id_check(ps, buf);
	}

	/* If the device has not been identified successfully, mark it as an
	 * unusable device.
	 */
	if (!success) {
		port_stop(ps);

		ps->state = STATE_BAD_DEV;
		port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);

		return;
	}

	/* The device has been identified successfully, and hence usable. */
	ps->state = STATE_GOOD_DEV;

	/* Print some information about the device. */
	if (ahci_verbose >= V_INFO) {
		printf("%s: ATA%s, ", ahci_portname(ps),
			(ps->flags & FLAG_ATAPI) ? "PI" : "");
		print_string(buf, 27, 46);
		if (ahci_verbose >= V_DEV) {
			printf(" (");
			print_string(buf, 10, 19);
			printf(", ");
			print_string(buf, 23, 26);
			printf(")");
		}

		if (ps->flags & FLAG_HAS_MEDIUM)
			printf(", %u byte sectors, %llu MB size",
				ps->sector_size,
				ps->lba_count * ps->sector_size / (1024*1024));

		printf("\n");
	}
}

/*===========================================================================*
 *				port_connect				     *
 *===========================================================================*/
static void port_connect(struct port_state *ps)
{
	/* A device has been found to be attached to this port. Start the
	 * port, and do timed polling for its signature to become available.
	 */
	u32_t status, sig;

	dprintf(V_INFO, ("%s: device connected\n", ahci_portname(ps)));

	port_start(ps);

	/* The next check covers a purely hypothetical race condition, where
	 * the device would disappear right before we try to start it. This is
	 * possible because we have to clear PxSERR, and with that, the DIAG.N
	 * bit. Double-check the port status, and if it is not as we expect,
	 * infer a disconnection.
	 */
	status = port_read(ps, AHCI_PORT_SSTS) & AHCI_PORT_SSTS_DET_MASK;

	if (status != AHCI_PORT_SSTS_DET_PHY) {
		dprintf(V_ERR, ("%s: device vanished!\n", ahci_portname(ps)));

		port_stop(ps);

		ps->state = STATE_NO_DEV;
		ps->flags &= ~FLAG_BUSY;

		return;
	}

	/* Clear all state flags except the busy flag, which may be relevant
	 * if a BDEV_OPEN call is waiting for the device to become ready; the
	 * barrier flag, which prevents access to the device until it is
	 * completely closed and (re)opened; and, the thread suspension flag.
	 */
	ps->flags &= (FLAG_BUSY | FLAG_BARRIER | FLAG_SUSPENDED);

	/* Check the port's signature. We only use the signature to speed up
	 * identification; we will try both ATA and ATAPI if the signature is
	 * neither ATA nor ATAPI.
	 */
	sig = port_read(ps, AHCI_PORT_SIG);

	if (sig == ATA_SIG_ATAPI)
		ps->flags |= FLAG_ATAPI;

	/* Attempt to identify the device. Do this using continuation, because
	 * we may already be called from port_wait() here, and could end up
	 * confusing the timer expiration procedure.
	 */
	ps->state = STATE_WAIT_ID;
	port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_MASK);

	(void) gen_identify(ps, FALSE /*blocking*/);
}

/*===========================================================================*
 *				port_disconnect				     *
 *===========================================================================*/
static void port_disconnect(struct port_state *ps)
{
	/* The device has detached from this port. It has already been
	 * stopped.
	 */

	dprintf(V_INFO, ("%s: device disconnected\n", ahci_portname(ps)));

	ps->state = STATE_NO_DEV;
	port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PCE);
	ps->flags &= ~FLAG_BUSY;

	/* Fail any ongoing request. The caller may already have done this. */
	port_fail_cmds(ps);

	/* Block any further access until the device is completely closed and
	 * reopened. This prevents arbitrary I/O to a newly plugged-in device
	 * without upper layers noticing.
	 */
	ps->flags |= FLAG_BARRIER;

	/* Inform the blockdriver library to reduce the number of threads. */
	blockdriver_mt_set_workers(ps->device, 1);
}

/*===========================================================================*
 *				port_dev_check				     *
 *===========================================================================*/
static void port_dev_check(struct port_state *ps)
{
	/* Perform device detection by means of polling.
	 */
	u32_t status, tfd;

	assert(ps->state == STATE_WAIT_DEV);

	status = port_read(ps, AHCI_PORT_SSTS) & AHCI_PORT_SSTS_DET_MASK;

	dprintf(V_DEV, ("%s: polled status %u\n", ahci_portname(ps), status));

	switch (status) {
	case AHCI_PORT_SSTS_DET_PHY:
		tfd = port_read(ps, AHCI_PORT_TFD);

		/* If a Phy connection has been established, and the BSY and
		 * DRQ flags are cleared, the device is ready.
		 */
		if (!(tfd & (AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ))) {
			port_connect(ps);

			return;
		}

		/* fall-through */
	case AHCI_PORT_SSTS_DET_DET:
		/* A device has been detected, but it is not ready yet. Try
		 * for a while before giving up. This may take seconds.
		 */
		if (ps->left > 0) {
			ps->left--;
			set_timer(&ps->cmd_info[0].timer, ahci_device_delay,
				port_timeout, BUILD_ARG(ps - port_state, 0));
			return;
		}
	}

	dprintf(V_INFO, ("%s: device not ready\n", ahci_portname(ps)));

	/* We get here on timeout, and if the HBA reports that there is no
	 * device present at all. In all cases, we change to another state.
	 */
	if (status == AHCI_PORT_SSTS_DET_PHY) {
		/* Some devices may not correctly clear BSY/DRQ. Upon timeout,
		 * if we can override these flags, do so and start the
		 * identification process anyway.
		 */
		if (hba_state.has_clo) {
			port_override(ps);

			port_connect(ps);

			return;
		}

		/* A device is present and initialized, but not ready. */
		ps->state = STATE_BAD_DEV;
		port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);
	} else {
		/* A device may or may not be present, but it does not appear
		 * to be ready in any case. Ignore it until the next device
		 * initialization event.
		 */
		ps->state = STATE_NO_DEV;
		ps->flags &= ~FLAG_BUSY;
	}
}

/*===========================================================================*
 *				port_intr				     *
 *===========================================================================*/
static void port_intr(struct port_state *ps)
{
	/* Process an interrupt on this port.
	 */
	u32_t smask, emask;
	int success;

	if (ps->state == STATE_NO_PORT) {
		dprintf(V_ERR, ("%s: interrupt for invalid port!\n",
			ahci_portname(ps)));

		return;
	}

	smask = port_read(ps, AHCI_PORT_IS);
	emask = smask & port_read(ps, AHCI_PORT_IE);

	/* Clear the interrupt flags that we saw were set. */
	port_write(ps, AHCI_PORT_IS, smask);

	dprintf(V_REQ, ("%s: interrupt (%08x)\n", ahci_portname(ps), smask));

	/* Check if any commands have completed. */
	port_check_cmds(ps);

	if (emask & AHCI_PORT_IS_PCS) {
		/* Clear the X diagnostics bit to clear this interrupt. */
		port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_X);

		dprintf(V_DEV, ("%s: device attached\n", ahci_portname(ps)));

		switch (ps->state) {
		case STATE_SPIN_UP:
		case STATE_NO_DEV:
			/* Reportedly, a device has shown up. Start polling
			 * its status until it has become ready.
			 */

			if (ps->state == STATE_SPIN_UP)
				cancel_timer(&ps->cmd_info[0].timer);

			ps->state = STATE_WAIT_DEV;
			ps->left = ahci_device_checks;

			port_dev_check(ps);

			break;

		case STATE_WAIT_DEV:
			/* Nothing else to do. */
			break;

		default:
			/* Impossible. */
			assert(0);
		}
	} else if (emask & AHCI_PORT_IS_PRCS) {
		/* Clear the N diagnostics bit to clear this interrupt. */
		port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_N);

		dprintf(V_DEV, ("%s: device detached\n", ahci_portname(ps)));

		switch (ps->state) {
		case STATE_WAIT_ID:
		case STATE_GOOD_DEV:
			/* The device is no longer ready. Stop the port,
			 * cancel ongoing requests, and disconnect the device.
			 */
			port_stop(ps);

			/* fall-through */
		case STATE_BAD_DEV:
			port_disconnect(ps);

			/* The device has become unusable to us at this point.
			 * Reset the port to make sure that once the device
			 * (or another device) becomes usable again, we will
			 * get a PCS interrupt as well.
			 */
			port_hardreset(ps);

			break;

		default:
			/* Impossible. */
			assert(0);
		}
	} else if (smask & AHCI_PORT_IS_MASK) {
		/* We assume that any other interrupt indicates command
		 * completion or (command or device) failure. Unfortunately, if
		 * an NCQ command failed, we cannot easily determine which one
		 * it was. For that reason, after completing all successfully
		 * finished commands (above), we fail all other outstanding
		 * commands and restart the port. This can possibly be improved
		 * later by obtaining per-command status results from the HBA.
		 */

		success = !(port_read(ps, AHCI_PORT_TFD) &
			(AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF));

		/* Check now for failure. There are fatal failures, and there
		 * are failures that set the TFD.STS.ERR field using a D2H
		 * FIS. In both cases, we just restart the port, failing all
		 * commands in the process.
		 */
1699 */ 1700 if ((port_read(ps, AHCI_PORT_TFD) & 1701 (AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF)) || 1702 (smask & AHCI_PORT_IS_RESTART)) { 1703 port_restart(ps); 1704 } 1705 1706 /* If we were waiting for ID verification, check now. */ 1707 if (ps->state == STATE_WAIT_ID) 1708 port_id_check(ps, success); 1709 } 1710 } 1711 1712 /*===========================================================================* 1713 * port_timeout * 1714 *===========================================================================*/ 1715 static void port_timeout(int arg) 1716 { 1717 /* A timeout has occurred on this port. Figure out what the timeout is 1718 * for, and take appropriate action. 1719 */ 1720 struct port_state *ps; 1721 int port, cmd; 1722 1723 port = GET_PORT(arg); 1724 cmd = GET_TAG(arg); 1725 1726 assert(port >= 0 && port < hba_state.nr_ports); 1727 1728 ps = &port_state[port]; 1729 1730 /* Regardless of the outcome of this timeout, wake up the thread if it 1731 * is suspended. This applies only during the initialization. 1732 */ 1733 if (ps->flags & FLAG_SUSPENDED) { 1734 assert(cmd == 0); 1735 blockdriver_mt_wakeup(ps->cmd_info[0].tid); 1736 } 1737 1738 /* If detection of a device after startup timed out, give up on initial 1739 * detection and only look for hot plug events from now on. 1740 */ 1741 if (ps->state == STATE_SPIN_UP) { 1742 /* One exception: if the PCS interrupt bit is set here, then we 1743 * are probably running on VirtualBox, which is currently not 1744 * always raising interrupts when setting interrupt bits (!). 1745 */ 1746 if (port_read(ps, AHCI_PORT_IS) & AHCI_PORT_IS_PCS) { 1747 dprintf(V_INFO, ("%s: bad controller, no interrupt\n", 1748 ahci_portname(ps))); 1749 1750 ps->state = STATE_WAIT_DEV; 1751 ps->left = ahci_device_checks; 1752 1753 port_dev_check(ps); 1754 1755 return; 1756 } else { 1757 dprintf(V_INFO, ("%s: spin-up timeout\n", 1758 ahci_portname(ps))); 1759 1760 /* If the busy flag is set, a BDEV_OPEN request is 1761 * waiting for the detection to finish; clear the busy 1762 * flag to return an error to the caller. 1763 */ 1764 ps->state = STATE_NO_DEV; 1765 ps->flags &= ~FLAG_BUSY; 1766 } 1767 1768 return; 1769 } 1770 1771 /* If we are waiting for a device to become connected and initialized, 1772 * check now. 1773 */ 1774 if (ps->state == STATE_WAIT_DEV) { 1775 port_dev_check(ps); 1776 1777 return; 1778 } 1779 1780 dprintf(V_ERR, ("%s: timeout\n", ahci_portname(ps))); 1781 1782 /* Restart the port, failing all current commands. */ 1783 port_restart(ps); 1784 1785 /* Finish up the identify operation. */ 1786 if (ps->state == STATE_WAIT_ID) 1787 port_id_check(ps, FALSE); 1788 } 1789 1790 /*===========================================================================* 1791 * port_wait * 1792 *===========================================================================*/ 1793 static void port_wait(struct port_state *ps) 1794 { 1795 /* Suspend the current thread until the given port is no longer busy, 1796 * due to either command completion or timeout. 
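	 * The matching wakeup comes from ahci_intr(), once the port is no
	 * longer busy, or from port_timeout(); both wake up the thread
	 * registered in cmd_info[0].tid, which ahci_open() fills in before
	 * calling us.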
 */

	ps->flags |= FLAG_SUSPENDED;

	while (ps->flags & FLAG_BUSY)
		blockdriver_mt_sleep();

	ps->flags &= ~FLAG_SUSPENDED;
}

/*===========================================================================*
 *				port_issue				     *
 *===========================================================================*/
static void port_issue(struct port_state *ps, int cmd, clock_t timeout)
{
	/* Issue a command to the port, and set a timer to trigger a timeout
	 * if the command takes too long to complete.
	 */

	/* Set the corresponding NCQ command bit, if applicable. */
	if (ps->flags & FLAG_HAS_NCQ)
		port_write(ps, AHCI_PORT_SACT, 1 << cmd);

	/* Make sure that the compiler does not delay any previous write
	 * operations until after the write to the command issue register.
	 */
	__insn_barrier();

	/* Tell the controller that a new command is ready. */
	port_write(ps, AHCI_PORT_CI, 1 << cmd);

	/* Update pending commands. */
	ps->pend_mask |= 1 << cmd;

	/* Set a timer in case the command does not complete at all. */
	set_timer(&ps->cmd_info[cmd].timer, timeout, port_timeout,
		BUILD_ARG(ps - port_state, cmd));
}

/*===========================================================================*
 *				port_exec				     *
 *===========================================================================*/
static int port_exec(struct port_state *ps, int cmd, clock_t timeout)
{
	/* Execute a command on a port, wait for the command to complete or for
	 * a timeout, and return whether the command succeeded or not.
	 */

	port_issue(ps, cmd, timeout);

	/* Put the thread to sleep until a timeout or a command completion
	 * happens. We used to call port_wait() here, which works by means of
	 * the suspended flag; that approach has been abandoned, because the
	 * flag would have to be maintained on a per-thread (and therefore
	 * per-tag) basis rather than per port. The port_wait() call is now
	 * used only to defer open calls during device/driver initialization.
	 * Here, we register the current thread and then put it to sleep
	 * directly.
	 */
	ps->cmd_info[cmd].tid = blockdriver_mt_get_tid();

	blockdriver_mt_sleep();

	/* Cancelling a timer that has just triggered does no harm. */
	cancel_timer(&ps->cmd_info[cmd].timer);

	assert(!(ps->flags & FLAG_BUSY));

	dprintf(V_REQ, ("%s: end of command -- %s\n", ahci_portname(ps),
		(ps->cmd_info[cmd].result == RESULT_FAILURE) ?
		"failure" : "success"));

	if (ps->cmd_info[cmd].result == RESULT_FAILURE)
		return EIO;

	return OK;
}

/*===========================================================================*
 *				port_alloc				     *
 *===========================================================================*/
static void port_alloc(struct port_state *ps)
{
	/* Allocate memory for the given port, and enable FIS receipt. We try
	 * to cram everything into one 4K-page in order to limit memory usage
	 * as much as possible. More memory may be allocated on demand later,
	 * but allocation failure should be fatal only here. Note that we do
	 * not allocate memory for sector padding here, because we do not know
	 * the device's sector size yet.
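	 * As an illustration of the round-up arithmetic below, using
	 * hypothetical sizes (the real values come from ahci.h): with
	 * AHCI_CL_SIZE = 1024 and AHCI_FIS_SIZE = 256, we would get fis_off =
	 * 1024 + 256 - 1 = 1279, which the subsequent "fis_off -= fis_off %
	 * AHCI_FIS_SIZE" reduces to 1024, the first 256-byte boundary at or
	 * beyond the end of the command list. The temporary buffer and the
	 * per-command tables are laid out with the same idiom.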
1885 */ 1886 size_t fis_off, tmp_off, ct_off; int i; 1887 size_t ct_offs[NR_CMDS]; 1888 u32_t cmd; 1889 1890 fis_off = AHCI_CL_SIZE + AHCI_FIS_SIZE - 1; 1891 fis_off -= fis_off % AHCI_FIS_SIZE; 1892 1893 tmp_off = fis_off + AHCI_FIS_SIZE + AHCI_TMP_ALIGN - 1; 1894 tmp_off -= tmp_off % AHCI_TMP_ALIGN; 1895 1896 /* Allocate memory for all the commands. */ 1897 ct_off = tmp_off + AHCI_TMP_SIZE; 1898 for (i = 0; i < NR_CMDS; i++) { 1899 ct_off += AHCI_CT_ALIGN - 1; 1900 ct_off -= ct_off % AHCI_CT_ALIGN; 1901 ct_offs[i] = ct_off; 1902 ps->mem_size = ct_off + AHCI_CT_SIZE; 1903 ct_off = ps->mem_size; 1904 } 1905 1906 ps->mem_base = alloc_contig(ps->mem_size, AC_ALIGN4K, &ps->mem_phys); 1907 if (ps->mem_base == NULL) 1908 panic("unable to allocate port memory"); 1909 memset(ps->mem_base, 0, ps->mem_size); 1910 1911 ps->cl_base = (u32_t *) ps->mem_base; 1912 ps->cl_phys = ps->mem_phys; 1913 assert(ps->cl_phys % AHCI_CL_SIZE == 0); 1914 1915 ps->fis_base = (u32_t *) (ps->mem_base + fis_off); 1916 ps->fis_phys = ps->mem_phys + fis_off; 1917 assert(ps->fis_phys % AHCI_FIS_SIZE == 0); 1918 1919 ps->tmp_base = (u8_t *) (ps->mem_base + tmp_off); 1920 ps->tmp_phys = ps->mem_phys + tmp_off; 1921 assert(ps->tmp_phys % AHCI_TMP_ALIGN == 0); 1922 1923 for (i = 0; i < NR_CMDS; i++) { 1924 ps->ct_base[i] = ps->mem_base + ct_offs[i]; 1925 ps->ct_phys[i] = ps->mem_phys + ct_offs[i]; 1926 assert(ps->ct_phys[i] % AHCI_CT_ALIGN == 0); 1927 } 1928 1929 /* Tell the controller about some of the physical addresses. */ 1930 port_write(ps, AHCI_PORT_FBU, 0); 1931 port_write(ps, AHCI_PORT_FB, ps->fis_phys); 1932 1933 port_write(ps, AHCI_PORT_CLBU, 0); 1934 port_write(ps, AHCI_PORT_CLB, ps->cl_phys); 1935 1936 /* Enable FIS receive. */ 1937 cmd = port_read(ps, AHCI_PORT_CMD); 1938 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_FRE); 1939 1940 ps->pad_base = NULL; 1941 ps->pad_size = 0; 1942 } 1943 1944 /*===========================================================================* 1945 * port_free * 1946 *===========================================================================*/ 1947 static void port_free(struct port_state *ps) 1948 { 1949 /* Disable FIS receipt for the given port, and free previously 1950 * allocated memory. 1951 */ 1952 u32_t cmd; 1953 1954 /* Disable FIS receive. */ 1955 cmd = port_read(ps, AHCI_PORT_CMD); 1956 1957 if (cmd & (AHCI_PORT_CMD_FR | AHCI_PORT_CMD_FRE)) { 1958 port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_FRE); 1959 1960 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_FR), 1961 PORTREG_DELAY); 1962 } 1963 1964 if (ps->pad_base != NULL) 1965 free_contig(ps->pad_base, ps->pad_size); 1966 1967 free_contig(ps->mem_base, ps->mem_size); 1968 } 1969 1970 /*===========================================================================* 1971 * port_init * 1972 *===========================================================================*/ 1973 static void port_init(struct port_state *ps) 1974 { 1975 /* Initialize the given port. 1976 */ 1977 u32_t cmd; 1978 int i; 1979 1980 /* Initialize the port state structure. */ 1981 ps->queue_depth = 1; 1982 ps->state = STATE_SPIN_UP; 1983 ps->flags = FLAG_BUSY; 1984 ps->sector_size = 0; 1985 ps->open_count = 0; 1986 ps->pend_mask = 0; 1987 for (i = 0; i < NR_CMDS; i++) 1988 init_timer(&ps->cmd_info[i].timer); 1989 1990 ps->reg = (u32_t *) ((u8_t *) hba_state.base + 1991 AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * (ps - port_state)); 1992 1993 /* Allocate memory for the port. 
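	 * This covers the command list, received-FIS area, temporary buffer
	 * and command tables; see port_alloc() above for the exact layout.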
*/ 1994 port_alloc(ps); 1995 1996 /* Just listen for device connection events for now. */ 1997 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PCE); 1998 1999 /* Enable device spin-up for HBAs that support staggered spin-up. 2000 * This is a no-op for HBAs that do not support it. 2001 */ 2002 cmd = port_read(ps, AHCI_PORT_CMD); 2003 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_SUD); 2004 2005 /* Trigger a port reset. */ 2006 port_hardreset(ps); 2007 2008 set_timer(&ps->cmd_info[0].timer, ahci_spinup_timeout, 2009 port_timeout, BUILD_ARG(ps - port_state, 0)); 2010 } 2011 2012 /*===========================================================================* 2013 * ahci_probe * 2014 *===========================================================================*/ 2015 static int ahci_probe(int skip) 2016 { 2017 /* Find a matching PCI device. 2018 */ 2019 int r, devind; 2020 u16_t vid, did; 2021 2022 pci_init(); 2023 2024 r = pci_first_dev(&devind, &vid, &did); 2025 if (r <= 0) 2026 return -1; 2027 2028 while (skip--) { 2029 r = pci_next_dev(&devind, &vid, &did); 2030 if (r <= 0) 2031 return -1; 2032 } 2033 2034 pci_reserve(devind); 2035 2036 return devind; 2037 } 2038 2039 /*===========================================================================* 2040 * ahci_reset * 2041 *===========================================================================*/ 2042 static void ahci_reset(void) 2043 { 2044 /* Reset the HBA. Do not enable AHCI mode afterwards. 2045 */ 2046 u32_t ghc; 2047 2048 ghc = hba_read(AHCI_HBA_GHC); 2049 2050 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE); 2051 2052 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_HR); 2053 2054 SPIN_UNTIL(!(hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR), RESET_DELAY); 2055 2056 if (hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR) 2057 panic("unable to reset HBA"); 2058 } 2059 2060 /*===========================================================================* 2061 * ahci_init * 2062 *===========================================================================*/ 2063 static void ahci_init(int devind) 2064 { 2065 /* Initialize the device. 2066 */ 2067 u32_t base, size, cap, ghc, mask; 2068 int r, port, ioflag; 2069 2070 if ((r = pci_get_bar(devind, PCI_BAR_6, &base, &size, &ioflag)) != OK) 2071 panic("unable to retrieve BAR: %d", r); 2072 2073 if (ioflag) 2074 panic("invalid BAR type"); 2075 2076 /* There must be at least one port, and at most NR_PORTS ports. Limit 2077 * the actual total number of ports to the size of the exposed area. 2078 */ 2079 if (size < AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE) 2080 panic("HBA memory size too small: %u", size); 2081 2082 size = MIN(size, AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * NR_PORTS); 2083 2084 hba_state.nr_ports = (size - AHCI_MEM_BASE_SIZE) / AHCI_MEM_PORT_SIZE; 2085 2086 /* Map the register area into local memory. */ 2087 hba_state.base = (u32_t *) vm_map_phys(SELF, (void *) base, size); 2088 hba_state.size = size; 2089 if (hba_state.base == MAP_FAILED) 2090 panic("unable to map HBA memory"); 2091 2092 /* Retrieve, allocate and enable the controller's IRQ. */ 2093 hba_state.irq = pci_attr_r8(devind, PCI_ILR); 2094 hba_state.hook_id = 0; 2095 2096 if ((r = sys_irqsetpolicy(hba_state.irq, 0, &hba_state.hook_id)) != OK) 2097 panic("unable to register IRQ: %d", r); 2098 2099 if ((r = sys_irqenable(&hba_state.hook_id)) != OK) 2100 panic("unable to enable IRQ: %d", r); 2101 2102 /* Reset the HBA. */ 2103 ahci_reset(); 2104 2105 /* Enable AHCI and interrupts. 
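	 * GHC.AE switches the controller from legacy emulation to AHCI mode
	 * and must be set before the other AHCI registers are programmed;
	 * GHC.IE is the global gate for the per-port interrupts enabled
	 * through PxIE.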
 */
	ghc = hba_read(AHCI_HBA_GHC);
	hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE);

	/* Limit the maximum number of commands per port to what the
	 * controller supports and to what the driver itself was compiled
	 * with (NR_CMDS).
	 */
	cap = hba_read(AHCI_HBA_CAP);
	hba_state.has_ncq = !!(cap & AHCI_HBA_CAP_SNCQ);
	hba_state.has_clo = !!(cap & AHCI_HBA_CAP_SCLO);
	hba_state.nr_cmds = MIN(NR_CMDS,
		((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1);

	dprintf(V_INFO, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
		"%s queuing, IRQ %d\n",
		ahci_instance,
		(int) (hba_read(AHCI_HBA_VS) >> 16),
		(int) ((hba_read(AHCI_HBA_VS) >> 8) & 0xFF),
		(int) (hba_read(AHCI_HBA_VS) & 0xFF),
		((cap >> AHCI_HBA_CAP_NP_SHIFT) & AHCI_HBA_CAP_NP_MASK) + 1,
		((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1,
		hba_state.has_ncq ? "supports" : "no", hba_state.irq));

	dprintf(V_INFO, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
		ahci_instance, cap, hba_read(AHCI_HBA_CAP2),
		hba_read(AHCI_HBA_PI)));

	/* Initialize each of the implemented ports. We ignore CAP.NP. */
	mask = hba_read(AHCI_HBA_PI);

	for (port = 0; port < hba_state.nr_ports; port++) {
		port_state[port].device = NO_DEVICE;
		port_state[port].state = STATE_NO_PORT;

		if (mask & (1 << port))
			port_init(&port_state[port]);
	}
}

/*===========================================================================*
 *				ahci_stop				     *
 *===========================================================================*/
static void ahci_stop(void)
{
	/* Disable AHCI, and clean up resources to the extent possible.
	 */
	struct port_state *ps;
	int r, port;

	for (port = 0; port < hba_state.nr_ports; port++) {
		ps = &port_state[port];

		if (ps->state != STATE_NO_PORT) {
			port_stop(ps);

			port_free(ps);
		}
	}

	ahci_reset();

	if ((r = vm_unmap_phys(SELF, (void *) hba_state.base,
			hba_state.size)) != OK)
		panic("unable to unmap HBA memory: %d", r);

	if ((r = sys_irqrmpolicy(&hba_state.hook_id)) != OK)
		panic("unable to deregister IRQ: %d", r);
}

/*===========================================================================*
 *				ahci_alarm				     *
 *===========================================================================*/
static void ahci_alarm(clock_t stamp)
{
	/* Process an alarm.
	 */

	/* Call the port-specific handler for each port that timed out. */
	expire_timers(stamp);
}

/*===========================================================================*
 *				ahci_intr				     *
 *===========================================================================*/
static void ahci_intr(unsigned int UNUSED(mask))
{
	/* Process an interrupt.
	 */
	struct port_state *ps;
	u32_t mask;
	int r, port;

	/* Handle an interrupt for each port that has the interrupt bit set. */
	mask = hba_read(AHCI_HBA_IS);

	for (port = 0; port < hba_state.nr_ports; port++) {
		if (mask & (1 << port)) {
			ps = &port_state[port];

			port_intr(ps);

			/* After processing an interrupt, wake up the device
			 * thread if it is suspended and now no longer busy.
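			 * This wakeup pairs with port_wait() and applies
			 * only to the slot-zero thread used during port and
			 * device initialization; ordinary command
			 * completions have already been signalled per tag by
			 * the port_check_cmds() call in port_intr().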
2207 */ 2208 if ((ps->flags & (FLAG_SUSPENDED | FLAG_BUSY)) == 2209 FLAG_SUSPENDED) 2210 blockdriver_mt_wakeup(ps->cmd_info[0].tid); 2211 } 2212 } 2213 2214 /* Clear the bits that we processed. */ 2215 hba_write(AHCI_HBA_IS, mask); 2216 2217 /* Reenable the interrupt. */ 2218 if ((r = sys_irqenable(&hba_state.hook_id)) != OK) 2219 panic("unable to enable IRQ: %d", r); 2220 } 2221 2222 /*===========================================================================* 2223 * ahci_get_params * 2224 *===========================================================================*/ 2225 static void ahci_get_params(void) 2226 { 2227 /* Retrieve and parse parameters passed to this driver, except the 2228 * device-to-port mapping, which has to be parsed later. 2229 */ 2230 long v; 2231 unsigned int i; 2232 2233 /* Find out which driver instance we are. */ 2234 v = 0; 2235 (void) env_parse("instance", "d", 0, &v, 0, 255); 2236 ahci_instance = (int) v; 2237 2238 /* Initialize the verbosity level. */ 2239 v = V_ERR; 2240 (void) env_parse("ahci_verbose", "d", 0, &v, V_NONE, V_REQ); 2241 ahci_verbose = (int) v; 2242 2243 /* Initialize timeout-related values. */ 2244 for (i = 0; i < sizeof(ahci_timevar) / sizeof(ahci_timevar[0]); i++) { 2245 v = ahci_timevar[i].default_ms; 2246 2247 (void) env_parse(ahci_timevar[i].name, "d", 0, &v, 1, 2248 LONG_MAX); 2249 2250 *ahci_timevar[i].ptr = millis_to_hz(v); 2251 } 2252 2253 ahci_device_delay = millis_to_hz(DEVICE_DELAY); 2254 ahci_device_checks = (ahci_device_timeout + ahci_device_delay - 1) / 2255 ahci_device_delay; 2256 } 2257 2258 /*===========================================================================* 2259 * ahci_set_mapping * 2260 *===========================================================================*/ 2261 static void ahci_set_mapping(void) 2262 { 2263 /* Construct a mapping from device nodes to port numbers. 2264 */ 2265 char key[16], val[32], *p; 2266 unsigned int port; 2267 int i, j; 2268 2269 /* Start off with a mapping that includes implemented ports only, in 2270 * order. We choose this mapping over an identity mapping to maximize 2271 * the chance that the user will be able to access the first MAX_DRIVES 2272 * devices. Note that we can only do this after initializing the HBA. 2273 */ 2274 for (i = j = 0; i < NR_PORTS && j < MAX_DRIVES; i++) 2275 if (port_state[i].state != STATE_NO_PORT) 2276 ahci_map[j++] = i; 2277 2278 for ( ; j < MAX_DRIVES; j++) 2279 ahci_map[j] = NO_PORT; 2280 2281 /* See if the user specified a custom mapping. Unlike all other 2282 * configuration options, this is a per-instance setting. 2283 */ 2284 strlcpy(key, "ahci0_map", sizeof(key)); 2285 key[4] += ahci_instance; 2286 2287 if (env_get_param(key, val, sizeof(val)) == OK) { 2288 /* Parse the mapping, which is assumed to be a comma-separated 2289 * list of zero-based port numbers. 2290 */ 2291 p = val; 2292 2293 for (i = 0; i < MAX_DRIVES; i++) { 2294 if (*p) { 2295 port = (unsigned int) strtoul(p, &p, 0); 2296 2297 if (*p) p++; 2298 2299 ahci_map[i] = port % NR_PORTS; 2300 } 2301 else ahci_map[i] = NO_PORT; 2302 } 2303 } 2304 2305 /* Create a reverse mapping. 
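	 * As a hypothetical example: booting this instance with
	 * "ahci0_map=5,1" makes the code above map D0 to port 5 and D1 to
	 * port 1, leaving the other drives unmapped; the loop below then
	 * records the inverse by setting port_state[5].device = 0 and
	 * port_state[1].device = 1.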
 */
	for (i = 0; i < MAX_DRIVES; i++)
		if ((j = ahci_map[i]) != NO_PORT)
			port_state[j].device = i;
}

/*===========================================================================*
 *				sef_cb_init_fresh			     *
 *===========================================================================*/
static int sef_cb_init_fresh(int type, sef_init_info_t *UNUSED(info))
{
	/* Initialize the driver.
	 */
	int devind;

	/* Get command line parameters. */
	ahci_get_params();

	/* Probe for recognized devices, skipping matches as appropriate. */
	devind = ahci_probe(ahci_instance);

	if (devind < 0)
		panic("no matching device found");

	/* Initialize the device we found. */
	ahci_init(devind);

	/* Create a mapping from device nodes to port numbers. */
	ahci_set_mapping();

	/* Announce that we are up. */
	blockdriver_announce(type);

	return OK;
}

/*===========================================================================*
 *				sef_cb_signal_handler			     *
 *===========================================================================*/
static void sef_cb_signal_handler(int signo)
{
	/* In case of a termination signal, shut down this driver.
	 */
	int port;

	if (signo != SIGTERM) return;

	/* If any ports are still open, assume that the system is being shut
	 * down, and stay up until the last device has been closed.
	 */
	ahci_exiting = TRUE;

	for (port = 0; port < hba_state.nr_ports; port++)
		if (port_state[port].open_count > 0)
			return;

	/* If not, stop the driver and exit immediately. */
	ahci_stop();

	exit(0);
}

/*===========================================================================*
 *				sef_local_startup			     *
 *===========================================================================*/
static void sef_local_startup(void)
{
	/* Set callbacks and initialize the System Event Framework (SEF).
	 */

	/* Register init callbacks. */
	sef_setcb_init_fresh(sef_cb_init_fresh);

	/* Register signal callbacks. */
	sef_setcb_signal_handler(sef_cb_signal_handler);

	/* Enable support for live update. */
	blockdriver_mt_support_lu();

	/* Let SEF perform startup. */
	sef_startup();
}

/*===========================================================================*
 *				ahci_portname				     *
 *===========================================================================*/
static char *ahci_portname(struct port_state *ps)
{
	/* Return a printable name for the given port. Whenever we can, print a
	 * "Dx" device number rather than a "Pxx" port number, because the user
	 * may not be aware of the mapping currently in use.
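	 * For example, the second device of the first driver instance prints
	 * as "AHCI0-D1", whereas a port without a device node mapped to it
	 * prints as, say, "AHCI0-P12".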
2396 */ 2397 static char name[] = "AHCI0-P00"; 2398 2399 name[4] = '0' + ahci_instance; 2400 2401 if (ps->device == NO_DEVICE) { 2402 name[6] = 'P'; 2403 name[7] = '0' + (ps - port_state) / 10; 2404 name[8] = '0' + (ps - port_state) % 10; 2405 } 2406 else { 2407 name[6] = 'D'; 2408 name[7] = '0' + ps->device; 2409 name[8] = 0; 2410 } 2411 2412 return name; 2413 } 2414 2415 /*===========================================================================* 2416 * ahci_map_minor * 2417 *===========================================================================*/ 2418 static struct port_state *ahci_map_minor(devminor_t minor, struct device **dvp) 2419 { 2420 /* Map a minor device number to a port and a pointer to the partition's 2421 * device structure. Return NULL if this minor device number does not 2422 * identify an actual device. 2423 */ 2424 struct port_state *ps; 2425 int port; 2426 2427 ps = NULL; 2428 2429 if (minor >= 0 && minor < NR_MINORS) { 2430 port = ahci_map[minor / DEV_PER_DRIVE]; 2431 2432 if (port == NO_PORT) 2433 return NULL; 2434 2435 ps = &port_state[port]; 2436 *dvp = &ps->part[minor % DEV_PER_DRIVE]; 2437 } 2438 else if ((unsigned) (minor -= MINOR_d0p0s0) < NR_SUBDEVS) { 2439 port = ahci_map[minor / SUB_PER_DRIVE]; 2440 2441 if (port == NO_PORT) 2442 return NULL; 2443 2444 ps = &port_state[port]; 2445 *dvp = &ps->subpart[minor % SUB_PER_DRIVE]; 2446 } 2447 2448 return ps; 2449 } 2450 2451 /*===========================================================================* 2452 * ahci_part * 2453 *===========================================================================*/ 2454 static struct device *ahci_part(devminor_t minor) 2455 { 2456 /* Return a pointer to the partition information structure of the given 2457 * minor device. 2458 */ 2459 struct device *dv; 2460 2461 if (ahci_map_minor(minor, &dv) == NULL) 2462 return NULL; 2463 2464 return dv; 2465 } 2466 2467 /*===========================================================================* 2468 * ahci_open * 2469 *===========================================================================*/ 2470 static int ahci_open(devminor_t minor, int access) 2471 { 2472 /* Open a device. 2473 */ 2474 struct port_state *ps; 2475 int r; 2476 2477 ps = ahci_get_port(minor); 2478 2479 /* Only one open request can be processed at a time, due to the fact 2480 * that it is an exclusive operation. The thread that handles this call 2481 * can therefore freely register itself at slot zero. 2482 */ 2483 ps->cmd_info[0].tid = blockdriver_mt_get_tid(); 2484 2485 /* If we are still in the process of initializing this port or device, 2486 * wait for completion of that phase first. 2487 */ 2488 if (ps->flags & FLAG_BUSY) 2489 port_wait(ps); 2490 2491 /* The device may only be opened if it is now properly functioning. */ 2492 if (ps->state != STATE_GOOD_DEV) 2493 return ENXIO; 2494 2495 /* Some devices may only be opened in read-only mode. */ 2496 if ((ps->flags & FLAG_READONLY) && (access & BDEV_W_BIT)) 2497 return EACCES; 2498 2499 if (ps->open_count == 0) { 2500 /* The first open request. Clear the barrier flag, if set. */ 2501 ps->flags &= ~FLAG_BARRIER; 2502 2503 /* Recheck media only when nobody is using the device. */ 2504 if ((ps->flags & FLAG_ATAPI) && 2505 (r = atapi_check_medium(ps, 0)) != OK) 2506 return r; 2507 2508 /* After rechecking the media, the partition table must always 2509 * be read. This is also a convenient time to do it for 2510 * nonremovable devices. 
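		 * (For ATAPI devices, the atapi_check_medium() call above
		 * may just have detected a medium change, making any
		 * previously loaded tables stale.)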
Start by resetting the partition 2511 * tables and setting the working size of the entire device. 2512 */ 2513 memset(ps->part, 0, sizeof(ps->part)); 2514 memset(ps->subpart, 0, sizeof(ps->subpart)); 2515 2516 ps->part[0].dv_size = ps->lba_count * ps->sector_size; 2517 2518 partition(&ahci_dtab, ps->device * DEV_PER_DRIVE, P_PRIMARY, 2519 !!(ps->flags & FLAG_ATAPI)); 2520 2521 blockdriver_mt_set_workers(ps->device, ps->queue_depth); 2522 } 2523 else { 2524 /* If the barrier flag is set, deny new open requests until the 2525 * device is fully closed first. 2526 */ 2527 if (ps->flags & FLAG_BARRIER) 2528 return ENXIO; 2529 } 2530 2531 ps->open_count++; 2532 2533 return OK; 2534 } 2535 2536 /*===========================================================================* 2537 * ahci_close * 2538 *===========================================================================*/ 2539 static int ahci_close(devminor_t minor) 2540 { 2541 /* Close a device. 2542 */ 2543 struct port_state *ps; 2544 int port; 2545 2546 ps = ahci_get_port(minor); 2547 2548 /* Decrease the open count. */ 2549 if (ps->open_count <= 0) { 2550 dprintf(V_ERR, ("%s: closing already-closed port\n", 2551 ahci_portname(ps))); 2552 2553 return EINVAL; 2554 } 2555 2556 ps->open_count--; 2557 2558 if (ps->open_count > 0) 2559 return OK; 2560 2561 /* The device is now fully closed. That also means that the threads for 2562 * this device are not needed anymore, so we reduce the count to one. 2563 */ 2564 blockdriver_mt_set_workers(ps->device, 1); 2565 2566 if (ps->state == STATE_GOOD_DEV && !(ps->flags & FLAG_BARRIER)) { 2567 dprintf(V_INFO, ("%s: flushing write cache\n", 2568 ahci_portname(ps))); 2569 2570 (void) gen_flush_wcache(ps); 2571 } 2572 2573 /* If the entire driver has been told to terminate, check whether all 2574 * devices are now closed. If so, tell libblockdriver to quit after 2575 * replying to the close request. 2576 */ 2577 if (ahci_exiting) { 2578 for (port = 0; port < hba_state.nr_ports; port++) 2579 if (port_state[port].open_count > 0) 2580 break; 2581 2582 if (port == hba_state.nr_ports) { 2583 ahci_stop(); 2584 2585 blockdriver_mt_terminate(); 2586 } 2587 } 2588 2589 return OK; 2590 } 2591 2592 /*===========================================================================* 2593 * ahci_transfer * 2594 *===========================================================================*/ 2595 static ssize_t ahci_transfer(devminor_t minor, int do_write, u64_t position, 2596 endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags) 2597 { 2598 /* Perform data transfer on the selected device. 2599 */ 2600 struct port_state *ps; 2601 struct device *dv; 2602 u64_t pos, eof; 2603 2604 ps = ahci_get_port(minor); 2605 dv = ahci_part(minor); 2606 2607 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER)) 2608 return EIO; 2609 2610 if (count > NR_IOREQS) 2611 return EINVAL; 2612 2613 /* Check for basic end-of-partition condition: if the start position of 2614 * the request is outside the partition, return success immediately. 2615 * The size of the request is obtained, and possibly reduced, later. 
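	 * As a worked example with hypothetical numbers: given a partition
	 * of 1000 bytes, a request starting at position 1000 returns 0 (no
	 * bytes transferred), while a request starting at position 900 is
	 * passed on with eof = dv_base + 1000, limiting it to the remaining
	 * 100 bytes.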
2616 */ 2617 if (position >= dv->dv_size) 2618 return OK; 2619 2620 pos = dv->dv_base + position; 2621 eof = dv->dv_base + dv->dv_size; 2622 2623 return port_transfer(ps, pos, eof, endpt, (iovec_s_t *) iovec, count, 2624 do_write, flags); 2625 } 2626 2627 /*===========================================================================* 2628 * ahci_ioctl * 2629 *===========================================================================*/ 2630 static int ahci_ioctl(devminor_t minor, unsigned long request, 2631 endpoint_t endpt, cp_grant_id_t grant, endpoint_t UNUSED(user_endpt)) 2632 { 2633 /* Process I/O control requests. 2634 */ 2635 struct port_state *ps; 2636 int r, val; 2637 2638 ps = ahci_get_port(minor); 2639 2640 switch (request) { 2641 case DIOCEJECT: 2642 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER)) 2643 return EIO; 2644 2645 if (!(ps->flags & FLAG_ATAPI)) 2646 return EINVAL; 2647 2648 return atapi_load_eject(ps, 0, FALSE /*load*/); 2649 2650 case DIOCOPENCT: 2651 return sys_safecopyto(endpt, grant, 0, 2652 (vir_bytes) &ps->open_count, sizeof(ps->open_count)); 2653 2654 case DIOCFLUSH: 2655 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER)) 2656 return EIO; 2657 2658 return gen_flush_wcache(ps); 2659 2660 case DIOCSETWC: 2661 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER)) 2662 return EIO; 2663 2664 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &val, 2665 sizeof(val))) != OK) 2666 return r; 2667 2668 return gen_set_wcache(ps, val); 2669 2670 case DIOCGETWC: 2671 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER)) 2672 return EIO; 2673 2674 if ((r = gen_get_wcache(ps, &val)) != OK) 2675 return r; 2676 2677 return sys_safecopyto(endpt, grant, 0, (vir_bytes) &val, 2678 sizeof(val)); 2679 } 2680 2681 return ENOTTY; 2682 } 2683 2684 /*===========================================================================* 2685 * ahci_device * 2686 *===========================================================================*/ 2687 static int ahci_device(devminor_t minor, device_id_t *id) 2688 { 2689 /* Map a minor device number to a device ID. 2690 */ 2691 struct port_state *ps; 2692 struct device *dv; 2693 2694 if ((ps = ahci_map_minor(minor, &dv)) == NULL) 2695 return ENXIO; 2696 2697 *id = ps->device; 2698 2699 return OK; 2700 } 2701 2702 /*===========================================================================* 2703 * ahci_get_port * 2704 *===========================================================================*/ 2705 static struct port_state *ahci_get_port(devminor_t minor) 2706 { 2707 /* Get the port structure associated with the given minor device. 2708 * Called only from worker threads, so the minor device is already 2709 * guaranteed to map to a port. 2710 */ 2711 struct port_state *ps; 2712 struct device *dv; 2713 2714 if ((ps = ahci_map_minor(minor, &dv)) == NULL) 2715 panic("device mapping for minor %d disappeared", minor); 2716 2717 return ps; 2718 } 2719 2720 /*===========================================================================* 2721 * main * 2722 *===========================================================================*/ 2723 int main(int argc, char **argv) 2724 { 2725 /* Driver task. 2726 */ 2727 2728 env_setargs(argc, argv); 2729 sef_local_startup(); 2730 2731 blockdriver_mt_task(&ahci_dtab); 2732 2733 return 0; 2734 } 2735