1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Copyright (c) 2014-2016 Christoph Hellwig. 4 */ 5 #include <linux/sunrpc/svc.h> 6 #include <linux/blkdev.h> 7 #include <linux/nfs4.h> 8 #include <linux/nfs_fs.h> 9 #include <linux/nfs_xdr.h> 10 #include <linux/pr.h> 11 12 #include "blocklayout.h" 13 14 #define NFSDBG_FACILITY NFSDBG_PNFS_LD 15 16 static void bl_unregister_scsi(struct pnfs_block_dev *dev) 17 { 18 struct block_device *bdev = file_bdev(dev->bdev_file); 19 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 20 21 if (!test_and_clear_bit(PNFS_BDEV_REGISTERED, &dev->flags)) 22 return; 23 24 if (ops->pr_register(bdev, dev->pr_key, 0, false)) 25 pr_err("failed to unregister PR key.\n"); 26 } 27 28 static bool bl_register_scsi(struct pnfs_block_dev *dev) 29 { 30 struct block_device *bdev = file_bdev(dev->bdev_file); 31 const struct pr_ops *ops = bdev->bd_disk->fops->pr_ops; 32 int status; 33 34 if (test_and_set_bit(PNFS_BDEV_REGISTERED, &dev->flags)) 35 return true; 36 37 status = ops->pr_register(bdev, 0, dev->pr_key, true); 38 if (status) { 39 pr_err("pNFS: failed to register key for block device %s.", 40 bdev->bd_disk->disk_name); 41 return false; 42 } 43 return true; 44 } 45 46 static void bl_unregister_dev(struct pnfs_block_dev *dev) 47 { 48 u32 i; 49 50 if (dev->nr_children) { 51 for (i = 0; i < dev->nr_children; i++) 52 bl_unregister_dev(&dev->children[i]); 53 return; 54 } 55 56 if (dev->type == PNFS_BLOCK_VOLUME_SCSI) 57 bl_unregister_scsi(dev); 58 } 59 60 bool bl_register_dev(struct pnfs_block_dev *dev) 61 { 62 u32 i; 63 64 if (dev->nr_children) { 65 for (i = 0; i < dev->nr_children; i++) { 66 if (!bl_register_dev(&dev->children[i])) { 67 while (i > 0) 68 bl_unregister_dev(&dev->children[--i]); 69 return false; 70 } 71 } 72 return true; 73 } 74 75 if (dev->type == PNFS_BLOCK_VOLUME_SCSI) 76 return bl_register_scsi(dev); 77 return true; 78 } 79 80 static void 81 bl_free_device(struct pnfs_block_dev *dev) 82 { 83 bl_unregister_dev(dev); 84 85 if (dev->nr_children) { 86 int i; 87 88 for (i = 0; i < dev->nr_children; i++) 89 bl_free_device(&dev->children[i]); 90 kfree(dev->children); 91 } else { 92 if (dev->bdev_file) 93 fput(dev->bdev_file); 94 } 95 } 96 97 void 98 bl_free_deviceid_node(struct nfs4_deviceid_node *d) 99 { 100 struct pnfs_block_dev *dev = 101 container_of(d, struct pnfs_block_dev, node); 102 103 bl_free_device(dev); 104 kfree_rcu(dev, node.rcu); 105 } 106 107 static int 108 nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b) 109 { 110 __be32 *p; 111 int i; 112 113 p = xdr_inline_decode(xdr, 4); 114 if (!p) 115 return -EIO; 116 b->type = be32_to_cpup(p++); 117 118 switch (b->type) { 119 case PNFS_BLOCK_VOLUME_SIMPLE: 120 p = xdr_inline_decode(xdr, 4); 121 if (!p) 122 return -EIO; 123 b->simple.nr_sigs = be32_to_cpup(p++); 124 if (!b->simple.nr_sigs || b->simple.nr_sigs > PNFS_BLOCK_MAX_UUIDS) { 125 dprintk("Bad signature count: %d\n", b->simple.nr_sigs); 126 return -EIO; 127 } 128 129 b->simple.len = 4 + 4; 130 for (i = 0; i < b->simple.nr_sigs; i++) { 131 p = xdr_inline_decode(xdr, 8 + 4); 132 if (!p) 133 return -EIO; 134 p = xdr_decode_hyper(p, &b->simple.sigs[i].offset); 135 b->simple.sigs[i].sig_len = be32_to_cpup(p++); 136 if (b->simple.sigs[i].sig_len > PNFS_BLOCK_UUID_LEN) { 137 pr_info("signature too long: %d\n", 138 b->simple.sigs[i].sig_len); 139 return -EIO; 140 } 141 142 p = xdr_inline_decode(xdr, b->simple.sigs[i].sig_len); 143 if (!p) 144 return -EIO; 145 memcpy(&b->simple.sigs[i].sig, p, 146 b->simple.sigs[i].sig_len); 147 148 b->simple.len += 8 + 4 + \ 149 (XDR_QUADLEN(b->simple.sigs[i].sig_len) << 2); 150 } 151 break; 152 case PNFS_BLOCK_VOLUME_SLICE: 153 p = xdr_inline_decode(xdr, 8 + 8 + 4); 154 if (!p) 155 return -EIO; 156 p = xdr_decode_hyper(p, &b->slice.start); 157 p = xdr_decode_hyper(p, &b->slice.len); 158 b->slice.volume = be32_to_cpup(p++); 159 break; 160 case PNFS_BLOCK_VOLUME_CONCAT: 161 p = xdr_inline_decode(xdr, 4); 162 if (!p) 163 return -EIO; 164 165 b->concat.volumes_count = be32_to_cpup(p++); 166 if (b->concat.volumes_count > PNFS_BLOCK_MAX_DEVICES) { 167 dprintk("Too many volumes: %d\n", b->concat.volumes_count); 168 return -EIO; 169 } 170 171 p = xdr_inline_decode(xdr, b->concat.volumes_count * 4); 172 if (!p) 173 return -EIO; 174 for (i = 0; i < b->concat.volumes_count; i++) 175 b->concat.volumes[i] = be32_to_cpup(p++); 176 break; 177 case PNFS_BLOCK_VOLUME_STRIPE: 178 p = xdr_inline_decode(xdr, 8 + 4); 179 if (!p) 180 return -EIO; 181 182 p = xdr_decode_hyper(p, &b->stripe.chunk_size); 183 b->stripe.volumes_count = be32_to_cpup(p++); 184 if (b->stripe.volumes_count > PNFS_BLOCK_MAX_DEVICES) { 185 dprintk("Too many volumes: %d\n", b->stripe.volumes_count); 186 return -EIO; 187 } 188 189 p = xdr_inline_decode(xdr, b->stripe.volumes_count * 4); 190 if (!p) 191 return -EIO; 192 for (i = 0; i < b->stripe.volumes_count; i++) 193 b->stripe.volumes[i] = be32_to_cpup(p++); 194 break; 195 case PNFS_BLOCK_VOLUME_SCSI: 196 p = xdr_inline_decode(xdr, 4 + 4 + 4); 197 if (!p) 198 return -EIO; 199 b->scsi.code_set = be32_to_cpup(p++); 200 b->scsi.designator_type = be32_to_cpup(p++); 201 b->scsi.designator_len = be32_to_cpup(p++); 202 p = xdr_inline_decode(xdr, b->scsi.designator_len); 203 if (!p) 204 return -EIO; 205 if (b->scsi.designator_len > 256) 206 return -EIO; 207 memcpy(&b->scsi.designator, p, b->scsi.designator_len); 208 p = xdr_inline_decode(xdr, 8); 209 if (!p) 210 return -EIO; 211 p = xdr_decode_hyper(p, &b->scsi.pr_key); 212 break; 213 default: 214 dprintk("unknown volume type!\n"); 215 return -EIO; 216 } 217 218 return 0; 219 } 220 221 static bool bl_map_simple(struct pnfs_block_dev *dev, u64 offset, 222 struct pnfs_block_dev_map *map) 223 { 224 map->start = dev->start; 225 map->len = dev->len; 226 map->disk_offset = dev->disk_offset; 227 map->bdev = file_bdev(dev->bdev_file); 228 return true; 229 } 230 231 static bool bl_map_concat(struct pnfs_block_dev *dev, u64 offset, 232 struct pnfs_block_dev_map *map) 233 { 234 int i; 235 236 for (i = 0; i < dev->nr_children; i++) { 237 struct pnfs_block_dev *child = &dev->children[i]; 238 239 if (child->start > offset || 240 child->start + child->len <= offset) 241 continue; 242 243 child->map(child, offset - child->start, map); 244 return true; 245 } 246 247 dprintk("%s: ran off loop!\n", __func__); 248 return false; 249 } 250 251 static bool bl_map_stripe(struct pnfs_block_dev *dev, u64 offset, 252 struct pnfs_block_dev_map *map) 253 { 254 struct pnfs_block_dev *child; 255 u64 chunk; 256 u32 chunk_idx; 257 u64 disk_offset; 258 259 chunk = div_u64(offset, dev->chunk_size); 260 div_u64_rem(chunk, dev->nr_children, &chunk_idx); 261 262 if (chunk_idx >= dev->nr_children) { 263 dprintk("%s: invalid chunk idx %d (%lld/%lld)\n", 264 __func__, chunk_idx, offset, dev->chunk_size); 265 /* error, should not happen */ 266 return false; 267 } 268 269 /* truncate offset to the beginning of the stripe */ 270 offset = chunk * dev->chunk_size; 271 272 /* disk offset of the stripe */ 273 disk_offset = div_u64(offset, dev->nr_children); 274 275 child = &dev->children[chunk_idx]; 276 child->map(child, disk_offset, map); 277 278 map->start += offset; 279 map->disk_offset += disk_offset; 280 map->len = dev->chunk_size; 281 return true; 282 } 283 284 static int 285 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, 286 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask); 287 288 289 static int 290 bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d, 291 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 292 { 293 struct pnfs_block_volume *v = &volumes[idx]; 294 struct file *bdev_file; 295 dev_t dev; 296 297 dev = bl_resolve_deviceid(server, v, gfp_mask); 298 if (!dev) 299 return -EIO; 300 301 bdev_file = bdev_file_open_by_dev(dev, BLK_OPEN_READ | BLK_OPEN_WRITE, 302 NULL, NULL); 303 if (IS_ERR(bdev_file)) { 304 printk(KERN_WARNING "pNFS: failed to open device %d:%d (%ld)\n", 305 MAJOR(dev), MINOR(dev), PTR_ERR(bdev_file)); 306 return PTR_ERR(bdev_file); 307 } 308 d->bdev_file = bdev_file; 309 d->len = bdev_nr_bytes(file_bdev(bdev_file)); 310 d->map = bl_map_simple; 311 312 printk(KERN_INFO "pNFS: using block device %s\n", 313 file_bdev(bdev_file)->bd_disk->disk_name); 314 return 0; 315 } 316 317 static bool 318 bl_validate_designator(struct pnfs_block_volume *v) 319 { 320 switch (v->scsi.designator_type) { 321 case PS_DESIGNATOR_EUI64: 322 if (v->scsi.code_set != PS_CODE_SET_BINARY) 323 return false; 324 325 if (v->scsi.designator_len != 8 && 326 v->scsi.designator_len != 10 && 327 v->scsi.designator_len != 16) 328 return false; 329 330 return true; 331 case PS_DESIGNATOR_NAA: 332 if (v->scsi.code_set != PS_CODE_SET_BINARY) 333 return false; 334 335 if (v->scsi.designator_len != 8 && 336 v->scsi.designator_len != 16) 337 return false; 338 339 return true; 340 case PS_DESIGNATOR_T10: 341 case PS_DESIGNATOR_NAME: 342 pr_err("pNFS: unsupported designator " 343 "(code set %d, type %d, len %d.\n", 344 v->scsi.code_set, 345 v->scsi.designator_type, 346 v->scsi.designator_len); 347 return false; 348 default: 349 pr_err("pNFS: invalid designator " 350 "(code set %d, type %d, len %d.\n", 351 v->scsi.code_set, 352 v->scsi.designator_type, 353 v->scsi.designator_len); 354 return false; 355 } 356 } 357 358 static struct file * 359 bl_open_path(struct pnfs_block_volume *v, const char *prefix) 360 { 361 struct file *bdev_file; 362 const char *devname; 363 364 devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/%s%*phN", 365 prefix, v->scsi.designator_len, v->scsi.designator); 366 if (!devname) 367 return ERR_PTR(-ENOMEM); 368 369 bdev_file = bdev_file_open_by_path(devname, BLK_OPEN_READ | BLK_OPEN_WRITE, 370 NULL, NULL); 371 if (IS_ERR(bdev_file)) { 372 dprintk("failed to open device %s (%ld)\n", 373 devname, PTR_ERR(bdev_file)); 374 } 375 376 kfree(devname); 377 return bdev_file; 378 } 379 380 static int 381 bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d, 382 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 383 { 384 struct pnfs_block_volume *v = &volumes[idx]; 385 struct file *bdev_file; 386 const struct pr_ops *ops; 387 int error; 388 389 if (!bl_validate_designator(v)) 390 return -EINVAL; 391 392 /* 393 * Try to open the RH/Fedora specific dm-mpath udev path first, as the 394 * wwn- links will only point to the first discovered SCSI device there. 395 * On other distributions like Debian, the default SCSI by-id path will 396 * point to the dm-multipath device if one exists. 397 */ 398 bdev_file = bl_open_path(v, "dm-uuid-mpath-0x"); 399 if (IS_ERR(bdev_file)) 400 bdev_file = bl_open_path(v, "wwn-0x"); 401 if (IS_ERR(bdev_file)) { 402 pr_warn("pNFS: no device found for volume %*phN\n", 403 v->scsi.designator_len, v->scsi.designator); 404 return PTR_ERR(bdev_file); 405 } 406 d->bdev_file = bdev_file; 407 408 d->len = bdev_nr_bytes(file_bdev(d->bdev_file)); 409 d->map = bl_map_simple; 410 d->pr_key = v->scsi.pr_key; 411 412 if (d->len == 0) 413 return -ENODEV; 414 415 pr_info("pNFS: using block device %s (reservation key 0x%llx)\n", 416 file_bdev(d->bdev_file)->bd_disk->disk_name, d->pr_key); 417 418 ops = file_bdev(d->bdev_file)->bd_disk->fops->pr_ops; 419 if (!ops) { 420 pr_err("pNFS: block device %s does not support reservations.", 421 file_bdev(d->bdev_file)->bd_disk->disk_name); 422 error = -EINVAL; 423 goto out_blkdev_put; 424 } 425 426 return 0; 427 428 out_blkdev_put: 429 fput(d->bdev_file); 430 return error; 431 } 432 433 static int 434 bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d, 435 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 436 { 437 struct pnfs_block_volume *v = &volumes[idx]; 438 int ret; 439 440 ret = bl_parse_deviceid(server, d, volumes, v->slice.volume, gfp_mask); 441 if (ret) 442 return ret; 443 444 d->disk_offset = v->slice.start; 445 d->len = v->slice.len; 446 return 0; 447 } 448 449 static int 450 bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, 451 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 452 { 453 struct pnfs_block_volume *v = &volumes[idx]; 454 u64 len = 0; 455 int ret, i; 456 457 d->children = kcalloc(v->concat.volumes_count, 458 sizeof(struct pnfs_block_dev), gfp_mask); 459 if (!d->children) 460 return -ENOMEM; 461 462 for (i = 0; i < v->concat.volumes_count; i++) { 463 ret = bl_parse_deviceid(server, &d->children[i], 464 volumes, v->concat.volumes[i], gfp_mask); 465 if (ret) 466 return ret; 467 468 d->nr_children++; 469 d->children[i].start += len; 470 len += d->children[i].len; 471 } 472 473 d->len = len; 474 d->map = bl_map_concat; 475 return 0; 476 } 477 478 static int 479 bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, 480 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 481 { 482 struct pnfs_block_volume *v = &volumes[idx]; 483 u64 len = 0; 484 int ret, i; 485 486 d->children = kcalloc(v->stripe.volumes_count, 487 sizeof(struct pnfs_block_dev), gfp_mask); 488 if (!d->children) 489 return -ENOMEM; 490 491 for (i = 0; i < v->stripe.volumes_count; i++) { 492 ret = bl_parse_deviceid(server, &d->children[i], 493 volumes, v->stripe.volumes[i], gfp_mask); 494 if (ret) 495 return ret; 496 497 d->nr_children++; 498 len += d->children[i].len; 499 } 500 501 d->len = len; 502 d->chunk_size = v->stripe.chunk_size; 503 d->map = bl_map_stripe; 504 return 0; 505 } 506 507 static int 508 bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d, 509 struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask) 510 { 511 d->type = volumes[idx].type; 512 513 switch (d->type) { 514 case PNFS_BLOCK_VOLUME_SIMPLE: 515 return bl_parse_simple(server, d, volumes, idx, gfp_mask); 516 case PNFS_BLOCK_VOLUME_SLICE: 517 return bl_parse_slice(server, d, volumes, idx, gfp_mask); 518 case PNFS_BLOCK_VOLUME_CONCAT: 519 return bl_parse_concat(server, d, volumes, idx, gfp_mask); 520 case PNFS_BLOCK_VOLUME_STRIPE: 521 return bl_parse_stripe(server, d, volumes, idx, gfp_mask); 522 case PNFS_BLOCK_VOLUME_SCSI: 523 return bl_parse_scsi(server, d, volumes, idx, gfp_mask); 524 default: 525 dprintk("unsupported volume type: %d\n", d->type); 526 return -EIO; 527 } 528 } 529 530 struct nfs4_deviceid_node * 531 bl_alloc_deviceid_node(struct nfs_server *server, struct pnfs_device *pdev, 532 gfp_t gfp_mask) 533 { 534 struct nfs4_deviceid_node *node = NULL; 535 struct pnfs_block_volume *volumes; 536 struct pnfs_block_dev *top; 537 struct xdr_stream xdr; 538 struct xdr_buf buf; 539 struct page *scratch; 540 int nr_volumes, ret, i; 541 __be32 *p; 542 543 scratch = alloc_page(gfp_mask); 544 if (!scratch) 545 goto out; 546 547 xdr_init_decode_pages(&xdr, &buf, pdev->pages, pdev->pglen); 548 xdr_set_scratch_page(&xdr, scratch); 549 550 p = xdr_inline_decode(&xdr, sizeof(__be32)); 551 if (!p) 552 goto out_free_scratch; 553 nr_volumes = be32_to_cpup(p++); 554 555 volumes = kcalloc(nr_volumes, sizeof(struct pnfs_block_volume), 556 gfp_mask); 557 if (!volumes) 558 goto out_free_scratch; 559 560 for (i = 0; i < nr_volumes; i++) { 561 ret = nfs4_block_decode_volume(&xdr, &volumes[i]); 562 if (ret < 0) 563 goto out_free_volumes; 564 } 565 566 top = kzalloc(sizeof(*top), gfp_mask); 567 if (!top) 568 goto out_free_volumes; 569 570 ret = bl_parse_deviceid(server, top, volumes, nr_volumes - 1, gfp_mask); 571 572 node = &top->node; 573 nfs4_init_deviceid_node(node, server, &pdev->dev_id); 574 if (ret) 575 nfs4_mark_deviceid_unavailable(node); 576 577 out_free_volumes: 578 kfree(volumes); 579 out_free_scratch: 580 __free_page(scratch); 581 out: 582 return node; 583 } 584