1 /* NFSv4.1 client for Windows 2 * Copyright � 2012 The Regents of the University of Michigan 3 * 4 * Olga Kornievskaia <aglo@umich.edu> 5 * Casey Bodley <cbodley@umich.edu> 6 * 7 * This library is free software; you can redistribute it and/or modify it 8 * under the terms of the GNU Lesser General Public License as published by 9 * the Free Software Foundation; either version 2.1 of the License, or (at 10 * your option) any later version. 11 * 12 * This library is distributed in the hope that it will be useful, but 13 * without any warranty; without even the implied warranty of merchantability 14 * or fitness for a particular purpose. See the GNU Lesser General Public 15 * License for more details. 16 * 17 * You should have received a copy of the GNU Lesser General Public License 18 * along with this library; if not, write to the Free Software Foundation, 19 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 20 */ 21 22 #include <stdio.h> 23 24 #include "nfs41_ops.h" 25 #include "nfs41_callback.h" 26 #include "util.h" 27 #include "daemon_debug.h" 28 29 30 #define FLLVL 2 /* dprintf level for file layout logging */ 31 32 33 /* pnfs_layout_list */ 34 struct pnfs_layout_list { 35 struct list_entry head; 36 CRITICAL_SECTION lock; 37 }; 38 39 #define state_entry(pos) list_container(pos, pnfs_layout_state, entry) 40 #define layout_entry(pos) list_container(pos, pnfs_layout, entry) 41 #define file_layout_entry(pos) list_container(pos, pnfs_file_layout, layout.entry) 42 43 static enum pnfs_status layout_state_create( 44 IN const nfs41_fh *meta_fh, 45 OUT pnfs_layout_state **layout_out) 46 { 47 pnfs_layout_state *layout; 48 enum pnfs_status status = PNFS_SUCCESS; 49 50 layout = calloc(1, sizeof(pnfs_layout_state)); 51 if (layout == NULL) { 52 status = PNFSERR_RESOURCES; 53 goto out; 54 } 55 56 fh_copy(&layout->meta_fh, meta_fh); 57 list_init(&layout->layouts); 58 list_init(&layout->recalls); 59 InitializeSRWLock(&layout->lock); 60 InitializeConditionVariable(&layout->cond); 61 62 *layout_out = layout; 63 out: 64 return status; 65 } 66 67 static void file_layout_free( 68 IN pnfs_file_layout *layout) 69 { 70 if (layout->device) pnfs_file_device_put(layout->device); 71 free(layout->filehandles.arr); 72 free(layout); 73 } 74 75 static void layout_state_free_layouts( 76 IN pnfs_layout_state *state) 77 { 78 struct list_entry *entry, *tmp; 79 list_for_each_tmp(entry, tmp, &state->layouts) 80 file_layout_free(file_layout_entry(entry)); 81 list_init(&state->layouts); 82 } 83 84 static void layout_state_free_recalls( 85 IN pnfs_layout_state *state) 86 { 87 struct list_entry *entry, *tmp; 88 list_for_each_tmp(entry, tmp, &state->recalls) 89 free(layout_entry(entry)); 90 list_init(&state->recalls); 91 } 92 93 static void layout_state_free( 94 IN pnfs_layout_state *state) 95 { 96 layout_state_free_layouts(state); 97 layout_state_free_recalls(state); 98 free(state); 99 } 100 101 static int layout_entry_compare( 102 IN const struct list_entry *entry, 103 IN const void *value) 104 { 105 const pnfs_layout_state *layout = state_entry(entry); 106 const nfs41_fh *meta_fh = (const nfs41_fh*)value; 107 const nfs41_fh *layout_fh = (const nfs41_fh*)&layout->meta_fh; 108 const uint32_t diff = layout_fh->len - meta_fh->len; 109 return diff ? diff : memcmp(layout_fh->fh, meta_fh->fh, meta_fh->len); 110 } 111 112 static enum pnfs_status layout_entry_find( 113 IN struct pnfs_layout_list *layouts, 114 IN const nfs41_fh *meta_fh, 115 OUT struct list_entry **entry_out) 116 { 117 *entry_out = list_search(&layouts->head, meta_fh, layout_entry_compare); 118 return *entry_out ? PNFS_SUCCESS : PNFSERR_NO_LAYOUT; 119 } 120 121 enum pnfs_status pnfs_layout_list_create( 122 OUT struct pnfs_layout_list **layouts_out) 123 { 124 struct pnfs_layout_list *layouts; 125 enum pnfs_status status = PNFS_SUCCESS; 126 127 layouts = calloc(1, sizeof(struct pnfs_layout_list)); 128 if (layouts == NULL) { 129 status = PNFSERR_RESOURCES; 130 goto out; 131 } 132 list_init(&layouts->head); 133 InitializeCriticalSection(&layouts->lock); 134 *layouts_out = layouts; 135 out: 136 return status; 137 } 138 139 void pnfs_layout_list_free( 140 IN struct pnfs_layout_list *layouts) 141 { 142 struct list_entry *entry, *tmp; 143 144 EnterCriticalSection(&layouts->lock); 145 146 list_for_each_tmp(entry, tmp, &layouts->head) 147 layout_state_free(state_entry(entry)); 148 149 LeaveCriticalSection(&layouts->lock); 150 DeleteCriticalSection(&layouts->lock); 151 free(layouts); 152 } 153 154 static enum pnfs_status layout_state_find_or_create( 155 IN struct pnfs_layout_list *layouts, 156 IN const nfs41_fh *meta_fh, 157 OUT pnfs_layout_state **layout_out) 158 { 159 struct list_entry *entry; 160 enum pnfs_status status; 161 162 dprintf(FLLVL, "--> layout_state_find_or_create()\n"); 163 164 EnterCriticalSection(&layouts->lock); 165 166 /* search for an existing layout */ 167 status = layout_entry_find(layouts, meta_fh, &entry); 168 if (status) { 169 /* create a new layout */ 170 pnfs_layout_state *layout; 171 status = layout_state_create(meta_fh, &layout); 172 if (status == PNFS_SUCCESS) { 173 /* add it to the list */ 174 list_add_head(&layouts->head, &layout->entry); 175 *layout_out = layout; 176 177 dprintf(FLLVL, "<-- layout_state_find_or_create() " 178 "returning new layout %p\n", layout); 179 } else { 180 dprintf(FLLVL, "<-- layout_state_find_or_create() " 181 "returning %s\n", pnfs_error_string(status)); 182 } 183 } else { 184 *layout_out = state_entry(entry); 185 186 dprintf(FLLVL, "<-- layout_state_find_or_create() " 187 "returning existing layout %p\n", *layout_out); 188 } 189 190 LeaveCriticalSection(&layouts->lock); 191 return status; 192 } 193 194 static enum pnfs_status layout_state_find_and_delete( 195 IN struct pnfs_layout_list *layouts, 196 IN const nfs41_fh *meta_fh) 197 { 198 struct list_entry *entry; 199 enum pnfs_status status; 200 201 dprintf(FLLVL, "--> layout_state_find_and_delete()\n"); 202 203 EnterCriticalSection(&layouts->lock); 204 205 status = layout_entry_find(layouts, meta_fh, &entry); 206 if (status == PNFS_SUCCESS) { 207 list_remove(entry); 208 layout_state_free(state_entry(entry)); 209 } 210 211 LeaveCriticalSection(&layouts->lock); 212 213 dprintf(FLLVL, "<-- layout_state_find_and_delete() " 214 "returning %s\n", pnfs_error_string(status)); 215 return status; 216 } 217 218 219 /* pnfs_file_layout */ 220 static uint64_t range_max( 221 IN const pnfs_layout *layout) 222 { 223 uint64_t result = layout->offset + layout->length; 224 return result < layout->offset ? NFS4_UINT64_MAX : result; 225 } 226 227 static bool_t layout_sanity_check( 228 IN pnfs_file_layout *layout) 229 { 230 /* prevent div/0 */ 231 if (layout->layout.length == 0 || 232 layout->layout.iomode < PNFS_IOMODE_READ || 233 layout->layout.iomode > PNFS_IOMODE_RW || 234 layout_unit_size(layout) == 0) 235 return FALSE; 236 237 /* put a cap on layout.length to prevent overflow */ 238 layout->layout.length = range_max(&layout->layout) - layout->layout.offset; 239 return TRUE; 240 } 241 242 static int layout_filehandles_cmp( 243 IN const pnfs_file_layout_handles *lhs, 244 IN const pnfs_file_layout_handles *rhs) 245 { 246 const uint32_t diff = rhs->count - lhs->count; 247 return diff ? diff : memcmp(rhs->arr, lhs->arr, 248 rhs->count * sizeof(nfs41_path_fh)); 249 } 250 251 static bool_t layout_merge_segments( 252 IN pnfs_file_layout *to, 253 IN pnfs_file_layout *from) 254 { 255 const uint64_t to_max = range_max(&to->layout); 256 const uint64_t from_max = range_max(&from->layout); 257 258 /* cannot merge a segment with itself */ 259 if (to == from) 260 return FALSE; 261 262 /* the ranges must meet or overlap */ 263 if (to_max < from->layout.offset || from_max < to->layout.offset) 264 return FALSE; 265 266 /* the following fields must match: */ 267 if (to->layout.iomode != from->layout.iomode || 268 to->layout.type != from->layout.type || 269 layout_filehandles_cmp(&to->filehandles, &from->filehandles) != 0 || 270 memcmp(to->deviceid, from->deviceid, PNFS_DEVICEID_SIZE) != 0 || 271 to->pattern_offset != from->pattern_offset || 272 to->first_index != from->first_index || 273 to->util != from->util) 274 return FALSE; 275 276 dprintf(FLLVL, "merging layout range {%llu, %llu} with {%llu, %llu}\n", 277 to->layout.offset, to->layout.length, 278 from->layout.offset, from->layout.length); 279 280 /* calculate the union of the two ranges */ 281 to->layout.offset = min(to->layout.offset, from->layout.offset); 282 to->layout.length = max(to_max, from_max) - to->layout.offset; 283 return TRUE; 284 } 285 286 static enum pnfs_status layout_state_merge( 287 IN pnfs_layout_state *state, 288 IN pnfs_file_layout *from) 289 { 290 struct list_entry *entry, *tmp; 291 pnfs_file_layout *to; 292 enum pnfs_status status = PNFSERR_NO_LAYOUT; 293 294 /* attempt to merge the new segment with each existing segment */ 295 list_for_each_tmp(entry, tmp, &state->layouts) { 296 to = file_layout_entry(entry); 297 if (!layout_merge_segments(to, from)) 298 continue; 299 300 /* on success, remove/free the new segment */ 301 list_remove(&from->layout.entry); 302 file_layout_free(from); 303 status = PNFS_SUCCESS; 304 305 /* because the existing segment 'to' has grown, we may 306 * be able to merge it with later segments */ 307 from = to; 308 309 /* but if there could be io threads referencing this segment, 310 * we can't free it until io is finished */ 311 if (state->io_count) 312 break; 313 } 314 return status; 315 } 316 317 static void layout_ordered_insert( 318 IN pnfs_layout_state *state, 319 IN pnfs_layout *layout) 320 { 321 struct list_entry *entry; 322 list_for_each(entry, &state->layouts) { 323 pnfs_layout *existing = layout_entry(entry); 324 325 /* maintain an order of increasing offset */ 326 if (existing->offset < layout->offset) 327 continue; 328 329 /* when offsets are equal, prefer a longer segment first */ 330 if (existing->offset == layout->offset && 331 existing->length > layout->length) 332 continue; 333 334 list_add(&layout->entry, existing->entry.prev, &existing->entry); 335 return; 336 } 337 338 list_add_tail(&state->layouts, &layout->entry); 339 } 340 341 static enum pnfs_status layout_update_range( 342 IN OUT pnfs_layout_state *state, 343 IN const struct list_entry *layouts) 344 { 345 struct list_entry *entry, *tmp; 346 pnfs_file_layout *layout; 347 enum pnfs_status status = PNFSERR_NO_LAYOUT; 348 349 list_for_each_tmp(entry, tmp, layouts) { 350 layout = file_layout_entry(entry); 351 352 /* don't know what to do with non-file layouts */ 353 if (layout->layout.type != PNFS_LAYOUTTYPE_FILE) 354 continue; 355 356 if (!layout_sanity_check(layout)) { 357 file_layout_free(layout); 358 continue; 359 } 360 361 /* attempt to merge the range with existing segments */ 362 status = layout_state_merge(state, layout); 363 if (status) { 364 dprintf(FLLVL, "saving new layout:\n"); 365 dprint_layout(FLLVL, layout); 366 367 layout_ordered_insert(state, &layout->layout); 368 status = PNFS_SUCCESS; 369 } 370 } 371 return status; 372 } 373 374 static enum pnfs_status layout_update_stateid( 375 IN OUT pnfs_layout_state *state, 376 IN const stateid4 *stateid) 377 { 378 enum pnfs_status status = PNFS_SUCCESS; 379 380 if (state->stateid.seqid == 0) { 381 /* save a new layout stateid */ 382 memcpy(&state->stateid, stateid, sizeof(stateid4)); 383 } else if (memcmp(&state->stateid.other, stateid->other, 384 NFS4_STATEID_OTHER) == 0) { 385 /* update an existing layout stateid */ 386 state->stateid.seqid = stateid->seqid; 387 } else { 388 status = PNFSERR_NO_LAYOUT; 389 } 390 return status; 391 } 392 393 static enum pnfs_status layout_update( 394 IN OUT pnfs_layout_state *state, 395 IN const pnfs_layoutget_res_ok *layoutget_res) 396 { 397 enum pnfs_status status; 398 399 /* update the layout ranges held by the client */ 400 status = layout_update_range(state, &layoutget_res->layouts); 401 if (status) { 402 eprintf("LAYOUTGET didn't return any file layouts\n"); 403 goto out; 404 } 405 /* update the layout stateid */ 406 status = layout_update_stateid(state, &layoutget_res->stateid); 407 if (status) { 408 eprintf("LAYOUTGET returned a new stateid when we already had one\n"); 409 goto out; 410 } 411 /* if a previous LAYOUTGET set return_on_close, don't overwrite it */ 412 if (!state->return_on_close) 413 state->return_on_close = layoutget_res->return_on_close; 414 out: 415 return status; 416 } 417 418 static enum pnfs_status file_layout_fetch( 419 IN OUT pnfs_layout_state *state, 420 IN nfs41_session *session, 421 IN nfs41_path_fh *meta_file, 422 IN stateid_arg *stateid, 423 IN enum pnfs_iomode iomode, 424 IN uint64_t offset, 425 IN uint64_t minlength, 426 IN uint64_t length) 427 { 428 pnfs_layoutget_res_ok layoutget_res = { 0 }; 429 enum pnfs_status pnfsstat = PNFS_SUCCESS; 430 enum nfsstat4 nfsstat; 431 432 dprintf(FLLVL, "--> file_layout_fetch(%s, seqid=%u)\n", 433 pnfs_iomode_string(iomode), state->stateid.seqid); 434 435 list_init(&layoutget_res.layouts); 436 437 /* drop the lock during the rpc call */ 438 ReleaseSRWLockExclusive(&state->lock); 439 nfsstat = pnfs_rpc_layoutget(session, meta_file, stateid, 440 iomode, offset, minlength, length, &layoutget_res); 441 AcquireSRWLockExclusive(&state->lock); 442 443 if (nfsstat) { 444 dprintf(FLLVL, "pnfs_rpc_layoutget() failed with %s\n", 445 nfs_error_string(nfsstat)); 446 pnfsstat = PNFSERR_NOT_SUPPORTED; 447 } 448 449 switch (nfsstat) { 450 case NFS4_OK: 451 /* use the LAYOUTGET results to update our view of the layout */ 452 pnfsstat = layout_update(state, &layoutget_res); 453 break; 454 455 case NFS4ERR_BADIOMODE: 456 /* don't try RW again */ 457 if (iomode == PNFS_IOMODE_RW) 458 state->status |= PNFS_LAYOUT_NOT_RW; 459 break; 460 461 case NFS4ERR_LAYOUTUNAVAILABLE: 462 case NFS4ERR_UNKNOWN_LAYOUTTYPE: 463 case NFS4ERR_BADLAYOUT: 464 /* don't try again at all */ 465 state->status |= PNFS_LAYOUT_UNAVAILABLE; 466 break; 467 } 468 469 dprintf(FLLVL, "<-- file_layout_fetch() returning %s\n", 470 pnfs_error_string(pnfsstat)); 471 return pnfsstat; 472 } 473 474 /* returns PNFS_SUCCESS if the client holds valid layouts that cover 475 * the entire range requested. otherwise, returns PNFS_PENDING and 476 * sets 'offset_missing' to the lowest offset that is not covered */ 477 static enum pnfs_status layout_coverage_status( 478 IN pnfs_layout_state *state, 479 IN enum pnfs_iomode iomode, 480 IN uint64_t offset, 481 IN uint64_t length, 482 OUT uint64_t *offset_missing) 483 { 484 uint64_t position = offset; 485 struct list_entry *entry; 486 487 list_for_each(entry, &state->layouts) { 488 /* if the current position intersects with a compatible 489 * layout, move the position to the end of that layout */ 490 pnfs_layout *layout = layout_entry(entry); 491 if (layout->iomode >= iomode && 492 layout->offset <= position && 493 position < layout->offset + layout->length) 494 position = layout->offset + layout->length; 495 } 496 497 if (position >= offset + length) 498 return PNFS_SUCCESS; 499 500 *offset_missing = position; 501 return PNFS_PENDING; 502 } 503 504 static enum pnfs_status layout_fetch( 505 IN pnfs_layout_state *state, 506 IN nfs41_session *session, 507 IN nfs41_path_fh *meta_file, 508 IN stateid_arg *stateid, 509 IN enum pnfs_iomode iomode, 510 IN uint64_t offset, 511 IN uint64_t length) 512 { 513 stateid_arg layout_stateid = { 0 }; 514 enum pnfs_status status = PNFS_PENDING; 515 516 /* check for previous errors from LAYOUTGET */ 517 if ((state->status & PNFS_LAYOUT_UNAVAILABLE) || 518 ((state->status & PNFS_LAYOUT_NOT_RW) && iomode == PNFS_IOMODE_RW)) { 519 status = PNFSERR_NO_LAYOUT; 520 goto out; 521 } 522 523 /* wait for any pending LAYOUTGETs/LAYOUTRETURNs */ 524 while (state->pending) 525 SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0); 526 state->pending = TRUE; 527 528 /* if there's an existing layout stateid, use it */ 529 if (state->stateid.seqid) { 530 memcpy(&layout_stateid.stateid, &state->stateid, sizeof(stateid4)); 531 layout_stateid.type = STATEID_LAYOUT; 532 stateid = &layout_stateid; 533 } 534 535 if ((state->status & PNFS_LAYOUT_NOT_RW) == 0) { 536 /* try to get a RW layout first */ 537 status = file_layout_fetch(state, session, meta_file, 538 stateid, PNFS_IOMODE_RW, offset, length, NFS4_UINT64_MAX); 539 } 540 541 if (status && iomode == PNFS_IOMODE_READ) { 542 /* fall back on READ if necessary */ 543 status = file_layout_fetch(state, session, meta_file, 544 stateid, iomode, offset, length, NFS4_UINT64_MAX); 545 } 546 547 state->pending = FALSE; 548 WakeConditionVariable(&state->cond); 549 out: 550 return status; 551 } 552 553 static enum pnfs_status device_status( 554 IN pnfs_layout_state *state, 555 IN uint64_t offset, 556 IN uint64_t length, 557 OUT unsigned char *deviceid) 558 { 559 struct list_entry *entry; 560 enum pnfs_status status = PNFS_SUCCESS; 561 562 list_for_each(entry, &state->layouts) { 563 pnfs_file_layout *layout = file_layout_entry(entry); 564 565 if (layout->device == NULL) { 566 /* copy missing deviceid */ 567 memcpy(deviceid, layout->deviceid, PNFS_DEVICEID_SIZE); 568 status = PNFS_PENDING; 569 break; 570 } 571 } 572 return status; 573 } 574 575 static void device_assign( 576 IN pnfs_layout_state *state, 577 IN const unsigned char *deviceid, 578 IN pnfs_file_device *device) 579 { 580 struct list_entry *entry; 581 list_for_each(entry, &state->layouts) { 582 pnfs_file_layout *layout = file_layout_entry(entry); 583 584 /* assign the device to any matching layouts */ 585 if (layout->device == NULL && 586 memcmp(layout->deviceid, deviceid, PNFS_DEVICEID_SIZE) == 0) { 587 layout->device = device; 588 589 /* XXX: only assign the device to a single segment, because 590 * pnfs_file_device_get() only gives us a single reference */ 591 break; 592 } 593 } 594 } 595 596 static enum pnfs_status device_fetch( 597 IN pnfs_layout_state *state, 598 IN nfs41_session *session, 599 IN unsigned char *deviceid) 600 { 601 pnfs_file_device *device; 602 enum pnfs_status status; 603 604 /* drop the layoutstate lock for the rpc call */ 605 ReleaseSRWLockExclusive(&state->lock); 606 status = pnfs_file_device_get(session, 607 session->client->devices, deviceid, &device); 608 AcquireSRWLockExclusive(&state->lock); 609 610 if (status == PNFS_SUCCESS) 611 device_assign(state, deviceid, device); 612 return status; 613 } 614 615 616 /* nfs41_open_state */ 617 static enum pnfs_status client_supports_pnfs( 618 IN nfs41_client *client) 619 { 620 enum pnfs_status status; 621 AcquireSRWLockShared(&client->exid_lock); 622 status = client->roles & EXCHGID4_FLAG_USE_PNFS_MDS 623 ? PNFS_SUCCESS : PNFSERR_NOT_SUPPORTED; 624 ReleaseSRWLockShared(&client->exid_lock); 625 return status; 626 } 627 628 static enum pnfs_status fs_supports_layout( 629 IN const nfs41_superblock *superblock, 630 IN enum pnfs_layout_type type) 631 { 632 const uint32_t flag = 1 << (type - 1); 633 return (superblock->layout_types & flag) == 0 634 ? PNFSERR_NOT_SUPPORTED : PNFS_SUCCESS; 635 } 636 637 static enum pnfs_status open_state_layout_cached( 638 IN nfs41_open_state *state, 639 OUT pnfs_layout_state **layout_out) 640 { 641 enum pnfs_status status = PNFSERR_NO_LAYOUT; 642 643 if (state->layout) { 644 status = PNFS_SUCCESS; 645 *layout_out = state->layout; 646 647 dprintf(FLLVL, "pnfs_open_state_layout() found " 648 "cached layout %p\n", *layout_out); 649 } 650 return status; 651 } 652 653 enum pnfs_status pnfs_layout_state_open( 654 IN nfs41_open_state *state, 655 OUT pnfs_layout_state **layout_out) 656 { 657 struct pnfs_layout_list *layouts = state->session->client->layouts; 658 nfs41_session *session = state->session; 659 pnfs_layout_state *layout; 660 enum pnfs_status status; 661 662 dprintf(FLLVL, "--> pnfs_layout_state_open()\n"); 663 664 status = client_supports_pnfs(session->client); 665 if (status) 666 goto out; 667 status = fs_supports_layout(state->file.fh.superblock, PNFS_LAYOUTTYPE_FILE); 668 if (status) 669 goto out; 670 671 /* under shared lock, check open state for cached layouts */ 672 AcquireSRWLockShared(&state->lock); 673 status = open_state_layout_cached(state, &layout); 674 ReleaseSRWLockShared(&state->lock); 675 676 if (status) { 677 /* under exclusive lock, find or create a layout for this file */ 678 AcquireSRWLockExclusive(&state->lock); 679 680 status = open_state_layout_cached(state, &layout); 681 if (status) { 682 status = layout_state_find_or_create(layouts, &state->file.fh, &layout); 683 if (status == PNFS_SUCCESS) { 684 LONG open_count = InterlockedIncrement(&layout->open_count); 685 state->layout = layout; 686 687 dprintf(FLLVL, "pnfs_layout_state_open() caching layout %p " 688 "(%u opens)\n", state->layout, open_count); 689 } 690 } 691 692 ReleaseSRWLockExclusive(&state->lock); 693 694 if (status) 695 goto out; 696 } 697 698 *layout_out = layout; 699 out: 700 dprintf(FLLVL, "<-- pnfs_layout_state_open() returning %s\n", 701 pnfs_error_string(status)); 702 return status; 703 } 704 705 /* expects caller to hold an exclusive lock on pnfs_layout_state */ 706 enum pnfs_status pnfs_layout_state_prepare( 707 IN pnfs_layout_state *state, 708 IN nfs41_session *session, 709 IN nfs41_path_fh *meta_file, 710 IN stateid_arg *stateid, 711 IN enum pnfs_iomode iomode, 712 IN uint64_t offset, 713 IN uint64_t length) 714 { 715 unsigned char deviceid[PNFS_DEVICEID_SIZE]; 716 struct list_entry *entry; 717 uint64_t missing; 718 enum pnfs_status status; 719 720 /* fail if the range intersects any pending recalls */ 721 list_for_each(entry, &state->recalls) { 722 const pnfs_layout *recall = layout_entry(entry); 723 if (offset <= recall->offset + recall->length 724 && recall->offset <= offset + length) { 725 status = PNFSERR_LAYOUT_RECALLED; 726 goto out; 727 } 728 } 729 730 /* if part of the given range is not covered by a layout, 731 * attempt to fetch it with LAYOUTGET */ 732 status = layout_coverage_status(state, iomode, offset, length, &missing); 733 if (status == PNFS_PENDING) { 734 status = layout_fetch(state, session, meta_file, stateid, 735 iomode, missing, offset + length - missing); 736 737 /* return pending because layout_fetch() dropped the lock */ 738 if (status == PNFS_SUCCESS) 739 status = PNFS_PENDING; 740 goto out; 741 } 742 743 /* if any layouts in the range are missing device info, 744 * fetch them with GETDEVICEINFO */ 745 status = device_status(state, offset, length, deviceid); 746 if (status == PNFS_PENDING) { 747 status = device_fetch(state, session, deviceid); 748 749 /* return pending because device_fetch() dropped the lock */ 750 if (status == PNFS_SUCCESS) 751 status = PNFS_PENDING; 752 goto out; 753 } 754 out: 755 return status; 756 } 757 758 static enum pnfs_status layout_return_status( 759 IN const pnfs_layout_state *state) 760 { 761 /* return the layout if we have a stateid */ 762 return state->stateid.seqid ? PNFS_SUCCESS : PNFS_PENDING; 763 } 764 765 static enum pnfs_status file_layout_return( 766 IN nfs41_session *session, 767 IN nfs41_path_fh *file, 768 IN pnfs_layout_state *state) 769 { 770 enum pnfs_status status; 771 enum nfsstat4 nfsstat; 772 773 dprintf(FLLVL, "--> file_layout_return()\n"); 774 775 /* under shared lock, determine whether we need to return the layout */ 776 AcquireSRWLockShared(&state->lock); 777 status = layout_return_status(state); 778 ReleaseSRWLockShared(&state->lock); 779 780 if (status != PNFS_PENDING) 781 goto out; 782 783 /* under exclusive lock, return the layout and reset status flags */ 784 AcquireSRWLockExclusive(&state->lock); 785 786 /* wait for any pending LAYOUTGETs/LAYOUTRETURNs */ 787 while (state->pending) 788 SleepConditionVariableSRW(&state->cond, &state->lock, INFINITE, 0); 789 state->pending = TRUE; 790 791 status = layout_return_status(state); 792 if (status == PNFS_PENDING) { 793 pnfs_layoutreturn_res layoutreturn_res = { 0 }; 794 stateid4 stateid; 795 memcpy(&stateid, &state->stateid, sizeof(stateid)); 796 797 /* drop the lock during the rpc call */ 798 ReleaseSRWLockExclusive(&state->lock); 799 nfsstat = pnfs_rpc_layoutreturn(session, file, PNFS_LAYOUTTYPE_FILE, 800 PNFS_IOMODE_ANY, 0, NFS4_UINT64_MAX, &stateid, &layoutreturn_res); 801 AcquireSRWLockExclusive(&state->lock); 802 803 if (nfsstat) { 804 eprintf("pnfs_rpc_layoutreturn() failed with %s\n", 805 nfs_error_string(nfsstat)); 806 status = PNFSERR_NO_LAYOUT; 807 } else { 808 status = PNFS_SUCCESS; 809 810 /* update the layout range held by the client */ 811 layout_state_free_layouts(state); 812 813 /* 12.5.3. Layout Stateid: Once a client has no more 814 * layouts on a file, the layout stateid is no longer 815 * valid and MUST NOT be used. */ 816 ZeroMemory(&state->stateid, sizeof(stateid4)); 817 } 818 } 819 820 state->pending = FALSE; 821 WakeConditionVariable(&state->cond); 822 ReleaseSRWLockExclusive(&state->lock); 823 824 out: 825 dprintf(FLLVL, "<-- file_layout_return() returning %s\n", 826 pnfs_error_string(status)); 827 return status; 828 } 829 830 void pnfs_layout_state_close( 831 IN nfs41_session *session, 832 IN nfs41_open_state *state, 833 IN bool_t remove) 834 { 835 pnfs_layout_state *layout; 836 bool_t return_layout; 837 enum pnfs_status status; 838 839 AcquireSRWLockExclusive(&state->lock); 840 layout = state->layout; 841 state->layout = NULL; 842 ReleaseSRWLockExclusive(&state->lock); 843 844 if (layout) { 845 LONG open_count = InterlockedDecrement(&layout->open_count); 846 847 AcquireSRWLockShared(&layout->lock); 848 /* only return on close if it's the last close */ 849 return_layout = layout->return_on_close && (open_count <= 0); 850 ReleaseSRWLockShared(&layout->lock); 851 852 if (return_layout) { 853 status = file_layout_return(session, &state->file, layout); 854 if (status) 855 eprintf("file_layout_return() failed with %s\n", 856 pnfs_error_string(status)); 857 } 858 } 859 860 if (remove && session->client->layouts) { 861 /* free the layout when the file is removed */ 862 layout_state_find_and_delete(session->client->layouts, &state->file.fh); 863 } 864 } 865 866 867 /* pnfs_layout_recall */ 868 struct layout_recall { 869 pnfs_layout layout; 870 bool_t changed; 871 }; 872 #define recall_entry(pos) list_container(pos, struct layout_recall, layout.entry) 873 874 static bool_t layout_recall_compatible( 875 IN const pnfs_layout *layout, 876 IN const pnfs_layout *recall) 877 { 878 return layout->type == recall->type 879 && layout->offset <= (recall->offset + recall->length) 880 && recall->offset <= (layout->offset + layout->length) 881 && (recall->iomode == PNFS_IOMODE_ANY || 882 layout->iomode == recall->iomode); 883 } 884 885 static pnfs_file_layout* layout_allocate_copy( 886 IN const pnfs_file_layout *existing) 887 { 888 /* allocate a segment to cover the end of the range */ 889 pnfs_file_layout *layout = calloc(1, sizeof(pnfs_file_layout)); 890 if (layout == NULL) 891 goto out; 892 893 memcpy(layout, existing, sizeof(pnfs_file_layout)); 894 895 /* XXX: don't use the device from existing layout; 896 * we need to get a reference for ourselves */ 897 layout->device = NULL; 898 899 /* allocate a copy of the filehandle array */ 900 layout->filehandles.arr = calloc(layout->filehandles.count, 901 sizeof(nfs41_path_fh)); 902 if (layout->filehandles.arr == NULL) 903 goto out_free; 904 905 memcpy(layout->filehandles.arr, existing->filehandles.arr, 906 layout->filehandles.count * sizeof(nfs41_path_fh)); 907 out: 908 return layout; 909 910 out_free: 911 file_layout_free(layout); 912 layout = NULL; 913 goto out; 914 } 915 916 static void layout_recall_range( 917 IN pnfs_layout_state *state, 918 IN const pnfs_layout *recall) 919 { 920 struct list_entry *entry, *tmp; 921 list_for_each_tmp(entry, tmp, &state->layouts) { 922 pnfs_file_layout *layout = file_layout_entry(entry); 923 const uint64_t layout_end = layout->layout.offset + layout->layout.length; 924 925 if (!layout_recall_compatible(&layout->layout, recall)) 926 continue; 927 928 if (recall->offset > layout->layout.offset) { 929 /* segment starts before recall; shrink length */ 930 layout->layout.length = recall->offset - layout->layout.offset; 931 932 if (layout_end > recall->offset + recall->length) { 933 /* middle chunk of the segment is recalled; 934 * allocate a new segment to cover the end */ 935 pnfs_file_layout *remainder = layout_allocate_copy(layout); 936 if (remainder == NULL) { 937 /* silently ignore allocation errors here. behave 938 * as if we 'forgot' this last segment */ 939 } else { 940 layout->layout.offset = recall->offset + recall->length; 941 layout->layout.length = layout_end - layout->layout.offset; 942 layout_ordered_insert(state, &remainder->layout); 943 } 944 } 945 } else { 946 /* segment starts after recall */ 947 if (layout_end <= recall->offset + recall->length) { 948 /* entire segment is recalled */ 949 list_remove(&layout->layout.entry); 950 file_layout_free(layout); 951 } else { 952 /* beginning of segment is recalled; shrink offset/length */ 953 layout->layout.offset = recall->offset + recall->length; 954 layout->layout.length = layout_end - layout->layout.offset; 955 } 956 } 957 } 958 } 959 960 static void layout_state_deferred_recalls( 961 IN pnfs_layout_state *state) 962 { 963 struct list_entry *entry, *tmp; 964 list_for_each_tmp(entry, tmp, &state->recalls) { 965 /* process each deferred layout recall */ 966 pnfs_layout *recall = layout_entry(entry); 967 layout_recall_range(state, recall); 968 969 /* remove/free the recall entry */ 970 list_remove(&recall->entry); 971 free(recall); 972 } 973 } 974 975 static void layout_recall_entry_init( 976 OUT struct layout_recall *lrc, 977 IN const struct cb_layoutrecall_args *recall) 978 { 979 list_init(&lrc->layout.entry); 980 if (recall->recall.type == PNFS_RETURN_FILE) { 981 lrc->layout.offset = recall->recall.args.file.offset; 982 lrc->layout.length = recall->recall.args.file.length; 983 } else { 984 lrc->layout.offset = 0; 985 lrc->layout.length = NFS4_UINT64_MAX; 986 } 987 lrc->layout.iomode = recall->iomode; 988 lrc->layout.type = PNFS_LAYOUTTYPE_FILE; 989 lrc->changed = recall->changed; 990 } 991 992 static enum pnfs_status layout_recall_merge( 993 IN struct list_entry *list, 994 IN pnfs_layout *from) 995 { 996 struct list_entry *entry, *tmp; 997 enum pnfs_status status = PNFSERR_NO_LAYOUT; 998 999 /* attempt to merge the new recall with each existing recall */ 1000 list_for_each_tmp(entry, tmp, list) { 1001 pnfs_layout *to = layout_entry(entry); 1002 const uint64_t to_max = to->offset + to->length; 1003 const uint64_t from_max = from->offset + from->length; 1004 1005 /* the ranges must meet or overlap */ 1006 if (to_max < from->offset || from_max < to->offset) 1007 continue; 1008 1009 /* the following fields must match: */ 1010 if (to->iomode != from->iomode || to->type != from->type) 1011 continue; 1012 1013 dprintf(FLLVL, "merging recalled range {%llu, %llu} with {%llu, %llu}\n", 1014 to->offset, to->length, from->offset, from->length); 1015 1016 /* calculate the union of the two ranges */ 1017 to->offset = min(to->offset, from->offset); 1018 to->length = max(to_max, from_max) - to->offset; 1019 1020 /* on success, remove/free the new segment */ 1021 list_remove(&from->entry); 1022 free(from); 1023 status = PNFS_SUCCESS; 1024 1025 /* because the existing segment 'to' has grown, we may 1026 * be able to merge it with later segments */ 1027 from = to; 1028 } 1029 return status; 1030 } 1031 1032 static enum pnfs_status file_layout_recall( 1033 IN pnfs_layout_state *state, 1034 IN const struct cb_layoutrecall_args *recall) 1035 { 1036 const stateid4 *stateid = &recall->recall.args.file.stateid; 1037 enum pnfs_status status = PNFS_SUCCESS; 1038 1039 /* under an exclusive lock, flag the layout as recalled */ 1040 AcquireSRWLockExclusive(&state->lock); 1041 1042 if (state->stateid.seqid == 0) { 1043 /* return NOMATCHINGLAYOUT if it wasn't actually granted */ 1044 status = PNFSERR_NO_LAYOUT; 1045 goto out; 1046 } 1047 1048 if (recall->recall.type == PNFS_RETURN_FILE) { 1049 /* detect races between CB_LAYOUTRECALL and LAYOUTGET/LAYOUTRETURN */ 1050 if (stateid->seqid > state->stateid.seqid + 1) { 1051 /* the server has processed an outstanding LAYOUTGET or 1052 * LAYOUTRETURN; we must return ERR_DELAY until we get the 1053 * response and update our view of the layout */ 1054 status = PNFS_PENDING; 1055 goto out; 1056 } 1057 1058 /* save the updated seqid */ 1059 state->stateid.seqid = stateid->seqid; 1060 } 1061 1062 if (state->io_count) { 1063 /* save an entry for this recall, and process it once io finishes */ 1064 struct layout_recall *lrc = calloc(1, sizeof(struct layout_recall)); 1065 if (lrc == NULL) { 1066 /* on failure to allocate, we'll have to respond 1067 * to the CB_LAYOUTRECALL with NFS4ERR_DELAY */ 1068 status = PNFS_PENDING; 1069 goto out; 1070 } 1071 layout_recall_entry_init(lrc, recall); 1072 if (layout_recall_merge(&state->recalls, &lrc->layout) != PNFS_SUCCESS) 1073 list_add_tail(&state->recalls, &lrc->layout.entry); 1074 } else { 1075 /* if there is no pending io, process the recall immediately */ 1076 struct layout_recall lrc = { 0 }; 1077 layout_recall_entry_init(&lrc, recall); 1078 layout_recall_range(state, &lrc.layout); 1079 } 1080 out: 1081 ReleaseSRWLockExclusive(&state->lock); 1082 return status; 1083 } 1084 1085 static enum pnfs_status file_layout_recall_file( 1086 IN nfs41_client *client, 1087 IN const struct cb_layoutrecall_args *recall) 1088 { 1089 struct list_entry *entry; 1090 enum pnfs_status status; 1091 1092 dprintf(FLLVL, "--> file_layout_recall_file()\n"); 1093 1094 EnterCriticalSection(&client->layouts->lock); 1095 1096 status = layout_entry_find(client->layouts, &recall->recall.args.file.fh, &entry); 1097 if (status == PNFS_SUCCESS) 1098 status = file_layout_recall(state_entry(entry), recall); 1099 1100 LeaveCriticalSection(&client->layouts->lock); 1101 1102 dprintf(FLLVL, "<-- file_layout_recall_file() returning %s\n", 1103 pnfs_error_string(status)); 1104 return status; 1105 } 1106 1107 static bool_t fsid_matches( 1108 IN const nfs41_fsid *lhs, 1109 IN const nfs41_fsid *rhs) 1110 { 1111 return lhs->major == rhs->major && lhs->minor == rhs->minor; 1112 } 1113 1114 static enum pnfs_status file_layout_recall_fsid( 1115 IN nfs41_client *client, 1116 IN const struct cb_layoutrecall_args *recall) 1117 { 1118 struct list_entry *entry; 1119 pnfs_layout_state *state; 1120 nfs41_fh *fh; 1121 enum pnfs_status status = PNFSERR_NO_LAYOUT; 1122 1123 dprintf(FLLVL, "--> file_layout_recall_fsid(%llu, %llu)\n", 1124 recall->recall.args.fsid.major, recall->recall.args.fsid.minor); 1125 1126 EnterCriticalSection(&client->layouts->lock); 1127 1128 list_for_each(entry, &client->layouts->head) { 1129 state = state_entry(entry); 1130 /* no locks needed to read layout.meta_fh or superblock.fsid, 1131 * because they are only written once on creation */ 1132 fh = &state->meta_fh; 1133 if (fsid_matches(&recall->recall.args.fsid, &fh->superblock->fsid)) 1134 status = file_layout_recall(state, recall); 1135 } 1136 1137 LeaveCriticalSection(&client->layouts->lock); 1138 1139 /* bulk recalls require invalidation of cached device info */ 1140 pnfs_file_device_list_invalidate(client->devices); 1141 1142 dprintf(FLLVL, "<-- file_layout_recall_fsid() returning %s\n", 1143 pnfs_error_string(status)); 1144 return status; 1145 } 1146 1147 static enum pnfs_status file_layout_recall_all( 1148 IN nfs41_client *client, 1149 IN const struct cb_layoutrecall_args *recall) 1150 { 1151 struct list_entry *entry; 1152 enum pnfs_status status = PNFSERR_NO_LAYOUT; 1153 1154 dprintf(FLLVL, "--> file_layout_recall_all()\n"); 1155 1156 EnterCriticalSection(&client->layouts->lock); 1157 1158 list_for_each(entry, &client->layouts->head) 1159 status = file_layout_recall(state_entry(entry), recall); 1160 1161 LeaveCriticalSection(&client->layouts->lock); 1162 1163 /* bulk recalls require invalidation of cached device info */ 1164 pnfs_file_device_list_invalidate(client->devices); 1165 1166 dprintf(FLLVL, "<-- file_layout_recall_all() returning %s\n", 1167 pnfs_error_string(status)); 1168 return status; 1169 } 1170 1171 enum pnfs_status pnfs_file_layout_recall( 1172 IN nfs41_client *client, 1173 IN const struct cb_layoutrecall_args *recall) 1174 { 1175 enum pnfs_status status = PNFS_SUCCESS; 1176 1177 dprintf(FLLVL, "--> pnfs_file_layout_recall(%u, %s, %u)\n", 1178 recall->recall.type, pnfs_iomode_string(recall->iomode), 1179 recall->changed); 1180 1181 if (recall->type != PNFS_LAYOUTTYPE_FILE) { 1182 dprintf(FLLVL, "invalid layout type %u (%s)!\n", 1183 recall->type, pnfs_layout_type_string(recall->type)); 1184 status = PNFSERR_NOT_SUPPORTED; 1185 goto out; 1186 } 1187 1188 switch (recall->recall.type) { 1189 case PNFS_RETURN_FILE: 1190 status = file_layout_recall_file(client, recall); 1191 break; 1192 case PNFS_RETURN_FSID: 1193 status = file_layout_recall_fsid(client, recall); 1194 break; 1195 case PNFS_RETURN_ALL: 1196 status = file_layout_recall_all(client, recall); 1197 break; 1198 1199 default: 1200 dprintf(FLLVL, "invalid return type %u!\n", recall->recall); 1201 status = PNFSERR_NOT_SUPPORTED; 1202 goto out; 1203 } 1204 out: 1205 dprintf(FLLVL, "<-- pnfs_file_layout_recall() returning %s\n", 1206 pnfs_error_string(status)); 1207 return status; 1208 } 1209 1210 /* expects caller to hold a shared lock on pnfs_layout_state */ 1211 enum pnfs_status pnfs_layout_recall_status( 1212 IN const pnfs_layout_state *state, 1213 IN const pnfs_layout *layout) 1214 { 1215 struct list_entry *entry; 1216 enum pnfs_status status = PNFS_SUCCESS; 1217 1218 /* search for a pending recall that intersects with the given segment */ 1219 list_for_each(entry, &state->recalls) { 1220 const struct layout_recall *recall = recall_entry(entry); 1221 if (!layout_recall_compatible(layout, &recall->layout)) 1222 continue; 1223 1224 if (recall->changed) 1225 status = PNFSERR_LAYOUT_CHANGED; 1226 else 1227 status = PNFSERR_LAYOUT_RECALLED; 1228 break; 1229 } 1230 return status; 1231 } 1232 1233 void pnfs_layout_recall_fenced( 1234 IN pnfs_layout_state *state, 1235 IN const pnfs_layout *layout) 1236 { 1237 struct layout_recall *lrc = calloc(1, sizeof(struct layout_recall)); 1238 if (lrc == NULL) 1239 return; 1240 1241 AcquireSRWLockExclusive(&state->lock); 1242 1243 list_init(&lrc->layout.entry); 1244 lrc->layout.offset = layout->offset; 1245 lrc->layout.length = layout->length; 1246 lrc->layout.iomode = layout->iomode; 1247 lrc->layout.type = layout->type; 1248 lrc->changed = TRUE; 1249 1250 if (layout_recall_merge(&state->recalls, &lrc->layout) != PNFS_SUCCESS) 1251 list_add_tail(&state->recalls, &lrc->layout.entry); 1252 1253 ReleaseSRWLockExclusive(&state->lock); 1254 } 1255 1256 /* expects caller to hold an exclusive lock on pnfs_layout_state */ 1257 void pnfs_layout_io_start( 1258 IN pnfs_layout_state *state) 1259 { 1260 /* take a reference on the layout, so that it won't be recalled 1261 * until all io is finished */ 1262 state->io_count++; 1263 dprintf(FLLVL, "pnfs_layout_io_start(): count -> %u\n", 1264 state->io_count); 1265 } 1266 1267 void pnfs_layout_io_finished( 1268 IN pnfs_layout_state *state) 1269 { 1270 AcquireSRWLockExclusive(&state->lock); 1271 1272 /* return the reference to signify that an io request is finished */ 1273 state->io_count--; 1274 dprintf(FLLVL, "pnfs_layout_io_finished() count -> %u\n", 1275 state->io_count); 1276 1277 if (state->io_count > 0) /* more io pending */ 1278 goto out_unlock; 1279 1280 /* once all io is finished, process any layout recalls */ 1281 layout_state_deferred_recalls(state); 1282 1283 /* finish any segment merging that was delayed during io */ 1284 if (!list_empty(&state->layouts)) 1285 layout_state_merge(state, file_layout_entry(state->layouts.next)); 1286 1287 out_unlock: 1288 ReleaseSRWLockExclusive(&state->lock); 1289 } 1290