1""" Utilities for getting array slices out of file-like objects 2""" 3 4import operator 5from numbers import Integral 6from mmap import mmap 7 8from functools import reduce 9 10import numpy as np 11 12 13# Threshold for memory gap above which we always skip, to save memory 14# This value came from trying various values and looking at the timing with 15# ``bench_fileslice`` 16SKIP_THRESH = 2 ** 8 17 18 19class _NullLock(object): 20 """Can be used as no-function dummy object in place of ``threading.lock``. 21 22 The ``_NullLock`` is an object which can be used in place of a 23 ``threading.Lock`` object, but doesn't actually do anything. 24 25 It is used by the ``read_segments`` function in the event that a 26 ``Lock`` is not provided by the caller. 27 """ 28 def __enter__(self): 29 pass 30 31 def __exit__(self, exc_type, exc_val, exc_tb): 32 return False 33 34 35def is_fancy(sliceobj): 36 """ Returns True if sliceobj is attempting fancy indexing 37 38 Parameters 39 ---------- 40 sliceobj : object 41 something that can be used to slice an array as in ``arr[sliceobj]`` 42 43 Returns 44 ------- 45 tf: bool 46 True if sliceobj represents fancy indexing, False for basic indexing 47 """ 48 if not isinstance(sliceobj, tuple): 49 sliceobj = (sliceobj,) 50 for slicer in sliceobj: 51 if getattr(slicer, 'ndim', 0) > 0: # ndarray always fancy, but scalars are safe 52 return True 53 # slice or Ellipsis or None OK for basic 54 if isinstance(slicer, slice) or slicer in (None, Ellipsis): 55 continue 56 try: 57 int(slicer) 58 except TypeError: 59 return True 60 return False 61 62 63def canonical_slicers(sliceobj, shape, check_inds=True): 64 """ Return canonical version of `sliceobj` for array shape `shape` 65 66 `sliceobj` is a slicer for an array ``A`` implied by `shape`. 67 68 * Expand `sliceobj` with ``slice(None)`` to add any missing (implied) axes 69 in `sliceobj` 70 * Find any slicers in `sliceobj` that do a full axis slice and replace by 71 ``slice(None)`` 72 * Replace any floating point values for slicing with integers 73 * Replace negative integer slice values with equivalent positive integers. 74 75 Does not handle fancy indexing (indexing with arrays or array-like indices) 76 77 Parameters 78 ---------- 79 sliceobj : object 80 something that can be used to slice an array as in ``arr[sliceobj]`` 81 shape : sequence 82 shape of array that will be indexed by `sliceobj` 83 check_inds : {True, False}, optional 84 Whether to check if integer indices are out of bounds 85 86 Returns 87 ------- 88 can_slicers : tuple 89 version of `sliceobj` for which Ellipses have been expanded, missing 90 (implied) dimensions have been appended, and slice objects equivalent 91 to ``slice(None)`` have been replaced by ``slice(None)``, integer axes 92 have been checked, and negative indices set to positive equivalent 93 """ 94 if not isinstance(sliceobj, tuple): 95 sliceobj = (sliceobj,) 96 if is_fancy(sliceobj): 97 raise ValueError("Cannot handle fancy indexing") 98 can_slicers = [] 99 n_dim = len(shape) 100 n_real = 0 101 for i, slicer in enumerate(sliceobj): 102 if slicer is None: 103 can_slicers.append(None) 104 continue 105 if slicer == Ellipsis: 106 remaining = sliceobj[i + 1:] 107 if Ellipsis in remaining: 108 raise ValueError("More than one Ellipsis in slicing " 109 "expression") 110 real_remaining = [r for r in remaining if r is not None] 111 n_ellided = n_dim - n_real - len(real_remaining) 112 can_slicers.extend((slice(None),) * n_ellided) 113 n_real += n_ellided 114 continue 115 # int / slice indexing cases 116 dim_len = shape[n_real] 117 n_real += 1 118 try: # test for integer indexing 119 slicer = int(slicer) 120 except TypeError: # should be slice object 121 if slicer != slice(None): 122 # Could this be full slice? 123 if slicer.stop == dim_len and slicer.start in (None, 0) and \ 124 slicer.step in (None, 1): 125 slicer = slice(None) 126 else: 127 if slicer < 0: 128 slicer = dim_len + slicer 129 elif check_inds and slicer >= dim_len: 130 raise ValueError('Integer index %d to large' % slicer) 131 can_slicers.append(slicer) 132 # Fill out any missing dimensions 133 if n_real < n_dim: 134 can_slicers.extend((slice(None),) * (n_dim - n_real)) 135 return tuple(can_slicers) 136 137 138def slice2outax(ndim, sliceobj): 139 """ Matching output axes for input array ndim `ndim` and slice `sliceobj` 140 141 Parameters 142 ---------- 143 ndim : int 144 number of axes in input array 145 sliceobj : object 146 something that can be used to slice an array as in ``arr[sliceobj]`` 147 148 Returns 149 ------- 150 out_ax_inds : tuple 151 Say ``A` is a (pretend) input array of `ndim` dimensions. Say ``B = 152 A[sliceobj]``. `out_ax_inds` has one value per axis in ``A`` giving 153 corresponding axis in ``B``. 154 """ 155 sliceobj = canonical_slicers(sliceobj, [1] * ndim, check_inds=False) 156 out_ax_no = 0 157 out_ax_inds = [] 158 for obj in sliceobj: 159 if isinstance(obj, Integral): 160 out_ax_inds.append(None) 161 continue 162 if obj is not None: 163 out_ax_inds.append(out_ax_no) 164 out_ax_no += 1 165 return tuple(out_ax_inds) 166 167 168def slice2len(slicer, in_len): 169 """ Output length after slicing original length `in_len` with `slicer` 170 Parameters 171 ---------- 172 slicer : slice object 173 in_len : int 174 175 Returns 176 ------- 177 out_len : int 178 Length after slicing 179 180 Notes 181 ----- 182 Returns same as ``len(np.arange(in_len)[slicer])`` 183 """ 184 if slicer == slice(None): 185 return in_len 186 full_slicer = fill_slicer(slicer, in_len) 187 return _full_slicer_len(full_slicer) 188 189 190def _full_slicer_len(full_slicer): 191 """ Return length of slicer processed by ``fill_slicer`` 192 """ 193 start, stop, step = full_slicer.start, full_slicer.stop, full_slicer.step 194 if stop is None: # case of negative step 195 stop = -1 196 gap = stop - start 197 if (step > 0 and gap <= 0) or (step < 0 and gap >= 0): 198 return 0 199 return int(np.ceil(gap / step)) 200 201 202def fill_slicer(slicer, in_len): 203 """ Return slice object with Nones filled out to match `in_len` 204 205 Also fixes too large stop / start values according to slice() slicing 206 rules. 207 208 The returned slicer can have a None as `slicer.stop` if `slicer.step` is 209 negative and the input `slicer.stop` is None. This is because we can't 210 represent the ``stop`` as an integer, because -1 has a different meaning. 211 212 Parameters 213 ---------- 214 slicer : slice object 215 in_len : int 216 length of axis on which `slicer` will be applied 217 218 Returns 219 ------- 220 can_slicer : slice object 221 slice with start, stop, step set to explicit values, with the exception 222 of ``stop`` for negative step, which is None for the case of slicing 223 down through the first element 224 """ 225 start, stop, step = slicer.start, slicer.stop, slicer.step 226 if step is None: 227 step = 1 228 if start is not None and start < 0: 229 start = in_len + start 230 if stop is not None and stop < 0: 231 stop = in_len + stop 232 if step > 0: 233 if start is None: 234 start = 0 235 if stop is None: 236 stop = in_len 237 else: 238 stop = min(stop, in_len) 239 else: # step < 0 240 if start is None: 241 start = in_len - 1 242 else: 243 start = min(start, in_len - 1) 244 return slice(start, stop, step) 245 246 247def predict_shape(sliceobj, in_shape): 248 """ Predict shape of array from slicing array shape `shape` with `sliceobj` 249 250 Parameters 251 ---------- 252 sliceobj : object 253 something that can be used to slice an array as in ``arr[sliceobj]`` 254 in_shape : sequence 255 shape of array that could be sliced by `sliceobj` 256 257 Returns 258 ------- 259 out_shape : tuple 260 predicted shape arising from slicing array shape `in_shape` with 261 `sliceobj` 262 """ 263 if not isinstance(sliceobj, tuple): 264 sliceobj = (sliceobj,) 265 sliceobj = canonical_slicers(sliceobj, in_shape) 266 out_shape = [] 267 real_no = 0 268 for slicer in sliceobj: 269 if slicer is None: 270 out_shape.append(1) 271 continue 272 real_no += 1 273 try: # if int - we drop a dim (no append) 274 slicer = int(slicer) 275 except TypeError: 276 out_shape.append(slice2len(slicer, in_shape[real_no - 1])) 277 return tuple(out_shape) 278 279 280def _positive_slice(slicer): 281 """ Return full slice `slicer` enforcing positive step size 282 283 `slicer` assumed full in the sense of :func:`fill_slicer` 284 """ 285 start, stop, step = slicer.start, slicer.stop, slicer.step 286 if step > 0: 287 return slicer 288 if stop is None: 289 stop = -1 290 gap = stop - start 291 n = gap / step 292 n = int(n) - 1 if int(n) == n else int(n) 293 end = start + n * step 294 return slice(end, start + 1, -step) 295 296 297def threshold_heuristic(slicer, 298 dim_len, 299 stride, 300 skip_thresh=SKIP_THRESH): 301 """ Whether to force full axis read or contiguous read of stepped slice 302 303 Allows :func:`fileslice` to sometimes read memory that it will throw away 304 in order to get maximum speed. In other words, trade memory for fewer disk 305 reads. 306 307 Parameters 308 ---------- 309 slicer : slice object, or int 310 If slice, can be assumed to be full as in ``fill_slicer`` 311 dim_len : int 312 length of axis being sliced 313 stride : int 314 memory distance between elements on this axis 315 skip_thresh : int, optional 316 Memory gap threshold in bytes above which to prefer skipping memory 317 rather than reading it and later discarding. 318 319 Returns 320 ------- 321 action : {'full', 'contiguous', None} 322 Gives the suggested optimization for reading the data 323 324 * 'full' - read whole axis 325 * 'contiguous' - read all elements between start and stop 326 * None - read only memory needed for output 327 328 Notes 329 ----- 330 Let's say we are in the middle of reading a file at the start of some 331 memory length $B$ bytes. We don't need the memory, and we are considering 332 whether to read it anyway (then throw it away) (READ) or stop reading, skip 333 $B$ bytes and restart reading from there (SKIP). 334 335 After trying some more fancy algorithms, a hard threshold (`skip_thresh`) 336 for the maximum skip distance seemed to work well, as measured by times on 337 ``nibabel.benchmarks.bench_fileslice`` 338 """ 339 if isinstance(slicer, Integral): 340 gap_size = (dim_len - 1) * stride 341 return 'full' if gap_size <= skip_thresh else None 342 step_size = abs(slicer.step) * stride 343 if step_size > skip_thresh: 344 return None # Prefer skip 345 # At least contiguous - also full? 346 slicer = _positive_slice(slicer) 347 start, stop = slicer.start, slicer.stop 348 read_len = stop - start 349 gap_size = (dim_len - read_len) * stride 350 return 'full' if gap_size <= skip_thresh else 'contiguous' 351 352 353def optimize_slicer(slicer, dim_len, all_full, is_slowest, stride, 354 heuristic=threshold_heuristic): 355 """ Return maybe modified slice and post-slice slicing for `slicer` 356 357 Parameters 358 ---------- 359 slicer : slice object or int 360 dim_len : int 361 length of axis along which to slice 362 all_full : bool 363 Whether dimensions up until now have been full (all elements) 364 is_slowest : bool 365 Whether this dimension is the slowest changing in memory / on disk 366 stride : int 367 size of one step along this axis 368 heuristic : callable, optional 369 function taking slice object, dim_len, stride length as arguments, 370 returning one of 'full', 'contiguous', None. See 371 :func:`threshold_heuristic` for an example. 372 373 Returns 374 ------- 375 to_read : slice object or int 376 maybe modified slice based on `slicer` expressing what data should be 377 read from an underlying file or buffer. `to_read` must always have 378 positive ``step`` (because we don't want to go backwards in the buffer 379 / file) 380 post_slice : slice object 381 slice to be applied after array has been read. Applies any 382 transformations in `slicer` that have not been applied in `to_read`. If 383 axis will be dropped by `to_read` slicing, so no slicing would make 384 sense, return string ``dropped`` 385 386 Notes 387 ----- 388 This is the heart of the algorithm for making segments from slice objects. 389 390 A contiguous slice is a slice with ``slice.step in (1, -1)`` 391 392 A full slice is a continuous slice returning all elements. 393 394 The main question we have to ask is whether we should transform `to_read`, 395 `post_slice` to prefer a full read and partial slice. We only do this in 396 the case of all_full==True. In this case we might benefit from reading a 397 continuous chunk of data even if the slice is not continuous, or reading 398 all the data even if the slice is not full. Apply a heuristic `heuristic` 399 to decide whether to do this, and adapt `to_read` and `post_slice` slice 400 accordingly. 401 402 Otherwise (apart from constraint to be positive) return `to_read` unaltered 403 and `post_slice` as ``slice(None)`` 404 """ 405 # int or slice as input? 406 try: # if int - we drop a dim (no append) 407 slicer = int(slicer) # casts float to int as well 408 except TypeError: # slice 409 # Deal with full cases first 410 if slicer == slice(None): 411 return slicer, slicer 412 slicer = fill_slicer(slicer, dim_len) 413 # actually equivalent to slice(None) 414 if slicer == slice(0, dim_len, 1): 415 return slice(None), slice(None) 416 # full, but reversed 417 if slicer == slice(dim_len - 1, None, -1): 418 return slice(None), slice(None, None, -1) 419 # Not full, mabye continuous 420 is_int = False 421 else: # int 422 if slicer < 0: # make negative offsets positive 423 slicer = dim_len + slicer 424 is_int = True 425 if all_full: 426 action = heuristic(slicer, dim_len, stride) 427 # Check return values (we may be using a custom function) 428 if action not in ('full', 'contiguous', None): 429 raise ValueError(f'Unexpected return {action} from heuristic') 430 if is_int and action == 'contiguous': 431 raise ValueError("int index cannot be contiguous") 432 # If this is the slowest changing dimension, never upgrade None or 433 # contiguous beyond contiguous (we've already covered the already-full 434 # case) 435 if is_slowest and action == 'full': 436 action = None if is_int else 'contiguous' 437 if action == 'full': 438 return slice(None), slicer 439 elif action == 'contiguous': # Cannot be int 440 # If this is already contiguous, default None behavior handles it 441 step = slicer.step 442 if step not in (-1, 1): 443 if step < 0: 444 slicer = _positive_slice(slicer) 445 return (slice(slicer.start, slicer.stop, 1), 446 slice(None, None, step)) 447 # We only need to be positive 448 if is_int: 449 return slicer, 'dropped' 450 if slicer.step > 0: 451 return slicer, slice(None) 452 return _positive_slice(slicer), slice(None, None, -1) 453 454 455def calc_slicedefs(sliceobj, in_shape, itemsize, offset, order, 456 heuristic=threshold_heuristic): 457 """ Return parameters for slicing array with `sliceobj` given memory layout 458 459 Calculate the best combination of skips / (read + discard) to use for 460 reading the data from disk / memory, then generate corresponding 461 `segments`, the disk offsets and read lengths to read the memory. If we 462 have chosen some (read + discard) optimization, then we need to discard the 463 surplus values from the read array using `post_slicers`, a slicing tuple 464 that takes the array as read from a file-like object, and returns the array 465 we want. 466 467 Parameters 468 ---------- 469 sliceobj : object 470 something that can be used to slice an array as in ``arr[sliceobj]`` 471 in_shape : sequence 472 shape of underlying array to be sliced 473 itemsize : int 474 element size in array (in bytes) 475 offset : int 476 offset of array data in underlying file or memory buffer 477 order : {'C', 'F'} 478 memory layout of underlying array 479 heuristic : callable, optional 480 function taking slice object, dim_len, stride length as arguments, 481 returning one of 'full', 'contiguous', None. See 482 :func:`optimize_slicer` and :func:`threshold_heuristic` 483 484 Returns 485 ------- 486 segments : list 487 list of 2 element lists where lists are (offset, length), giving 488 absolute memory offset in bytes and number of bytes to read 489 read_shape : tuple 490 shape with which to interpret memory as read from `segments`. 491 Interpreting the memory read from `segments` with this shape, and a 492 dtype, gives an intermediate array - call this ``R`` 493 post_slicers : tuple 494 Any new slicing to be applied to the array ``R`` after reading via 495 `segments` and reshaping via `read_shape`. Slices are in terms of 496 `read_shape`. If empty, no new slicing to apply 497 """ 498 if order not in "CF": 499 raise ValueError("order should be one of 'CF'") 500 sliceobj = canonical_slicers(sliceobj, in_shape) 501 # order fastest changing first (record reordering) 502 if order == 'C': 503 sliceobj = sliceobj[::-1] 504 in_shape = in_shape[::-1] 505 # Analyze sliceobj for new read_slicers and fixup post_slicers 506 # read_slicers are the virtual slices; we don't slice with these, but use 507 # the slice definitions to read the relevant memory from disk 508 read_slicers, post_slicers = optimize_read_slicers( 509 sliceobj, in_shape, itemsize, heuristic) 510 # work out segments corresponding to read_slicers 511 segments = slicers2segments(read_slicers, in_shape, offset, itemsize) 512 # Make post_slicers empty if it is the slicing identity operation 513 if all(s == slice(None) for s in post_slicers): 514 post_slicers = [] 515 read_shape = predict_shape(read_slicers, in_shape) 516 # If reordered, order shape, post_slicers 517 if order == 'C': 518 read_shape = read_shape[::-1] 519 post_slicers = post_slicers[::-1] 520 return list(segments), tuple(read_shape), tuple(post_slicers) 521 522 523def optimize_read_slicers(sliceobj, in_shape, itemsize, heuristic): 524 """ Calculates slices to read from disk, and apply after reading 525 526 Parameters 527 ---------- 528 sliceobj : object 529 something that can be used to slice an array as in ``arr[sliceobj]``. 530 Can be assumed to be canonical in the sense of ``canonical_slicers`` 531 in_shape : sequence 532 shape of underlying array to be sliced. Array for `in_shape` assumed 533 to be already in 'F' order. Reorder shape / sliceobj for slicing a 'C' 534 array before passing to this function. 535 itemsize : int 536 element size in array (bytes) 537 heuristic : callable 538 function taking slice object, axis length, and stride length as 539 arguments, returning one of 'full', 'contiguous', None. See 540 :func:`optimize_slicer`; see :func:`threshold_heuristic` for an 541 example. 542 543 Returns 544 ------- 545 read_slicers : tuple 546 `sliceobj` maybe rephrased to fill out dimensions that are better read 547 from disk and later trimmed to their original size with `post_slicers`. 548 `read_slicers` implies a block of memory to be read from disk. The 549 actual disk positions come from `slicers2segments` run over 550 `read_slicers`. Includes any ``newaxis`` dimensions in `sliceobj` 551 post_slicers : tuple 552 Any new slicing to be applied to the read array after reading. The 553 `post_slicers` discard any memory that we read to save time, but that 554 we don't need for the slice. Include any ``newaxis`` dimension added 555 by `sliceobj` 556 """ 557 read_slicers = [] 558 post_slicers = [] 559 real_no = 0 560 stride = itemsize 561 all_full = True 562 for slicer in sliceobj: 563 if slicer is None: 564 read_slicers.append(None) 565 post_slicers.append(slice(None)) 566 continue 567 dim_len = in_shape[real_no] 568 real_no += 1 569 is_last = real_no == len(in_shape) 570 # make modified sliceobj (to_read, post_slice) 571 read_slicer, post_slicer = optimize_slicer( 572 slicer, dim_len, all_full, is_last, stride, heuristic) 573 read_slicers.append(read_slicer) 574 all_full = all_full and read_slicer == slice(None) 575 if not isinstance(read_slicer, Integral): 576 post_slicers.append(post_slicer) 577 stride *= dim_len 578 return tuple(read_slicers), tuple(post_slicers) 579 580 581def slicers2segments(read_slicers, in_shape, offset, itemsize): 582 """ Get segments from `read_slicers` given `in_shape` and memory steps 583 584 Parameters 585 ---------- 586 read_slicers : object 587 something that can be used to slice an array as in ``arr[sliceobj]`` 588 Slice objects can by be assumed canonical as in ``canonical_slicers``, 589 and positive as in ``_positive_slice`` 590 in_shape : sequence 591 shape of underlying array on disk before reading 592 offset : int 593 offset of array data in underlying file or memory buffer 594 itemsize : int 595 element size in array (in bytes) 596 597 Returns 598 ------- 599 segments : list 600 list of 2 element lists where lists are [offset, length], giving 601 absolute memory offset in bytes and number of bytes to read 602 """ 603 all_full = True 604 all_segments = [[offset, itemsize]] 605 stride = itemsize 606 real_no = 0 607 for read_slicer in read_slicers: 608 if read_slicer is None: 609 continue 610 dim_len = in_shape[real_no] 611 real_no += 1 612 is_int = isinstance(read_slicer, Integral) 613 if not is_int: # slicer is (now) a slice 614 # make slice full (it will always be positive) 615 read_slicer = fill_slicer(read_slicer, dim_len) 616 slice_len = _full_slicer_len(read_slicer) 617 is_full = read_slicer == slice(0, dim_len, 1) 618 is_contiguous = not is_int and read_slicer.step == 1 619 if all_full and is_contiguous: # full or contiguous 620 if read_slicer.start != 0: 621 all_segments[0][0] += stride * read_slicer.start 622 all_segments[0][1] *= slice_len 623 else: # Previous or current stuff is not contiguous 624 if is_int: 625 for segment in all_segments: 626 segment[0] += stride * read_slicer 627 else: # slice object 628 segments = all_segments 629 all_segments = [] 630 for i in range(read_slicer.start, 631 read_slicer.stop, 632 read_slicer.step): 633 for s in segments: 634 all_segments.append([s[0] + stride * i, s[1]]) 635 all_full = all_full and is_full 636 stride *= dim_len 637 return all_segments 638 639 640def read_segments(fileobj, segments, n_bytes, lock=None): 641 """ Read `n_bytes` byte data implied by `segments` from `fileobj` 642 643 Parameters 644 ---------- 645 fileobj : file-like object 646 Implements `seek` and `read` 647 segments : sequence 648 list of 2 sequences where sequences are (offset, length), giving 649 absolute file offset in bytes and number of bytes to read 650 n_bytes : int 651 total number of bytes that will be read 652 lock : {None, threading.Lock, lock-like} optional 653 If provided, used to ensure that paired calls to ``seek`` and ``read`` 654 cannot be interrupted by another thread accessing the same ``fileobj``. 655 Each thread which accesses the same file via ``read_segments`` must 656 share a lock in order to ensure that the file access is thread-safe. 657 A lock does not need to be provided for single-threaded access. The 658 default value (``None``) results in a lock-like object (a 659 ``_NullLock``) which does not do anything. 660 661 Returns 662 ------- 663 buffer : buffer object 664 object implementing buffer protocol, such as byte string or ndarray or 665 mmap or ctypes ``c_char_array`` 666 """ 667 # Make a lock-like thing to make the code below a bit nicer 668 if lock is None: 669 lock = _NullLock() 670 671 if len(segments) == 0: 672 if n_bytes != 0: 673 raise ValueError("No segments, but non-zero n_bytes") 674 return b'' 675 if len(segments) == 1: 676 offset, length = segments[0] 677 with lock: 678 fileobj.seek(offset) 679 bytes = fileobj.read(length) 680 if len(bytes) != n_bytes: 681 raise ValueError("Whoops, not enough data in file") 682 return bytes 683 # More than one segment 684 bytes = mmap(-1, n_bytes) 685 for offset, length in segments: 686 with lock: 687 fileobj.seek(offset) 688 bytes.write(fileobj.read(length)) 689 if bytes.tell() != n_bytes: 690 raise ValueError("Oh dear, n_bytes does not look right") 691 return bytes 692 693 694def _simple_fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', 695 heuristic=None): 696 """ Read all data from `fileobj` into array, then slice with `sliceobj` 697 698 The simplest possible thing; read all the data into the full array, then 699 slice the full array. 700 701 Parameters 702 ---------- 703 fileobj : file-like object 704 implements ``read`` and ``seek`` 705 sliceobj : object 706 something that can be used to slice an array as in ``arr[sliceobj]`` 707 shape : sequence 708 shape of full array inside `fileobj` 709 dtype : dtype object 710 dtype of array inside `fileobj` 711 offset : int, optional 712 offset of array data within `fileobj` 713 order : {'C', 'F'}, optional 714 memory layout of array in `fileobj` 715 heuristic : optional 716 The routine doesn't use `heuristic`; the parameter is for API 717 compatibility with :func:`fileslice` 718 719 Returns 720 ------- 721 sliced_arr : array 722 Array in `fileobj` as sliced with `sliceobj` 723 """ 724 fileobj.seek(offset) 725 nbytes = reduce(operator.mul, shape) * dtype.itemsize 726 bytes = fileobj.read(nbytes) 727 new_arr = np.ndarray(shape, dtype, buffer=bytes, order=order) 728 return new_arr[sliceobj] 729 730 731def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C', 732 heuristic=threshold_heuristic, lock=None): 733 """ Slice array in `fileobj` using `sliceobj` slicer and array definitions 734 735 `fileobj` contains the contiguous binary data for an array ``A`` of shape, 736 dtype, memory layout `shape`, `dtype`, `order`, with the binary data 737 starting at file offset `offset`. 738 739 Our job is to return the sliced array ``A[sliceobj]`` in the most efficient 740 way in terms of memory and time. 741 742 Sometimes it will be quicker to read memory that we will later throw away, 743 to save time we might lose doing short seeks on `fileobj`. Call these 744 alternatives: (read + discard); and skip. This routine guesses when to 745 (read+discard) or skip using the callable `heuristic`, with a default using 746 a hard threshold for the memory gap large enough to prefer a skip. 747 748 Parameters 749 ---------- 750 fileobj : file-like object 751 file-like object, opened for reading in binary mode. Implements 752 ``read`` and ``seek``. 753 sliceobj : object 754 something that can be used to slice an array as in ``arr[sliceobj]``. 755 shape : sequence 756 shape of full array inside `fileobj`. 757 dtype : dtype specifier 758 dtype of array inside `fileobj`, or input to ``numpy.dtype`` to specify 759 array dtype. 760 offset : int, optional 761 offset of array data within `fileobj` 762 order : {'C', 'F'}, optional 763 memory layout of array in `fileobj`. 764 heuristic : callable, optional 765 function taking slice object, axis length, stride length as arguments, 766 returning one of 'full', 'contiguous', None. See 767 :func:`optimize_slicer` and see :func:`threshold_heuristic` for an 768 example. 769 lock : {None, threading.Lock, lock-like} optional 770 If provided, used to ensure that paired calls to ``seek`` and ``read`` 771 cannot be interrupted by another thread accessing the same ``fileobj``. 772 Each thread which accesses the same file via ``read_segments`` must 773 share a lock in order to ensure that the file access is thread-safe. 774 A lock does not need to be provided for single-threaded access. The 775 default value (``None``) results in a lock-like object (a 776 ``_NullLock``) which does not do anything. 777 778 Returns 779 ------- 780 sliced_arr : array 781 Array in `fileobj` as sliced with `sliceobj` 782 """ 783 if is_fancy(sliceobj): 784 raise ValueError("Cannot handle fancy indexing") 785 dtype = np.dtype(dtype) 786 itemsize = int(dtype.itemsize) 787 segments, sliced_shape, post_slicers = calc_slicedefs( 788 sliceobj, shape, itemsize, offset, order) 789 n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize 790 arr_data = read_segments(fileobj, segments, n_bytes, lock) 791 sliced = np.ndarray(sliced_shape, dtype, buffer=arr_data, order=order) 792 return sliced[post_slicers] 793 794 795def strided_scalar(shape, scalar=0.): 796 """ Return array shape `shape` where all entries point to value `scalar` 797 798 Parameters 799 ---------- 800 shape : sequence 801 Shape of output array. 802 scalar : scalar 803 Scalar value with which to fill array. 804 805 Returns 806 ------- 807 strided_arr : array 808 Array of shape `shape` for which all values == `scalar`, built by 809 setting all strides of `strided_arr` to 0, so the scalar is broadcast 810 out to the full array `shape`. `strided_arr` is flagged as not 811 `writeable`. 812 813 The array is set read-only to avoid a numpy error when broadcasting - 814 see https://github.com/numpy/numpy/issues/6491 815 """ 816 shape = tuple(shape) 817 scalar = np.array(scalar) 818 strides = [0] * len(shape) 819 strided_scalar = np.lib.stride_tricks.as_strided(scalar, shape, strides) 820 strided_scalar.flags.writeable = False 821 return strided_scalar 822