1""" Utilities for getting array slices out of file-like objects
2"""
3
4import operator
5from numbers import Integral
6from mmap import mmap
7
8from functools import reduce
9
10import numpy as np
11
12
13# Threshold for memory gap above which we always skip, to save memory
14# This value came from trying various values and looking at the timing with
15# ``bench_fileslice``
16SKIP_THRESH = 2 ** 8
17
18
19class _NullLock(object):
20    """Can be used as no-function dummy object in place of ``threading.lock``.
21
22    The ``_NullLock`` is an object which can be used in place of a
23    ``threading.Lock`` object, but doesn't actually do anything.
24
25    It is used by the ``read_segments`` function in the event that a
26    ``Lock`` is not provided by the caller.
27    """
28    def __enter__(self):
29        pass
30
31    def __exit__(self, exc_type, exc_val, exc_tb):
32        return False
33
34
35def is_fancy(sliceobj):
36    """ Returns True if sliceobj is attempting fancy indexing
37
38    Parameters
39    ----------
40    sliceobj : object
41        something that can be used to slice an array as in ``arr[sliceobj]``
42
43    Returns
44    -------
45    tf: bool
46        True if sliceobj represents fancy indexing, False for basic indexing
47    """
48    if not isinstance(sliceobj, tuple):
49        sliceobj = (sliceobj,)
50    for slicer in sliceobj:
51        if getattr(slicer, 'ndim', 0) > 0:  # ndarray always fancy, but scalars are safe
52            return True
53        # slice or Ellipsis or None OK for  basic
54        if isinstance(slicer, slice) or slicer in (None, Ellipsis):
55            continue
56        try:
57            int(slicer)
58        except TypeError:
59            return True
60    return False
61
62
63def canonical_slicers(sliceobj, shape, check_inds=True):
64    """ Return canonical version of `sliceobj` for array shape `shape`
65
66    `sliceobj` is a slicer for an array ``A`` implied by `shape`.
67
68    * Expand `sliceobj` with ``slice(None)`` to add any missing (implied) axes
69      in `sliceobj`
70    * Find any slicers in `sliceobj` that do a full axis slice and replace by
71      ``slice(None)``
72    * Replace any floating point values for slicing with integers
73    * Replace negative integer slice values with equivalent positive integers.
74
75    Does not handle fancy indexing (indexing with arrays or array-like indices)
76
77    Parameters
78    ----------
79    sliceobj : object
80        something that can be used to slice an array as in ``arr[sliceobj]``
81    shape : sequence
82        shape of array that will be indexed by `sliceobj`
83    check_inds : {True, False}, optional
84        Whether to check if integer indices are out of bounds
85
86    Returns
87    -------
88    can_slicers : tuple
89        version of `sliceobj` for which Ellipses have been expanded, missing
90        (implied) dimensions have been appended, and slice objects equivalent
91        to ``slice(None)`` have been replaced by ``slice(None)``, integer axes
92        have been checked, and negative indices set to positive equivalent
93    """
94    if not isinstance(sliceobj, tuple):
95        sliceobj = (sliceobj,)
96    if is_fancy(sliceobj):
97        raise ValueError("Cannot handle fancy indexing")
98    can_slicers = []
99    n_dim = len(shape)
100    n_real = 0
101    for i, slicer in enumerate(sliceobj):
102        if slicer is None:
103            can_slicers.append(None)
104            continue
105        if slicer == Ellipsis:
106            remaining = sliceobj[i + 1:]
107            if Ellipsis in remaining:
108                raise ValueError("More than one Ellipsis in slicing "
109                                 "expression")
110            real_remaining = [r for r in remaining if r is not None]
111            n_ellided = n_dim - n_real - len(real_remaining)
112            can_slicers.extend((slice(None),) * n_ellided)
113            n_real += n_ellided
114            continue
115        # int / slice indexing cases
116        dim_len = shape[n_real]
117        n_real += 1
118        try:  # test for integer indexing
119            slicer = int(slicer)
120        except TypeError:  # should be slice object
121            if slicer != slice(None):
122                # Could this be full slice?
123                if slicer.stop == dim_len and slicer.start in (None, 0) and \
124                        slicer.step in (None, 1):
125                    slicer = slice(None)
126        else:
127            if slicer < 0:
128                slicer = dim_len + slicer
129            elif check_inds and slicer >= dim_len:
130                raise ValueError('Integer index %d to large' % slicer)
131        can_slicers.append(slicer)
132    # Fill out any missing dimensions
133    if n_real < n_dim:
134        can_slicers.extend((slice(None),) * (n_dim - n_real))
135    return tuple(can_slicers)
136
137
138def slice2outax(ndim, sliceobj):
139    """ Matching output axes for input array ndim `ndim` and slice `sliceobj`
140
141    Parameters
142    ----------
143    ndim : int
144        number of axes in input array
145    sliceobj : object
146        something that can be used to slice an array as in ``arr[sliceobj]``
147
148    Returns
149    -------
150    out_ax_inds : tuple
151        Say ``A` is a (pretend) input array of `ndim` dimensions. Say ``B =
152        A[sliceobj]``.  `out_ax_inds` has one value per axis in ``A`` giving
153        corresponding axis in ``B``.
154    """
155    sliceobj = canonical_slicers(sliceobj, [1] * ndim, check_inds=False)
156    out_ax_no = 0
157    out_ax_inds = []
158    for obj in sliceobj:
159        if isinstance(obj, Integral):
160            out_ax_inds.append(None)
161            continue
162        if obj is not None:
163            out_ax_inds.append(out_ax_no)
164        out_ax_no += 1
165    return tuple(out_ax_inds)
166
167
168def slice2len(slicer, in_len):
169    """ Output length after slicing original length `in_len` with `slicer`
170    Parameters
171    ----------
172    slicer : slice object
173    in_len : int
174
175    Returns
176    -------
177    out_len : int
178        Length after slicing
179
180    Notes
181    -----
182    Returns same as ``len(np.arange(in_len)[slicer])``
183    """
184    if slicer == slice(None):
185        return in_len
186    full_slicer = fill_slicer(slicer, in_len)
187    return _full_slicer_len(full_slicer)
188
189
190def _full_slicer_len(full_slicer):
191    """ Return length of slicer processed by ``fill_slicer``
192    """
193    start, stop, step = full_slicer.start, full_slicer.stop, full_slicer.step
194    if stop is None:  # case of negative step
195        stop = -1
196    gap = stop - start
197    if (step > 0 and gap <= 0) or (step < 0 and gap >= 0):
198        return 0
199    return int(np.ceil(gap / step))
200
201
202def fill_slicer(slicer, in_len):
203    """ Return slice object with Nones filled out to match `in_len`
204
205    Also fixes too large stop / start values according to slice() slicing
206    rules.
207
208    The returned slicer can have a None as `slicer.stop` if `slicer.step` is
209    negative and the input `slicer.stop` is None. This is because we can't
210    represent the ``stop`` as an integer, because -1 has a different meaning.
211
212    Parameters
213    ----------
214    slicer : slice object
215    in_len : int
216        length of axis on which `slicer` will be applied
217
218    Returns
219    -------
220    can_slicer : slice object
221        slice with start, stop, step set to explicit values, with the exception
222        of ``stop`` for negative step, which is None for the case of slicing
223        down through the first element
224    """
225    start, stop, step = slicer.start, slicer.stop, slicer.step
226    if step is None:
227        step = 1
228    if start is not None and start < 0:
229        start = in_len + start
230    if stop is not None and stop < 0:
231        stop = in_len + stop
232    if step > 0:
233        if start is None:
234            start = 0
235        if stop is None:
236            stop = in_len
237        else:
238            stop = min(stop, in_len)
239    else:  # step < 0
240        if start is None:
241            start = in_len - 1
242        else:
243            start = min(start, in_len - 1)
244    return slice(start, stop, step)
245
246
247def predict_shape(sliceobj, in_shape):
248    """ Predict shape of array from slicing array shape `shape` with `sliceobj`
249
250    Parameters
251    ----------
252    sliceobj : object
253        something that can be used to slice an array as in ``arr[sliceobj]``
254    in_shape : sequence
255        shape of array that could be sliced by `sliceobj`
256
257    Returns
258    -------
259    out_shape : tuple
260        predicted shape arising from slicing array shape `in_shape` with
261        `sliceobj`
262    """
263    if not isinstance(sliceobj, tuple):
264        sliceobj = (sliceobj,)
265    sliceobj = canonical_slicers(sliceobj, in_shape)
266    out_shape = []
267    real_no = 0
268    for slicer in sliceobj:
269        if slicer is None:
270            out_shape.append(1)
271            continue
272        real_no += 1
273        try:  # if int - we drop a dim (no append)
274            slicer = int(slicer)
275        except TypeError:
276            out_shape.append(slice2len(slicer, in_shape[real_no - 1]))
277    return tuple(out_shape)
278
279
280def _positive_slice(slicer):
281    """ Return full slice `slicer` enforcing positive step size
282
283    `slicer` assumed full in the sense of :func:`fill_slicer`
284    """
285    start, stop, step = slicer.start, slicer.stop, slicer.step
286    if step > 0:
287        return slicer
288    if stop is None:
289        stop = -1
290    gap = stop - start
291    n = gap / step
292    n = int(n) - 1 if int(n) == n else int(n)
293    end = start + n * step
294    return slice(end, start + 1, -step)
295
296
297def threshold_heuristic(slicer,
298                        dim_len,
299                        stride,
300                        skip_thresh=SKIP_THRESH):
301    """ Whether to force full axis read or contiguous read of stepped slice
302
303    Allows :func:`fileslice` to sometimes read memory that it will throw away
304    in order to get maximum speed.  In other words, trade memory for fewer disk
305    reads.
306
307    Parameters
308    ----------
309    slicer : slice object, or int
310        If slice, can be assumed to be full as in ``fill_slicer``
311    dim_len : int
312        length of axis being sliced
313    stride : int
314        memory distance between elements on this axis
315    skip_thresh : int, optional
316        Memory gap threshold in bytes above which to prefer skipping memory
317        rather than reading it and later discarding.
318
319    Returns
320    -------
321    action : {'full', 'contiguous', None}
322        Gives the suggested optimization for reading the data
323
324        * 'full' - read whole axis
325        * 'contiguous' - read all elements between start and stop
326        * None - read only memory needed for output
327
328    Notes
329    -----
330    Let's say we are in the middle of reading a file at the start of some
331    memory length $B$ bytes.  We don't need the memory, and we are considering
332    whether to read it anyway (then throw it away) (READ) or stop reading, skip
333    $B$ bytes and restart reading from there (SKIP).
334
335    After trying some more fancy algorithms, a hard threshold (`skip_thresh`)
336    for the maximum skip distance seemed to work well, as measured by times on
337    ``nibabel.benchmarks.bench_fileslice``
338    """
339    if isinstance(slicer, Integral):
340        gap_size = (dim_len - 1) * stride
341        return 'full' if gap_size <= skip_thresh else None
342    step_size = abs(slicer.step) * stride
343    if step_size > skip_thresh:
344        return None  # Prefer skip
345    # At least contiguous - also full?
346    slicer = _positive_slice(slicer)
347    start, stop = slicer.start, slicer.stop
348    read_len = stop - start
349    gap_size = (dim_len - read_len) * stride
350    return 'full' if gap_size <= skip_thresh else 'contiguous'
351
352
353def optimize_slicer(slicer, dim_len, all_full, is_slowest, stride,
354                    heuristic=threshold_heuristic):
355    """ Return maybe modified slice and post-slice slicing for `slicer`
356
357    Parameters
358    ----------
359    slicer : slice object or int
360    dim_len : int
361        length of axis along which to slice
362    all_full : bool
363        Whether dimensions up until now have been full (all elements)
364    is_slowest : bool
365        Whether this dimension is the slowest changing in memory / on disk
366    stride : int
367        size of one step along this axis
368    heuristic : callable, optional
369        function taking slice object, dim_len, stride length as arguments,
370        returning one of 'full', 'contiguous', None. See
371        :func:`threshold_heuristic` for an example.
372
373    Returns
374    -------
375    to_read : slice object or int
376        maybe modified slice based on `slicer` expressing what data should be
377        read from an underlying file or buffer. `to_read` must always have
378        positive ``step`` (because we don't want to go backwards in the buffer
379        / file)
380    post_slice : slice object
381        slice to be applied after array has been read.  Applies any
382        transformations in `slicer` that have not been applied in `to_read`. If
383        axis will be dropped by `to_read` slicing, so no slicing would make
384        sense, return string ``dropped``
385
386    Notes
387    -----
388    This is the heart of the algorithm for making segments from slice objects.
389
390    A contiguous slice is a slice with ``slice.step in (1, -1)``
391
392    A full slice is a continuous slice returning all elements.
393
394    The main question we have to ask is whether we should transform `to_read`,
395    `post_slice` to prefer a full read and partial slice.  We only do this in
396    the case of all_full==True.  In this case we might benefit from reading a
397    continuous chunk of data even if the slice is not continuous, or reading
398    all the data even if the slice is not full. Apply a heuristic `heuristic`
399    to decide whether to do this, and adapt `to_read` and `post_slice` slice
400    accordingly.
401
402    Otherwise (apart from constraint to be positive) return `to_read` unaltered
403    and `post_slice` as ``slice(None)``
404    """
405    # int or slice as input?
406    try:  # if int - we drop a dim (no append)
407        slicer = int(slicer)  # casts float to int as well
408    except TypeError:  # slice
409        # Deal with full cases first
410        if slicer == slice(None):
411            return slicer, slicer
412        slicer = fill_slicer(slicer, dim_len)
413        # actually equivalent to slice(None)
414        if slicer == slice(0, dim_len, 1):
415            return slice(None), slice(None)
416        # full, but reversed
417        if slicer == slice(dim_len - 1, None, -1):
418            return slice(None), slice(None, None, -1)
419        # Not full, mabye continuous
420        is_int = False
421    else:  # int
422        if slicer < 0:  # make negative offsets positive
423            slicer = dim_len + slicer
424        is_int = True
425    if all_full:
426        action = heuristic(slicer, dim_len, stride)
427        # Check return values (we may be using a custom function)
428        if action not in ('full', 'contiguous', None):
429            raise ValueError(f'Unexpected return {action} from heuristic')
430        if is_int and action == 'contiguous':
431            raise ValueError("int index cannot be contiguous")
432        # If this is the slowest changing dimension, never upgrade None or
433        # contiguous beyond contiguous (we've already covered the already-full
434        # case)
435        if is_slowest and action == 'full':
436            action = None if is_int else 'contiguous'
437        if action == 'full':
438            return slice(None), slicer
439        elif action == 'contiguous':  # Cannot be int
440            # If this is already contiguous, default None behavior handles it
441            step = slicer.step
442            if step not in (-1, 1):
443                if step < 0:
444                    slicer = _positive_slice(slicer)
445                return (slice(slicer.start, slicer.stop, 1),
446                        slice(None, None, step))
447    # We only need to be positive
448    if is_int:
449        return slicer, 'dropped'
450    if slicer.step > 0:
451        return slicer, slice(None)
452    return _positive_slice(slicer), slice(None, None, -1)
453
454
455def calc_slicedefs(sliceobj, in_shape, itemsize, offset, order,
456                   heuristic=threshold_heuristic):
457    """ Return parameters for slicing array with `sliceobj` given memory layout
458
459    Calculate the best combination of skips / (read + discard) to use for
460    reading the data from disk / memory, then generate corresponding
461    `segments`, the disk offsets and read lengths to read the memory.  If we
462    have chosen some (read + discard) optimization, then we need to discard the
463    surplus values from the read array using `post_slicers`, a slicing tuple
464    that takes the array as read from a file-like object, and returns the array
465    we want.
466
467    Parameters
468    ----------
469    sliceobj : object
470        something that can be used to slice an array as in ``arr[sliceobj]``
471    in_shape : sequence
472        shape of underlying array to be sliced
473    itemsize : int
474        element size in array (in bytes)
475    offset : int
476        offset of array data in underlying file or memory buffer
477    order : {'C', 'F'}
478        memory layout of underlying array
479    heuristic : callable, optional
480        function taking slice object, dim_len, stride length as arguments,
481        returning one of 'full', 'contiguous', None.  See
482        :func:`optimize_slicer` and :func:`threshold_heuristic`
483
484    Returns
485    -------
486    segments : list
487        list of 2 element lists where lists are (offset, length), giving
488        absolute memory offset in bytes and number of bytes to read
489    read_shape : tuple
490        shape with which to interpret memory as read from `segments`.
491        Interpreting the memory read from `segments` with this shape, and a
492        dtype, gives an intermediate array - call this ``R``
493    post_slicers : tuple
494        Any new slicing to be applied to the array ``R`` after reading via
495        `segments` and reshaping via `read_shape`.  Slices are in terms of
496        `read_shape`.  If empty, no new slicing to apply
497    """
498    if order not in "CF":
499        raise ValueError("order should be one of 'CF'")
500    sliceobj = canonical_slicers(sliceobj, in_shape)
501    # order fastest changing first (record reordering)
502    if order == 'C':
503        sliceobj = sliceobj[::-1]
504        in_shape = in_shape[::-1]
505    # Analyze sliceobj for new read_slicers and fixup post_slicers
506    # read_slicers are the virtual slices; we don't slice with these, but use
507    # the slice definitions to read the relevant memory from disk
508    read_slicers, post_slicers = optimize_read_slicers(
509        sliceobj, in_shape, itemsize, heuristic)
510    # work out segments corresponding to read_slicers
511    segments = slicers2segments(read_slicers, in_shape, offset, itemsize)
512    # Make post_slicers empty if it is the slicing identity operation
513    if all(s == slice(None) for s in post_slicers):
514        post_slicers = []
515    read_shape = predict_shape(read_slicers, in_shape)
516    # If reordered, order shape, post_slicers
517    if order == 'C':
518        read_shape = read_shape[::-1]
519        post_slicers = post_slicers[::-1]
520    return list(segments), tuple(read_shape), tuple(post_slicers)
521
522
523def optimize_read_slicers(sliceobj, in_shape, itemsize, heuristic):
524    """ Calculates slices to read from disk, and apply after reading
525
526    Parameters
527    ----------
528    sliceobj : object
529        something that can be used to slice an array as in ``arr[sliceobj]``.
530        Can be assumed to be canonical in the sense of ``canonical_slicers``
531    in_shape : sequence
532        shape of underlying array to be sliced.  Array for `in_shape` assumed
533        to be already in 'F' order. Reorder shape / sliceobj for slicing a 'C'
534        array before passing to this function.
535    itemsize : int
536        element size in array (bytes)
537    heuristic : callable
538        function taking slice object, axis length, and stride length as
539        arguments, returning one of 'full', 'contiguous', None.  See
540        :func:`optimize_slicer`; see :func:`threshold_heuristic` for an
541        example.
542
543    Returns
544    -------
545    read_slicers : tuple
546        `sliceobj` maybe rephrased to fill out dimensions that are better read
547        from disk and later trimmed to their original size with `post_slicers`.
548        `read_slicers` implies a block of memory to be read from disk. The
549        actual disk positions come from `slicers2segments` run over
550        `read_slicers`. Includes any ``newaxis`` dimensions in `sliceobj`
551    post_slicers : tuple
552        Any new slicing to be applied to the read array after reading.  The
553        `post_slicers` discard any memory that we read to save time, but that
554        we don't need for the slice.  Include any ``newaxis`` dimension added
555        by `sliceobj`
556    """
557    read_slicers = []
558    post_slicers = []
559    real_no = 0
560    stride = itemsize
561    all_full = True
562    for slicer in sliceobj:
563        if slicer is None:
564            read_slicers.append(None)
565            post_slicers.append(slice(None))
566            continue
567        dim_len = in_shape[real_no]
568        real_no += 1
569        is_last = real_no == len(in_shape)
570        # make modified sliceobj (to_read, post_slice)
571        read_slicer, post_slicer = optimize_slicer(
572            slicer, dim_len, all_full, is_last, stride, heuristic)
573        read_slicers.append(read_slicer)
574        all_full = all_full and read_slicer == slice(None)
575        if not isinstance(read_slicer, Integral):
576            post_slicers.append(post_slicer)
577        stride *= dim_len
578    return tuple(read_slicers), tuple(post_slicers)
579
580
581def slicers2segments(read_slicers, in_shape, offset, itemsize):
582    """ Get segments from `read_slicers` given `in_shape` and memory steps
583
584    Parameters
585    ----------
586    read_slicers : object
587        something that can be used to slice an array as in ``arr[sliceobj]``
588        Slice objects can by be assumed canonical as in ``canonical_slicers``,
589        and positive as in ``_positive_slice``
590    in_shape : sequence
591        shape of underlying array on disk before reading
592    offset : int
593        offset of array data in underlying file or memory buffer
594    itemsize : int
595        element size in array (in bytes)
596
597    Returns
598    -------
599    segments : list
600        list of 2 element lists where lists are [offset, length], giving
601        absolute memory offset in bytes and number of bytes to read
602    """
603    all_full = True
604    all_segments = [[offset, itemsize]]
605    stride = itemsize
606    real_no = 0
607    for read_slicer in read_slicers:
608        if read_slicer is None:
609            continue
610        dim_len = in_shape[real_no]
611        real_no += 1
612        is_int = isinstance(read_slicer, Integral)
613        if not is_int:  # slicer is (now) a slice
614            # make slice full (it will always be positive)
615            read_slicer = fill_slicer(read_slicer, dim_len)
616            slice_len = _full_slicer_len(read_slicer)
617        is_full = read_slicer == slice(0, dim_len, 1)
618        is_contiguous = not is_int and read_slicer.step == 1
619        if all_full and is_contiguous:  # full or contiguous
620            if read_slicer.start != 0:
621                all_segments[0][0] += stride * read_slicer.start
622            all_segments[0][1] *= slice_len
623        else:  # Previous or current stuff is not contiguous
624            if is_int:
625                for segment in all_segments:
626                    segment[0] += stride * read_slicer
627            else:  # slice object
628                segments = all_segments
629                all_segments = []
630                for i in range(read_slicer.start,
631                               read_slicer.stop,
632                               read_slicer.step):
633                    for s in segments:
634                        all_segments.append([s[0] + stride * i, s[1]])
635        all_full = all_full and is_full
636        stride *= dim_len
637    return all_segments
638
639
640def read_segments(fileobj, segments, n_bytes, lock=None):
641    """ Read `n_bytes` byte data implied by `segments` from `fileobj`
642
643    Parameters
644    ----------
645    fileobj : file-like object
646        Implements `seek` and `read`
647    segments : sequence
648        list of 2 sequences where sequences are (offset, length), giving
649        absolute file offset in bytes and number of bytes to read
650    n_bytes : int
651        total number of bytes that will be read
652    lock : {None, threading.Lock, lock-like} optional
653        If provided, used to ensure that paired calls to ``seek`` and ``read``
654        cannot be interrupted by another thread accessing the same ``fileobj``.
655        Each thread which accesses the same file via ``read_segments`` must
656        share a lock in order to ensure that the file access is thread-safe.
657        A lock does not need to be provided for single-threaded access. The
658        default value (``None``) results in a lock-like object  (a
659        ``_NullLock``) which does not do anything.
660
661    Returns
662    -------
663    buffer : buffer object
664        object implementing buffer protocol, such as byte string or ndarray or
665        mmap or ctypes ``c_char_array``
666    """
667    # Make a lock-like thing to make the code below a bit nicer
668    if lock is None:
669        lock = _NullLock()
670
671    if len(segments) == 0:
672        if n_bytes != 0:
673            raise ValueError("No segments, but non-zero n_bytes")
674        return b''
675    if len(segments) == 1:
676        offset, length = segments[0]
677        with lock:
678            fileobj.seek(offset)
679            bytes = fileobj.read(length)
680        if len(bytes) != n_bytes:
681            raise ValueError("Whoops, not enough data in file")
682        return bytes
683    # More than one segment
684    bytes = mmap(-1, n_bytes)
685    for offset, length in segments:
686        with lock:
687            fileobj.seek(offset)
688            bytes.write(fileobj.read(length))
689    if bytes.tell() != n_bytes:
690        raise ValueError("Oh dear, n_bytes does not look right")
691    return bytes
692
693
694def _simple_fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C',
695                      heuristic=None):
696    """  Read all data from `fileobj` into array, then slice with `sliceobj`
697
698    The simplest possible thing; read all the data into the full array, then
699    slice the full array.
700
701    Parameters
702    ----------
703    fileobj : file-like object
704        implements ``read`` and ``seek``
705    sliceobj : object
706        something that can be used to slice an array as in ``arr[sliceobj]``
707    shape : sequence
708        shape of full array inside `fileobj`
709    dtype : dtype object
710        dtype of array inside `fileobj`
711    offset : int, optional
712        offset of array data within `fileobj`
713    order : {'C', 'F'}, optional
714        memory layout of array in `fileobj`
715    heuristic : optional
716        The routine doesn't use `heuristic`; the parameter is for API
717        compatibility with :func:`fileslice`
718
719    Returns
720    -------
721    sliced_arr : array
722        Array in `fileobj` as sliced with `sliceobj`
723    """
724    fileobj.seek(offset)
725    nbytes = reduce(operator.mul, shape) * dtype.itemsize
726    bytes = fileobj.read(nbytes)
727    new_arr = np.ndarray(shape, dtype, buffer=bytes, order=order)
728    return new_arr[sliceobj]
729
730
731def fileslice(fileobj, sliceobj, shape, dtype, offset=0, order='C',
732              heuristic=threshold_heuristic, lock=None):
733    """ Slice array in `fileobj` using `sliceobj` slicer and array definitions
734
735    `fileobj` contains the contiguous binary data for an array ``A`` of shape,
736    dtype, memory layout `shape`, `dtype`, `order`, with the binary data
737    starting at file offset `offset`.
738
739    Our job is to return the sliced array ``A[sliceobj]`` in the most efficient
740    way in terms of memory and time.
741
742    Sometimes it will be quicker to read memory that we will later throw away,
743    to save time we might lose doing short seeks on `fileobj`.  Call these
744    alternatives: (read + discard); and skip.  This routine guesses when to
745    (read+discard) or skip using the callable `heuristic`, with a default using
746    a hard threshold for the memory gap large enough to prefer a skip.
747
748    Parameters
749    ----------
750    fileobj : file-like object
751        file-like object, opened for reading in binary mode. Implements
752        ``read`` and ``seek``.
753    sliceobj : object
754        something that can be used to slice an array as in ``arr[sliceobj]``.
755    shape : sequence
756        shape of full array inside `fileobj`.
757    dtype : dtype specifier
758        dtype of array inside `fileobj`, or input to ``numpy.dtype`` to specify
759        array dtype.
760    offset : int, optional
761        offset of array data within `fileobj`
762    order : {'C', 'F'}, optional
763        memory layout of array in `fileobj`.
764    heuristic : callable, optional
765        function taking slice object, axis length, stride length as arguments,
766        returning one of 'full', 'contiguous', None.  See
767        :func:`optimize_slicer` and see :func:`threshold_heuristic` for an
768        example.
769    lock : {None, threading.Lock, lock-like} optional
770        If provided, used to ensure that paired calls to ``seek`` and ``read``
771        cannot be interrupted by another thread accessing the same ``fileobj``.
772        Each thread which accesses the same file via ``read_segments`` must
773        share a lock in order to ensure that the file access is thread-safe.
774        A lock does not need to be provided for single-threaded access. The
775        default value (``None``) results in a lock-like object  (a
776        ``_NullLock``) which does not do anything.
777
778    Returns
779    -------
780    sliced_arr : array
781        Array in `fileobj` as sliced with `sliceobj`
782    """
783    if is_fancy(sliceobj):
784        raise ValueError("Cannot handle fancy indexing")
785    dtype = np.dtype(dtype)
786    itemsize = int(dtype.itemsize)
787    segments, sliced_shape, post_slicers = calc_slicedefs(
788        sliceobj, shape, itemsize, offset, order)
789    n_bytes = reduce(operator.mul, sliced_shape, 1) * itemsize
790    arr_data = read_segments(fileobj, segments, n_bytes, lock)
791    sliced = np.ndarray(sliced_shape, dtype, buffer=arr_data, order=order)
792    return sliced[post_slicers]
793
794
795def strided_scalar(shape, scalar=0.):
796    """ Return array shape `shape` where all entries point to value `scalar`
797
798    Parameters
799    ----------
800    shape : sequence
801        Shape of output array.
802    scalar : scalar
803        Scalar value with which to fill array.
804
805    Returns
806    -------
807    strided_arr : array
808        Array of shape `shape` for which all values == `scalar`, built by
809        setting all strides of `strided_arr` to 0, so the scalar is broadcast
810        out to the full array `shape`. `strided_arr` is flagged as not
811        `writeable`.
812
813        The array is set read-only to avoid a numpy error when broadcasting -
814        see https://github.com/numpy/numpy/issues/6491
815    """
816    shape = tuple(shape)
817    scalar = np.array(scalar)
818    strides = [0] * len(shape)
819    strided_scalar = np.lib.stride_tricks.as_strided(scalar, shape, strides)
820    strided_scalar.flags.writeable = False
821    return strided_scalar
822