1# -*- coding: utf-8 -*-
2
3########################################################################
4#
5#       License: BSD
6#       Created: March 4, 2003
7#       Author:  Francesc Alted - faltet@pytables.com
8#
9#       $Id$
10#
11########################################################################
12
13"""Utility functions."""
14
15import os
16import sys
17import warnings
18import subprocess
19import re
20from time import time
21
22import numpy
23
24from .flavor import array_of_flavor
25
# The map between byteorders in NumPy and PyTables.  Keys are the
# one-character codes found in ``numpy.dtype.byteorder``; values are the
# names used on the PyTables side.
byteorders = {
    '>': 'big',
    '<': 'little',
    # NumPy's "native" code resolves to the byte order of this interpreter.
    '=': sys.byteorder,
    '|': 'irrelevant',
}

# The type used for size values: indexes, coordinates, dimension
# lengths, row numbers, shapes, chunk shapes, byte counts...
SizeType = numpy.int64
37
38
def correct_byteorder(ptype, byteorder):
    """Return the byteorder that applies to the PyTables type `ptype`.

    For the types listed below the function answers ``"irrelevant"``
    whatever `byteorder` is passed in; for every other type the given
    `byteorder` is handed back untouched.

    """

    orderless_types = ['string', 'bool', 'int8', 'uint8', 'object']
    return "irrelevant" if ptype in orderless_types else byteorder
46
47
def is_idx(index):
    """Tell whether `index` can be used as an integer index.

    Returns ``True`` for plain ints and for objects whose ``__index__``
    method (PEP 357) succeeds, ``False`` otherwise.  Objects exposing a
    non-scalar ``shape`` (e.g. ``array([idx])``) are rejected.  Passing a
    ``bool`` is accepted but emits a ``DeprecationWarning``.

    """

    if type(index) is int:
        return True

    if not hasattr(index, "__index__"):
        # Fallback for objects lacking the ``__index__`` protocol: NumPy
        # integer scalars and 0-dim arrays whose dtype kind is 'i'.
        return (isinstance(index, numpy.integer) or
                (isinstance(index, numpy.ndarray) and
                 index.shape == () and
                 index.dtype.str[1] == 'i'))

    # Exclude the array([idx]) as working as an index.  Fixes #303.
    if hasattr(index, "shape") and index.shape != ():
        return False

    try:
        index.__index__()
    except TypeError:
        # The protocol is present but refuses this particular value.
        return False

    if isinstance(index, bool):
        warnings.warn(
            'using a boolean instead of an integer will result in an '
            'error in the future', DeprecationWarning, stacklevel=2)
    return True
74
75
def idx2long(index):
    """Convert a possible index into a long int.

    `index` is passed to ``int()``.  Any ordinary failure of that
    conversion is reported as a ``TypeError``.

    """

    try:
        return int(index)
    except Exception:
        # Only ordinary errors are translated.  A bare ``except`` would
        # also turn KeyboardInterrupt/SystemExit into a TypeError.
        raise TypeError("not an integer type.")
83
84
# This is used in VLArray and EArray to produce a NumPy object compliant
# with the atom from a generic Python object.  If `copy` is True, the
# result is guaranteed to be a copy of the input: never the same object,
# nor a new one sharing the same memory.
def convert_to_np_atom(arr, atom, copy=False):
    """Convert a generic object into a NumPy object compliant with atom.

    `arr` is first turned into a NumPy array through ``array_of_flavor``.
    `atom` must expose ``shape`` and ``dtype`` attributes.

    For a scalar atom (``atom.shape == ()``) the array is converted to
    ``atom.dtype``.  When `copy` is true a fresh copy is always made;
    otherwise data is copied only if the conversion requires it.

    For a multidimensional atom a new array is always allocated and
    filled from the input using NumPy broadcasting.

    """

    # First, convert the object into a NumPy array
    nparr = array_of_flavor(arr, 'numpy')
    # Copy of data if necessary for getting a contiguous buffer, or if
    # dtype is not the correct one.
    if atom.shape == ():
        # Scalar atom case
        if copy:
            nparr = numpy.array(nparr, dtype=atom.dtype, copy=True)
        else:
            # ``numpy.array(..., copy=False)`` raises under NumPy >= 2 when
            # a copy cannot be avoided.  ``asarray`` keeps the historical
            # "copy only if needed" behaviour on every NumPy version.
            nparr = numpy.asarray(nparr, dtype=atom.dtype)
    else:
        # Multidimensional atom case.  Addresses #133.
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to take advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        # The unnamed field receives NumPy's default name, 'f0'.
        nparr2 = numpy.empty(newshape, dtype=[('', atom.dtype)])
        nparr2['f0'][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr
114
115
116
117# The next is used in Array, EArray and VLArray, and it is a bit more
118# high level than convert_to_np_atom
def convert_to_np_atom2(object, atom):
    """Build a NumPy object compliant with `atom` out of `object`.

    The conversion itself is delegated to ``convert_to_np_atom``; a copy
    is requested for the atom types listed below.  If the resulting
    array's byte order is 'little' or 'big' and differs from
    ``sys.byteorder``, a byte-swapped copy is returned instead.

    """

    # Atom types for which the conversion must not work in place.
    needs_copy = atom.type in ['time64']
    converted = convert_to_np_atom(object, atom, needs_copy)

    # Finally, look at the byte order of what we got.
    order = byteorders[converted.dtype.byteorder]
    if order == sys.byteorder or order not in ['little', 'big']:
        return converted

    # ``byteswap()`` returns a new array, so the original is not modified.
    return converted.byteswap()
134
135
136
def check_file_access(filename, mode='r'):
    """Verify that `filename` can be accessed in the given `mode`.

    `mode` is one of the modes supported by `File` objects ('r', 'w',
    'a' or 'r+').  The function returns silently when the file can be
    accessed that way; otherwise an ``IOError`` describing the problem
    is raised.  An unknown `mode` raises a ``ValueError``.

    The point of all this is to avoid the lengthy and scary HDF5
    messages produced when there are problems opening a file.  The file
    system is only queried, never modified.

    """

    if mode == 'r':
        # Readable means: it exists, it is a regular file, we may read it.
        if not os.access(filename, os.F_OK):
            raise IOError("``%s`` does not exist" % (filename,))
        if not os.path.isfile(filename):
            raise IOError("``%s`` is not a regular file" % (filename,))
        if not os.access(filename, os.R_OK):
            raise IOError("file ``%s`` exists but it can not be read"
                          % (filename,))
        return

    if mode == 'w':
        if os.access(filename, os.F_OK):
            # The file is replaced rather than removed, so it has to be
            # both readable and writable already.
            check_file_access(filename, 'r+')
            return
        # A new file will be created: its directory must be writable.
        parentname = os.path.dirname(filename) or '.'
        if not os.access(parentname, os.F_OK):
            raise IOError("``%s`` does not exist" % (parentname,))
        if not os.path.isdir(parentname):
            raise IOError("``%s`` is not a directory" % (parentname,))
        if not os.access(parentname, os.W_OK):
            raise IOError("directory ``%s`` exists but it can not be "
                          "written" % (parentname,))
        return

    if mode == 'a':
        # Appending is 'r+' on an existing file and 'w' on a new one.
        submode = 'r+' if os.access(filename, os.F_OK) else 'w'
        check_file_access(filename, submode)
        return

    if mode == 'r+':
        check_file_access(filename, 'r')
        if not os.access(filename, os.W_OK):
            raise IOError("file ``%s`` exists but it can not be written"
                          % (filename,))
        return

    raise ValueError("invalid mode: %r" % (mode,))
190
191
192
def lazyattr(fget):
    """Create a *lazy attribute* from the result of `fget`.

    This function is intended to be used as a *method decorator*.  It
    returns a read-only *property* which caches the result of calling
    the `fget` instance method in the instance ``__dict__``, under the
    name of `fget`.  The docstring of `fget` is used for the property
    itself.  For instance:

    >>> class MyClass(object):
    ...     @lazyattr
    ...     def attribute(self):
    ...         'Attribute description.'
    ...         print('creating value')
    ...         return 10
    ...
    >>> isinstance(MyClass.attribute, property)
    True
    >>> MyClass.attribute.__doc__
    'Attribute description.'
    >>> obj = MyClass()
    >>> obj.__dict__
    {}
    >>> obj.attribute
    creating value
    10
    >>> obj.__dict__
    {'attribute': 10}
    >>> obj.attribute
    10
    >>> del obj.attribute  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
      ...
    AttributeError: can't delete attribute

    .. warning::

        Please note that this decorator *changes the type of the
        decorated object* from an instance method into a property.

    """

    name = fget.__name__

    def newfget(self):
        # The property is a data descriptor, so it always wins over the
        # instance ``__dict__``; the cached value has to be looked up by
        # hand.
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        mydict[name] = value = fget(self)
        return value

    # No setter and no deleter: the attribute is read-only.
    return property(newfget, None, None, fget.__doc__)
244
245
def show_stats(explain, tref, encoding=None):
    """Show the used memory (only works for Linux 2.6.x).

    The figures are read from ``/proc/<pid>/status`` of the current
    process and printed together with the wall-clock time elapsed since
    `tref`.

    `explain` is a label printed in the header line.  `tref` is a
    reference time, in seconds as returned by ``time.time()``.
    `encoding` is used to decode the status file and defaults to
    ``sys.getdefaultencoding()``.

    Fields that cannot be obtained (e.g. on platforms without ``/proc``)
    are reported as ``n/a``.  The current time is returned so it can be
    used as `tref` in a later call.

    """

    if encoding is None:
        encoding = sys.getdefaultencoding()

    fields = ("VmSize", "VmRSS", "VmData", "VmStk", "VmExe", "VmLib")
    stats = dict.fromkeys(fields, "n/a")

    # Read the status file directly instead of spawning ``cat`` through a
    # shell: no child process is left behind and nothing goes via a shell.
    try:
        with open("/proc/%s/status" % os.getpid(), "rb") as status:
            for rawline in status:
                line = rawline.decode(encoding)
                name, sep, rest = line.partition(":")
                if sep and name in stats:
                    stats[name] = int(rest.split()[0])
    except (IOError, OSError):
        # Deliberately best-effort: without /proc the memory figures stay
        # at "n/a" but timing information is still reported.
        pass

    print("Memory usage: ******* %s *******" % explain)
    print("VmSize: %7s kB\tVmRSS: %7s kB" % (stats["VmSize"], stats["VmRSS"]))
    print("VmData: %7s kB\tVmStk: %7s kB" % (stats["VmData"], stats["VmStk"]))
    print("VmExe:  %7s kB\tVmLib: %7s kB" % (stats["VmExe"], stats["VmLib"]))
    tnow = time()
    print("WallClock time:", round(tnow - tref, 3))
    return tnow
277
278
279# truncate data before calling __setitem__, to improve compression ratio
280# this function is taken verbatim from netcdf4-python
def quantize(data, least_significant_digit):
    """Quantize `data` so that it compresses better.

    The result is ``around(scale * data) / scale`` where ``scale`` is
    ``2**bits`` and ``bits`` is derived from `least_significant_digit`.

    For example, if least_significant_digit=1, bits will be 4.

    """

    precision = 10. ** -least_significant_digit
    exponent = numpy.log10(precision)
    # Round the exponent away from zero.
    rounder = numpy.floor if exponent < 0 else numpy.ceil
    exponent = int(rounder(exponent))
    # Number of binary digits needed to resolve 10**-exponent steps.
    nbits = numpy.ceil(numpy.log2(10. ** -exponent))
    scale = 2. ** nbits
    return numpy.around(scale * data) / scale
302
303
# Utilities to detect leaked instances.  See recipe 14.10 of the Python
# Cookbook by Martelli & Ascher.
# Maps a tracking name to the list of weak references logged under it.
tracked_classes = {}
import weakref


def log_instance_creation(instance, name=None):
    """Register `instance` in ``tracked_classes`` through a weak reference.

    `name` is the key the instance is filed under; when omitted, the
    name of the instance's class is used.  Only a weak reference is
    stored, so tracking does not keep the instance alive.

    """

    if name is None:
        name = instance.__class__.__name__
    # The registration must happen for explicit names too, so it lives
    # outside the ``if`` above.
    tracked_classes.setdefault(name, []).append(weakref.ref(instance))
316
317
318
def string_to_classes(s):
    """Expand the class specification `s` into a list of class names.

    ``'*'`` stands for every name present in ``tracked_classes``, in
    sorted order; any other string is split on whitespace.

    """

    if s != '*':
        return s.split()
    return sorted(tracked_classes.keys())
325
326
def fetch_logged_instances(classes="*"):
    """Return ``(name, count)`` pairs for the requested tracked classes.

    `classes` is interpreted by ``string_to_classes``.  ``count`` is the
    number of references logged under ``name`` in ``tracked_classes``.

    """

    counts = []
    for classname in string_to_classes(classes):
        counts.append((classname, len(tracked_classes[classname])))
    return counts
330
331
332
def count_logged_instances(classes, file=sys.stdout):
    """Write one ``name: count`` line per requested class to `file`.

    `classes` is interpreted by ``string_to_classes``; the count is the
    number of references logged under that name in ``tracked_classes``.

    """

    for name in string_to_classes(classes):
        logged = tracked_classes[name]
        file.write("%s: %d\n" % (name, len(logged)))
336
337
338
def list_logged_instances(classes, file=sys.stdout):
    """Write the ``repr`` of every live logged instance to `file`.

    `classes` is interpreted by ``string_to_classes``.  Each class gets
    a header line; references whose target is gone are skipped.

    """

    for name in string_to_classes(classes):
        file.write('\n%s:\n' % name)
        # Dereference lazily, one weak reference at a time.
        for obj in (ref() for ref in tracked_classes[name]):
            if obj is None:
                continue
            file.write('    %s\n' % repr(obj))
346
347
348
def dump_logged_instances(classes, file=sys.stdout):
    """Write every live logged instance and its attributes to `file`.

    `classes` is interpreted by ``string_to_classes``.  Each class gets
    a header line; for each instance that is still alive, the instance
    is written followed by the contents of its ``__dict__``.

    """

    for name in string_to_classes(classes):
        file.write('\n%s:\n' % name)
        for ref in tracked_classes[name]:
            obj = ref()
            if obj is None:
                # The instance has already been collected.
                continue
            file.write('    %s:\n' % obj)
            for attr in obj.__dict__.items():
                # ``attr`` is a (key, value) pair filling both fields.
                file.write('        %20s : %s\n' % attr)
358
359
360
361#
362# A class useful for cache usage
363#
class CacheDict(dict):
    """A dictionary that prevents itself from growing too much.

    When an item is set while the dictionary already holds more than
    `maxentries` items, about 10% of `maxentries` (at least one)
    arbitrary entries are dropped first.

    """

    def __init__(self, maxentries):
        # Soft upper bound on the number of entries kept.
        self.maxentries = maxentries
        super(CacheDict, self).__init__()

    def __setitem__(self, key, value):
        # Protection against growing the cache too much
        if len(self) > self.maxentries:
            # Remove a 10% of (arbitrary) elements from the cache.
            # Floor division keeps the count an integer (a float cannot
            # be used as a slice bound); at least one entry is evicted
            # so that small caches stay bounded too.
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(self.keys())[:entries_to_remove]:
                super(CacheDict, self).__delitem__(k)
        super(CacheDict, self).__setitem__(key, value)
379
380
class NailedDict(object):
    """A bounded cache that plays dead while it has nails on it.

    While the nail count is positive, lookups behave as if the
    dictionary were empty and assignments are silently dropped.

    """

    def __init__(self, maxentries):
        self._nailcount = 0
        self._cache = {}
        self.maxentries = maxentries

    # Only a restricted set of dictionary methods are supported.  That
    # is why we wrap a dictionary instead of inheriting from it.

    # The following are intended to be used by ``Table`` code changing
    # the set of usable indexes.

    def clear(self):
        """Drop every cached item."""
        self._cache.clear()

    def nail(self):
        """Add one nail."""
        self._nailcount = self._nailcount + 1

    def unnail(self):
        """Remove one nail."""
        self._nailcount = self._nailcount - 1

    # The following are intended to be used by ``Table`` code handling
    # conditions.

    def __contains__(self, key):
        return self._nailcount <= 0 and key in self._cache

    def __getitem__(self, key):
        if self._nailcount <= 0:
            return self._cache[key]
        raise KeyError(key)

    def get(self, key, default=None):
        """Return the item for `key`, or `default` if missing or nailed."""
        if self._nailcount > 0:
            return default
        return self._cache.get(key, default)

    def __setitem__(self, key, value):
        if self._nailcount > 0:
            # Nailed: the assignment is ignored.
            return
        store = self._cache
        # Keep the cache from growing without bound: once it exceeds
        # ``maxentries``, evict about 10% (at least one) arbitrary keys.
        if len(store) > self.maxentries:
            quota = max(self.maxentries // 10, 1)
            for stale in list(store.keys())[:quota]:
                del store[stale]
        store[key] = value
433
434
def detect_number_of_cores():
    """Detects the number of cores on a system.

    Cribbed from pp.

    Returns a positive integer; ``1`` is returned when the number of
    cores cannot be determined.

    """

    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else:  # OSX:
            # ``os.popen2`` is not available on Python 3; ask ``sysctl``
            # through ``subprocess`` instead, without going via a shell.
            try:
                output = subprocess.check_output(["sysctl", "-n", "hw.ncpu"])
                ncpus = int(output.strip())
            except (OSError, ValueError, subprocess.CalledProcessError):
                # sysctl missing, failing or printing garbage: fall
                # through to the remaining strategies.
                ncpus = 0
            if ncpus > 0:
                return ncpus
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        try:
            ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        except ValueError:
            # A malformed environment value is treated as "unknown".
            ncpus = 0
        if ncpus > 0:
            return ncpus
    return 1  # Default
457
458
459
460# Main part
461# =========
def _test():
    """Execute the doctests embedded in this module's docstrings."""

    from doctest import testmod
    testmod()


if __name__ == '__main__':
    _test()
470
471
472## Local Variables:
473## mode: python
474## py-indent-offset: 4
475## tab-width: 4
476## fill-column: 72
477## End:
478