# -*- coding: utf-8 -*-

########################################################################
#
# License: BSD
# Created: March 4, 2003
# Author: Francesc Alted - faltet@pytables.com
#
# $Id$
#
########################################################################

"""Utility functions."""

import os
import sys
import warnings
import subprocess
import re
import weakref
from time import time

import numpy

from .flavor import array_of_flavor

# The map between byteorders in NumPy and PyTables
byteorders = {
    '>': 'big',
    '<': 'little',
    '=': sys.byteorder,
    '|': 'irrelevant',
}

# The type used for size values: indexes, coordinates, dimension
# lengths, row numbers, shapes, chunk shapes, byte counts...
SizeType = numpy.int64


def correct_byteorder(ptype, byteorder):
    """Fix the byteorder depending on the PyTables types.

    Single-byte and non-numeric PyTables types have no meaningful
    byteorder, so ``"irrelevant"`` is returned for them; any other type
    keeps the `byteorder` passed in.
    """

    if ptype in ['string', 'bool', 'int8', 'uint8', 'object']:
        return "irrelevant"
    else:
        return byteorder


def is_idx(index):
    """Checks if an object can work as an index or not.

    Accepts plain ints, objects implementing the index protocol
    (PEP 357) and NumPy integer scalars / 0-d integer arrays.
    """

    if type(index) is int:
        return True
    elif hasattr(index, "__index__"):  # index protocol (PEP 357)
        # Exclude the array([idx]) as working as an index.  Fixes #303.
        if hasattr(index, "shape") and index.shape != ():
            return False
        try:
            index.__index__()
            # Booleans pass the index protocol, but indexing with them
            # is deprecated behaviour; warn the caller.
            if isinstance(index, bool):
                warnings.warn(
                    'using a boolean instead of an integer will result in an '
                    'error in the future', DeprecationWarning, stacklevel=2)
            return True
        except TypeError:
            return False
    elif isinstance(index, numpy.integer):
        return True
    # 0-dim integer arrays also work as indexes.
    elif (isinstance(index, numpy.ndarray) and (index.shape == ()) and
          index.dtype.str[1] == 'i'):
        return True

    return False


def idx2long(index):
    """Convert a possible index into a long int.

    Raises a ``TypeError`` if `index` cannot be converted to an integer.
    """

    try:
        return int(index)
    except Exception:
        # Only conversion failures should end up here; a bare ``except``
        # would also swallow KeyboardInterrupt/SystemExit.
        raise TypeError("not an integer type.")


# This is used in VLArray and EArray to produce NumPy object compliant
# with atom from a generic python type.  If copy is stated as True, it
# is assured that it will return a copy of the object and never the same
# object or a new one sharing the same memory.
def convert_to_np_atom(arr, atom, copy=False):
    """Convert a generic object into a NumPy object compliant with atom."""

    # First, convert the object into a NumPy array
    nparr = array_of_flavor(arr, 'numpy')
    # Copy of data if necessary for getting a contiguous buffer, or if
    # dtype is not the correct one.
    if atom.shape == ():
        # Scalar atom case
        nparr = numpy.array(nparr, dtype=atom.dtype, copy=copy)
    else:
        # Multidimensional atom case.  Addresses #133.
        # We need to use this strange way to obtain a dtype compliant
        # array because NumPy doesn't honor the shape of the dtype when
        # it is multidimensional.  See:
        # http://scipy.org/scipy/numpy/ticket/926
        # for details.
        # All of this is done just to taking advantage of the NumPy
        # broadcasting rules.
        newshape = nparr.shape[:-len(atom.dtype.shape)]
        nparr2 = numpy.empty(newshape, dtype=[('', atom.dtype)])
        nparr2['f0'][:] = nparr
        # Return a view (i.e. get rid of the record type)
        nparr = nparr2.view(atom.dtype)
    return nparr


# The next is used in Array, EArray and VLArray, and it is a bit more
# high level than convert_to_np_atom
def convert_to_np_atom2(object, atom):
    """Convert a generic object into a NumPy object compliant with atom.

    On top of `convert_to_np_atom`, this also normalizes the byteorder
    of the resulting array to the native one.

    .. note:: the parameter is named ``object`` (shadowing the builtin)
       for backward compatibility with existing keyword callers.
    """

    # Check whether the object needs to be copied to make the operation
    # safe to in-place conversion.
    copy = atom.type in ['time64']
    nparr = convert_to_np_atom(object, atom, copy)
    # Finally, check the byteorder and change it if needed
    byteorder = byteorders[nparr.dtype.byteorder]
    if byteorder in ['little', 'big'] and byteorder != sys.byteorder:
        # The byteorder needs to be fixed (a copy is made
        # so that the original array is not modified).
        # NOTE(review): ``byteswap()`` swaps the bytes but leaves the
        # dtype byteorder metadata untouched -- presumably intentional
        # since only the raw buffer is handed to HDF5; confirm.
        nparr = nparr.byteswap()

    return nparr


def check_file_access(filename, mode='r'):
    """Check for file access in the specified `mode`.

    `mode` is one of the modes supported by `File` objects.  If the file
    indicated by `filename` can be accessed using that `mode`, the
    function ends successfully.  Else, an ``IOError`` is raised
    explaining the reason of the failure.

    All this paraphernalia is used to avoid the lengthy and scaring HDF5
    messages produced when there are problems opening a file.  No
    changes are ever made to the file system.

    """

    if mode == 'r':
        # The file should be readable.
        if not os.access(filename, os.F_OK):
            raise IOError("``%s`` does not exist" % (filename,))
        if not os.path.isfile(filename):
            raise IOError("``%s`` is not a regular file" % (filename,))
        if not os.access(filename, os.R_OK):
            raise IOError("file ``%s`` exists but it can not be read"
                          % (filename,))
    elif mode == 'w':
        if os.access(filename, os.F_OK):
            # Since the file is not removed but replaced,
            # it must already be accessible to read and write operations.
            check_file_access(filename, 'r+')
        else:
            # A new file is going to be created,
            # so the directory should be writable.
            parentname = os.path.dirname(filename)
            if not parentname:
                parentname = '.'
            if not os.access(parentname, os.F_OK):
                raise IOError("``%s`` does not exist" % (parentname,))
            if not os.path.isdir(parentname):
                raise IOError("``%s`` is not a directory" % (parentname,))
            if not os.access(parentname, os.W_OK):
                raise IOError("directory ``%s`` exists but it can not be "
                              "written" % (parentname,))
    elif mode == 'a':
        # Appending: the file must be fully accessible if it exists, or
        # creatable as with mode 'w' if it does not.
        if os.access(filename, os.F_OK):
            check_file_access(filename, 'r+')
        else:
            check_file_access(filename, 'w')
    elif mode == 'r+':
        check_file_access(filename, 'r')
        if not os.access(filename, os.W_OK):
            raise IOError("file ``%s`` exists but it can not be written"
                          % (filename,))
    else:
        raise ValueError("invalid mode: %r" % (mode,))


def lazyattr(fget):
    """Create a *lazy attribute* from the result of `fget`.

    This function is intended to be used as a *method decorator*.  It
    returns a *property* which caches the result of calling the `fget`
    instance method.  The docstring of `fget` is used for the property
    itself.  For instance:

    >>> class MyClass(object):
    ...     @lazyattr
    ...     def attribute(self):
    ...         'Attribute description.'
    ...         print('creating value')
    ...         return 10
    ...
    >>> type(MyClass.attribute)
    <class 'property'>
    >>> MyClass.attribute.__doc__
    'Attribute description.'
    >>> obj = MyClass()
    >>> obj.__dict__
    {}
    >>> obj.attribute
    creating value
    10
    >>> obj.__dict__
    {'attribute': 10}
    >>> obj.attribute
    10
    >>> del obj.attribute  # doctest: +IGNORE_EXCEPTION_DETAIL
    Traceback (most recent call last):
      ...
    AttributeError: can't delete attribute

    .. warning::

        Please note that this decorator *changes the type of the
        decorated object* from an instance method into a property.

    """

    name = fget.__name__

    def newfget(self):
        # Once computed, the value is stored in the instance dict, which
        # shadows the (data-less) property on subsequent accesses.
        mydict = self.__dict__
        if name in mydict:
            return mydict[name]
        mydict[name] = value = fget(self)
        return value

    return property(newfget, None, None, fget.__doc__)


def show_stats(explain, tref, encoding=None):
    """Show the used memory (only works for Linux 2.6.x).

    Prints the process memory counters prefixed by the `explain` tag and
    the wall clock time elapsed since `tref`; returns the current time.
    """

    if encoding is None:
        encoding = sys.getdefaultencoding()

    # Pre-initialize so that a missing field in ``/proc`` does not end
    # in a NameError when printing below.
    vmsize = vmrss = vmdata = vmstk = vmexe = vmlib = 0

    # Read the memory info straight from the /proc filesystem; there is
    # no need to shell out to ``cat`` for this.
    with open("/proc/%s/status" % os.getpid(), "rb") as sout:
        for line in sout:
            line = line.decode(encoding)
            if line.startswith("VmSize:"):
                vmsize = int(line.split()[1])
            elif line.startswith("VmRSS:"):
                vmrss = int(line.split()[1])
            elif line.startswith("VmData:"):
                vmdata = int(line.split()[1])
            elif line.startswith("VmStk:"):
                vmstk = int(line.split()[1])
            elif line.startswith("VmExe:"):
                vmexe = int(line.split()[1])
            elif line.startswith("VmLib:"):
                vmlib = int(line.split()[1])
    print("Memory usage: ******* %s *******" % explain)
    print("VmSize: %7s kB\tVmRSS: %7s kB" % (vmsize, vmrss))
    print("VmData: %7s kB\tVmStk: %7s kB" % (vmdata, vmstk))
    print("VmExe:  %7s kB\tVmLib: %7s kB" % (vmexe, vmlib))
    tnow = time()
    print("WallClock time:", round(tnow - tref, 3))
    return tnow


# truncate data before calling __setitem__, to improve compression ratio
# this function is taken verbatim from netcdf4-python
def quantize(data, least_significant_digit):
    """quantize data to improve compression.

    Data is quantized using around(scale*data)/scale, where scale is
    2**bits, and bits is determined from the least_significant_digit.

    For example, if least_significant_digit=1, bits will be 4.

    """

    precision = pow(10., -least_significant_digit)
    exp = numpy.log10(precision)
    # Round the exponent away from zero so that the requested precision
    # is never lost.
    if exp < 0:
        exp = int(numpy.floor(exp))
    else:
        exp = int(numpy.ceil(exp))
    bits = numpy.ceil(numpy.log2(pow(10., -exp)))
    scale = pow(2., bits)
    datout = numpy.around(scale * data) / scale

    return datout


# Utilities to detect leaked instances.  See recipe 14.10 of the Python
# Cookbook by Martelli & Ascher.
# Maps class name -> list of weak references to logged instances.
tracked_classes = {}


def log_instance_creation(instance, name=None):
    """Register a weak reference to `instance` under `name`.

    If `name` is not given, the instance's class name is used.
    """
    if name is None:
        name = instance.__class__.__name__
    if name not in tracked_classes:
        tracked_classes[name] = []
    tracked_classes[name].append(weakref.ref(instance))


def string_to_classes(s):
    """Expand the class spec `s` into a list of tracked class names.

    ``'*'`` means every tracked class (sorted); otherwise `s` is a
    whitespace-separated list of class names.
    """
    if s == '*':
        c = sorted(tracked_classes.keys())
        return c
    else:
        return s.split()


def fetch_logged_instances(classes="*"):
    """Return ``(classname, count)`` pairs for the selected classes."""
    classnames = string_to_classes(classes)
    return [(cn, len(tracked_classes[cn])) for cn in classnames]


def count_logged_instances(classes, file=sys.stdout):
    """Write the number of logged instances per class to `file`."""
    for classname in string_to_classes(classes):
        file.write("%s: %d\n" % (classname, len(tracked_classes[classname])))


def list_logged_instances(classes, file=sys.stdout):
    """Write the ``repr`` of each still-alive logged instance to `file`."""
    for classname in string_to_classes(classes):
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            # Dead weak references (collected instances) are skipped.
            if obj is not None:
                file.write('    %s\n' % repr(obj))


def dump_logged_instances(classes, file=sys.stdout):
    """Write each still-alive logged instance and its ``__dict__`` to `file`."""
    for classname in string_to_classes(classes):
        file.write('\n%s:\n' % classname)
        for ref in tracked_classes[classname]:
            obj = ref()
            # Dead weak references (collected instances) are skipped.
            if obj is not None:
                file.write('    %s:\n' % obj)
                for key, value in obj.__dict__.items():
                    file.write('        %20s : %s\n' % (key, value))


#
# A class useful for cache usage
#
class CacheDict(dict):
    """A dictionary that prevents itself from growing too much."""

    def __init__(self, maxentries):
        # Maximum number of entries allowed before old ones are evicted.
        self.maxentries = maxentries
        super(CacheDict, self).__init__()

    def __setitem__(self, key, value):
        # Protection against growing the cache too much
        if len(self) > self.maxentries:
            # Remove a 10% of (arbitrary) elements from the cache.
            # ``//`` keeps the count an int (a float slice bound raises
            # TypeError) and at least one entry is always removed.
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(self.keys())[:entries_to_remove]:
                super(CacheDict, self).__delitem__(k)
        super(CacheDict, self).__setitem__(key, value)


class NailedDict(object):
    """A dictionary which ignores its items when it has nails on it."""

    def __init__(self, maxentries):
        # Maximum number of entries allowed before old ones are evicted.
        self.maxentries = maxentries
        self._cache = {}
        # While > 0 the cache behaves as empty and read-only.
        self._nailcount = 0

    # Only a restricted set of dictionary methods are supported.  That
    # is why we buy instead of inherit.

    # The following are intended to be used by ``Table`` code changing
    # the set of usable indexes.

    def clear(self):
        self._cache.clear()

    def nail(self):
        self._nailcount += 1

    def unnail(self):
        self._nailcount -= 1

    # The following are intended to be used by ``Table`` code handling
    # conditions.

    def __contains__(self, key):
        if self._nailcount > 0:
            return False
        return key in self._cache

    def __getitem__(self, key):
        if self._nailcount > 0:
            raise KeyError(key)
        return self._cache[key]

    def get(self, key, default=None):
        if self._nailcount > 0:
            return default
        return self._cache.get(key, default)

    def __setitem__(self, key, value):
        if self._nailcount > 0:
            return
        cache = self._cache
        # Protection against growing the cache too much
        if len(cache) > self.maxentries:
            # Remove a 10% of (arbitrary) elements from the cache
            entries_to_remove = max(self.maxentries // 10, 1)
            for k in list(cache.keys())[:entries_to_remove]:
                del cache[k]
        cache[key] = value


def detect_number_of_cores():
    """Detects the number of cores on a system.

    Cribbed from pp.  Returns 1 when the count cannot be determined.

    """

    # Linux, Unix and MacOS:
    if hasattr(os, "sysconf"):
        if "SC_NPROCESSORS_ONLN" in os.sysconf_names:
            # Linux & Unix:
            ncpus = os.sysconf("SC_NPROCESSORS_ONLN")
            if isinstance(ncpus, int) and ncpus > 0:
                return ncpus
        else:  # OSX:
            # ``os.popen2`` no longer exists in Python 3; use subprocess.
            return int(subprocess.check_output(
                ["sysctl", "-n", "hw.ncpu"]).strip())
    # Windows:
    if "NUMBER_OF_PROCESSORS" in os.environ:
        ncpus = int(os.environ["NUMBER_OF_PROCESSORS"])
        if ncpus > 0:
            return ncpus
    return 1  # Default


# Main part
# =========
def _test():
    """Run ``doctest`` on this module."""

    import doctest
    doctest.testmod()

if __name__ == '__main__':
    _test()


## Local Variables:
## mode: python
## py-indent-offset: 4
## tab-width: 4
## fill-column: 72
## End: