1""" 2NetCDF reader/writer module. 3 4This module is used to read and create NetCDF files. NetCDF files are 5accessed through the `netcdf_file` object. Data written to and from NetCDF 6files are contained in `netcdf_variable` objects. Attributes are given 7as member variables of the `netcdf_file` and `netcdf_variable` objects. 8 9This module implements the Scientific.IO.NetCDF API to read and create 10NetCDF files. The same API is also used in the PyNIO and pynetcdf 11modules, allowing these modules to be used interchangeably when working 12with NetCDF files. 13 14Only NetCDF3 is supported here; for NetCDF4 see 15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__, 16which has a similar API. 17 18""" 19 20# TODO: 21# * properly implement ``_FillValue``. 22# * fix character variables. 23# * implement PAGESIZE for Python 2.6? 24 25# The Scientific.IO.NetCDF API allows attributes to be added directly to 26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate 27# between user-set attributes and instance attributes, user-set attributes 28# are automatically stored in the ``_attributes`` attribute by overloading 29#``__setattr__``. This is the reason why the code sometimes uses 30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``; 31# otherwise the key would be inserted into userspace attributes. 32 33 34__all__ = ['netcdf_file', 'netcdf_variable'] 35 36 37import warnings 38import weakref 39from operator import mul 40from platform import python_implementation 41 42import mmap as mm 43 44import numpy as np 45from numpy import frombuffer, dtype, empty, array, asarray 46from numpy import little_endian as LITTLE_ENDIAN 47from functools import reduce 48 49 50IS_PYPY = python_implementation() == 'PyPy' 51 52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00' 53ZERO = b'\x00\x00\x00\x00' 54NC_BYTE = b'\x00\x00\x00\x01' 55NC_CHAR = b'\x00\x00\x00\x02' 56NC_SHORT = b'\x00\x00\x00\x03' 57NC_INT = b'\x00\x00\x00\x04' 58NC_FLOAT = b'\x00\x00\x00\x05' 59NC_DOUBLE = b'\x00\x00\x00\x06' 60NC_DIMENSION = b'\x00\x00\x00\n' 61NC_VARIABLE = b'\x00\x00\x00\x0b' 62NC_ATTRIBUTE = b'\x00\x00\x00\x0c' 63FILL_BYTE = b'\x81' 64FILL_CHAR = b'\x00' 65FILL_SHORT = b'\x80\x01' 66FILL_INT = b'\x80\x00\x00\x01' 67FILL_FLOAT = b'\x7C\xF0\x00\x00' 68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00' 69 70TYPEMAP = {NC_BYTE: ('b', 1), 71 NC_CHAR: ('c', 1), 72 NC_SHORT: ('h', 2), 73 NC_INT: ('i', 4), 74 NC_FLOAT: ('f', 4), 75 NC_DOUBLE: ('d', 8)} 76 77FILLMAP = {NC_BYTE: FILL_BYTE, 78 NC_CHAR: FILL_CHAR, 79 NC_SHORT: FILL_SHORT, 80 NC_INT: FILL_INT, 81 NC_FLOAT: FILL_FLOAT, 82 NC_DOUBLE: FILL_DOUBLE} 83 84REVERSE = {('b', 1): NC_BYTE, 85 ('B', 1): NC_CHAR, 86 ('c', 1): NC_CHAR, 87 ('h', 2): NC_SHORT, 88 ('i', 4): NC_INT, 89 ('f', 4): NC_FLOAT, 90 ('d', 8): NC_DOUBLE, 91 92 # these come from asarray(1).dtype.char and asarray('foo').dtype.char, 93 # used when getting the types from generic attributes. 94 ('l', 4): NC_INT, 95 ('S', 1): NC_CHAR} 96 97 98class netcdf_file: 99 """ 100 A file object for NetCDF data. 101 102 A `netcdf_file` object has two standard attributes: `dimensions` and 103 `variables`. The values of both are dictionaries, mapping dimension 104 names to their associated lengths and variable names to variables, 105 respectively. Application programs should never modify these 106 dictionaries. 107 108 All other attributes correspond to global attributes defined in the 109 NetCDF file. Global file attributes are created by assigning to an 110 attribute of the `netcdf_file` object. 111 112 Parameters 113 ---------- 114 filename : string or file-like 115 string -> filename 116 mode : {'r', 'w', 'a'}, optional 117 read-write-append mode, default is 'r' 118 mmap : None or bool, optional 119 Whether to mmap `filename` when reading. Default is True 120 when `filename` is a file name, False when `filename` is a 121 file-like object. Note that when mmap is in use, data arrays 122 returned refer directly to the mmapped data on disk, and the 123 file cannot be closed as long as references to it exist. 124 version : {1, 2}, optional 125 version of netcdf to read / write, where 1 means *Classic 126 format* and 2 means *64-bit offset format*. Default is 1. See 127 `here <https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_introduction.html#select_format>`__ 128 for more info. 129 maskandscale : bool, optional 130 Whether to automatically scale and/or mask data based on attributes. 131 Default is False. 132 133 Notes 134 ----- 135 The major advantage of this module over other modules is that it doesn't 136 require the code to be linked to the NetCDF libraries. This module is 137 derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_. 138 139 NetCDF files are a self-describing binary data format. The file contains 140 metadata that describes the dimensions and variables in the file. More 141 details about NetCDF files can be found `here 142 <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There 143 are three main sections to a NetCDF data structure: 144 145 1. Dimensions 146 2. Variables 147 3. Attributes 148 149 The dimensions section records the name and length of each dimension used 150 by the variables. The variables would then indicate which dimensions it 151 uses and any attributes such as data units, along with containing the data 152 values for the variable. It is good practice to include a 153 variable that is the same name as a dimension to provide the values for 154 that axes. Lastly, the attributes section would contain additional 155 information such as the name of the file creator or the instrument used to 156 collect the data. 157 158 When writing data to a NetCDF file, there is often the need to indicate the 159 'record dimension'. A record dimension is the unbounded dimension for a 160 variable. For example, a temperature variable may have dimensions of 161 latitude, longitude and time. If one wants to add more temperature data to 162 the NetCDF file as time progresses, then the temperature variable should 163 have the time dimension flagged as the record dimension. 164 165 In addition, the NetCDF file header contains the position of the data in 166 the file, so access can be done in an efficient manner without loading 167 unnecessary data into memory. It uses the ``mmap`` module to create 168 Numpy arrays mapped to the data on disk, for the same purpose. 169 170 Note that when `netcdf_file` is used to open a file with mmap=True 171 (default for read-only), arrays returned by it refer to data 172 directly on the disk. The file should not be closed, and cannot be cleanly 173 closed when asked, if such arrays are alive. You may want to copy data arrays 174 obtained from mmapped Netcdf file if they are to be processed after the file 175 is closed, see the example below. 176 177 Examples 178 -------- 179 To create a NetCDF file: 180 181 >>> from scipy.io import netcdf 182 >>> f = netcdf.netcdf_file('simple.nc', 'w') 183 >>> f.history = 'Created for a test' 184 >>> f.createDimension('time', 10) 185 >>> time = f.createVariable('time', 'i', ('time',)) 186 >>> time[:] = np.arange(10) 187 >>> time.units = 'days since 2008-01-01' 188 >>> f.close() 189 190 Note the assignment of ``arange(10)`` to ``time[:]``. Exposing the slice 191 of the time variable allows for the data to be set in the object, rather 192 than letting ``arange(10)`` overwrite the ``time`` variable. 193 194 To read the NetCDF file we just created: 195 196 >>> from scipy.io import netcdf 197 >>> f = netcdf.netcdf_file('simple.nc', 'r') 198 >>> print(f.history) 199 b'Created for a test' 200 >>> time = f.variables['time'] 201 >>> print(time.units) 202 b'days since 2008-01-01' 203 >>> print(time.shape) 204 (10,) 205 >>> print(time[-1]) 206 9 207 208 NetCDF files, when opened read-only, return arrays that refer 209 directly to memory-mapped data on disk: 210 211 >>> data = time[:] 212 >>> data.base.base 213 <mmap.mmap object at 0x7fe753763180> 214 215 If the data is to be processed after the file is closed, it needs 216 to be copied to main memory: 217 218 >>> data = time[:].copy() 219 >>> f.close() 220 >>> data.mean() 221 4.5 222 223 A NetCDF file can also be used as context manager: 224 225 >>> from scipy.io import netcdf 226 >>> with netcdf.netcdf_file('simple.nc', 'r') as f: 227 ... print(f.history) 228 b'Created for a test' 229 230 """ 231 def __init__(self, filename, mode='r', mmap=None, version=1, 232 maskandscale=False): 233 """Initialize netcdf_file from fileobj (str or file-like).""" 234 if mode not in 'rwa': 235 raise ValueError("Mode must be either 'r', 'w' or 'a'.") 236 237 if hasattr(filename, 'seek'): # file-like 238 self.fp = filename 239 self.filename = 'None' 240 if mmap is None: 241 mmap = False 242 elif mmap and not hasattr(filename, 'fileno'): 243 raise ValueError('Cannot use file object for mmap') 244 else: # maybe it's a string 245 self.filename = filename 246 omode = 'r+' if mode == 'a' else mode 247 self.fp = open(self.filename, '%sb' % omode) 248 if mmap is None: 249 # Mmapped files on PyPy cannot be usually closed 250 # before the GC runs, so it's better to use mmap=False 251 # as the default. 252 mmap = (not IS_PYPY) 253 254 if mode != 'r': 255 # Cannot read write-only files 256 mmap = False 257 258 self.use_mmap = mmap 259 self.mode = mode 260 self.version_byte = version 261 self.maskandscale = maskandscale 262 263 self.dimensions = {} 264 self.variables = {} 265 266 self._dims = [] 267 self._recs = 0 268 self._recsize = 0 269 270 self._mm = None 271 self._mm_buf = None 272 if self.use_mmap: 273 self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ) 274 self._mm_buf = np.frombuffer(self._mm, dtype=np.int8) 275 276 self._attributes = {} 277 278 if mode in 'ra': 279 self._read() 280 281 def __setattr__(self, attr, value): 282 # Store user defined attributes in a separate dict, 283 # so we can save them to file later. 284 try: 285 self._attributes[attr] = value 286 except AttributeError: 287 pass 288 self.__dict__[attr] = value 289 290 def close(self): 291 """Closes the NetCDF file.""" 292 if hasattr(self, 'fp') and not self.fp.closed: 293 try: 294 self.flush() 295 finally: 296 self.variables = {} 297 if self._mm_buf is not None: 298 ref = weakref.ref(self._mm_buf) 299 self._mm_buf = None 300 if ref() is None: 301 # self._mm_buf is gc'd, and we can close the mmap 302 self._mm.close() 303 else: 304 # we cannot close self._mm, since self._mm_buf is 305 # alive and there may still be arrays referring to it 306 warnings.warn(( 307 "Cannot close a netcdf_file opened with mmap=True, when " 308 "netcdf_variables or arrays referring to its data still exist. " 309 "All data arrays obtained from such files refer directly to " 310 "data on disk, and must be copied before the file can be cleanly " 311 "closed. (See netcdf_file docstring for more information on mmap.)" 312 ), category=RuntimeWarning) 313 self._mm = None 314 self.fp.close() 315 __del__ = close 316 317 def __enter__(self): 318 return self 319 320 def __exit__(self, type, value, traceback): 321 self.close() 322 323 def createDimension(self, name, length): 324 """ 325 Adds a dimension to the Dimension section of the NetCDF data structure. 326 327 Note that this function merely adds a new dimension that the variables can 328 reference. The values for the dimension, if desired, should be added as 329 a variable using `createVariable`, referring to this dimension. 330 331 Parameters 332 ---------- 333 name : str 334 Name of the dimension (Eg, 'lat' or 'time'). 335 length : int 336 Length of the dimension. 337 338 See Also 339 -------- 340 createVariable 341 342 """ 343 if length is None and self._dims: 344 raise ValueError("Only first dimension may be unlimited!") 345 346 self.dimensions[name] = length 347 self._dims.append(name) 348 349 def createVariable(self, name, type, dimensions): 350 """ 351 Create an empty variable for the `netcdf_file` object, specifying its data 352 type and the dimensions it uses. 353 354 Parameters 355 ---------- 356 name : str 357 Name of the new variable. 358 type : dtype or str 359 Data type of the variable. 360 dimensions : sequence of str 361 List of the dimension names used by the variable, in the desired order. 362 363 Returns 364 ------- 365 variable : netcdf_variable 366 The newly created ``netcdf_variable`` object. 367 This object has also been added to the `netcdf_file` object as well. 368 369 See Also 370 -------- 371 createDimension 372 373 Notes 374 ----- 375 Any dimensions to be used by the variable should already exist in the 376 NetCDF data structure or should be created by `createDimension` prior to 377 creating the NetCDF variable. 378 379 """ 380 shape = tuple([self.dimensions[dim] for dim in dimensions]) 381 shape_ = tuple([dim or 0 for dim in shape]) # replace None with 0 for NumPy 382 383 type = dtype(type) 384 typecode, size = type.char, type.itemsize 385 if (typecode, size) not in REVERSE: 386 raise ValueError("NetCDF 3 does not support type %s" % type) 387 388 data = empty(shape_, dtype=type.newbyteorder("B")) # convert to big endian always for NetCDF 3 389 self.variables[name] = netcdf_variable( 390 data, typecode, size, shape, dimensions, 391 maskandscale=self.maskandscale) 392 return self.variables[name] 393 394 def flush(self): 395 """ 396 Perform a sync-to-disk flush if the `netcdf_file` object is in write mode. 397 398 See Also 399 -------- 400 sync : Identical function 401 402 """ 403 if hasattr(self, 'mode') and self.mode in 'wa': 404 self._write() 405 sync = flush 406 407 def _write(self): 408 self.fp.seek(0) 409 self.fp.write(b'CDF') 410 self.fp.write(array(self.version_byte, '>b').tobytes()) 411 412 # Write headers and data. 413 self._write_numrecs() 414 self._write_dim_array() 415 self._write_gatt_array() 416 self._write_var_array() 417 418 def _write_numrecs(self): 419 # Get highest record count from all record variables. 420 for var in self.variables.values(): 421 if var.isrec and len(var.data) > self._recs: 422 self.__dict__['_recs'] = len(var.data) 423 self._pack_int(self._recs) 424 425 def _write_dim_array(self): 426 if self.dimensions: 427 self.fp.write(NC_DIMENSION) 428 self._pack_int(len(self.dimensions)) 429 for name in self._dims: 430 self._pack_string(name) 431 length = self.dimensions[name] 432 self._pack_int(length or 0) # replace None with 0 for record dimension 433 else: 434 self.fp.write(ABSENT) 435 436 def _write_gatt_array(self): 437 self._write_att_array(self._attributes) 438 439 def _write_att_array(self, attributes): 440 if attributes: 441 self.fp.write(NC_ATTRIBUTE) 442 self._pack_int(len(attributes)) 443 for name, values in attributes.items(): 444 self._pack_string(name) 445 self._write_att_values(values) 446 else: 447 self.fp.write(ABSENT) 448 449 def _write_var_array(self): 450 if self.variables: 451 self.fp.write(NC_VARIABLE) 452 self._pack_int(len(self.variables)) 453 454 # Sort variable names non-recs first, then recs. 455 def sortkey(n): 456 v = self.variables[n] 457 if v.isrec: 458 return (-1,) 459 return v._shape 460 variables = sorted(self.variables, key=sortkey, reverse=True) 461 462 # Set the metadata for all variables. 463 for name in variables: 464 self._write_var_metadata(name) 465 # Now that we have the metadata, we know the vsize of 466 # each record variable, so we can calculate recsize. 467 self.__dict__['_recsize'] = sum([ 468 var._vsize for var in self.variables.values() 469 if var.isrec]) 470 # Set the data for all variables. 471 for name in variables: 472 self._write_var_data(name) 473 else: 474 self.fp.write(ABSENT) 475 476 def _write_var_metadata(self, name): 477 var = self.variables[name] 478 479 self._pack_string(name) 480 self._pack_int(len(var.dimensions)) 481 for dimname in var.dimensions: 482 dimid = self._dims.index(dimname) 483 self._pack_int(dimid) 484 485 self._write_att_array(var._attributes) 486 487 nc_type = REVERSE[var.typecode(), var.itemsize()] 488 self.fp.write(nc_type) 489 490 if not var.isrec: 491 vsize = var.data.size * var.data.itemsize 492 vsize += -vsize % 4 493 else: # record variable 494 try: 495 vsize = var.data[0].size * var.data.itemsize 496 except IndexError: 497 vsize = 0 498 rec_vars = len([v for v in self.variables.values() 499 if v.isrec]) 500 if rec_vars > 1: 501 vsize += -vsize % 4 502 self.variables[name].__dict__['_vsize'] = vsize 503 self._pack_int(vsize) 504 505 # Pack a bogus begin, and set the real value later. 506 self.variables[name].__dict__['_begin'] = self.fp.tell() 507 self._pack_begin(0) 508 509 def _write_var_data(self, name): 510 var = self.variables[name] 511 512 # Set begin in file header. 513 the_beguine = self.fp.tell() 514 self.fp.seek(var._begin) 515 self._pack_begin(the_beguine) 516 self.fp.seek(the_beguine) 517 518 # Write data. 519 if not var.isrec: 520 self.fp.write(var.data.tobytes()) 521 count = var.data.size * var.data.itemsize 522 self._write_var_padding(var, var._vsize - count) 523 else: # record variable 524 # Handle rec vars with shape[0] < nrecs. 525 if self._recs > len(var.data): 526 shape = (self._recs,) + var.data.shape[1:] 527 # Resize in-place does not always work since 528 # the array might not be single-segment 529 try: 530 var.data.resize(shape) 531 except ValueError: 532 var.__dict__['data'] = np.resize(var.data, shape).astype(var.data.dtype) 533 534 pos0 = pos = self.fp.tell() 535 for rec in var.data: 536 # Apparently scalars cannot be converted to big endian. If we 537 # try to convert a ``=i4`` scalar to, say, '>i4' the dtype 538 # will remain as ``=i4``. 539 if not rec.shape and (rec.dtype.byteorder == '<' or 540 (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)): 541 rec = rec.byteswap() 542 self.fp.write(rec.tobytes()) 543 # Padding 544 count = rec.size * rec.itemsize 545 self._write_var_padding(var, var._vsize - count) 546 pos += self._recsize 547 self.fp.seek(pos) 548 self.fp.seek(pos0 + var._vsize) 549 550 def _write_var_padding(self, var, size): 551 encoded_fill_value = var._get_encoded_fill_value() 552 num_fills = size // len(encoded_fill_value) 553 self.fp.write(encoded_fill_value * num_fills) 554 555 def _write_att_values(self, values): 556 if hasattr(values, 'dtype'): 557 nc_type = REVERSE[values.dtype.char, values.dtype.itemsize] 558 else: 559 types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)] 560 561 # bytes index into scalars in py3k. Check for "string" types 562 if isinstance(values, (str, bytes)): 563 sample = values 564 else: 565 try: 566 sample = values[0] # subscriptable? 567 except TypeError: 568 sample = values # scalar 569 570 for class_, nc_type in types: 571 if isinstance(sample, class_): 572 break 573 574 typecode, size = TYPEMAP[nc_type] 575 dtype_ = '>%s' % typecode 576 # asarray() dies with bytes and '>c' in py3k. Change to 'S' 577 dtype_ = 'S' if dtype_ == '>c' else dtype_ 578 579 values = asarray(values, dtype=dtype_) 580 581 self.fp.write(nc_type) 582 583 if values.dtype.char == 'S': 584 nelems = values.itemsize 585 else: 586 nelems = values.size 587 self._pack_int(nelems) 588 589 if not values.shape and (values.dtype.byteorder == '<' or 590 (values.dtype.byteorder == '=' and LITTLE_ENDIAN)): 591 values = values.byteswap() 592 self.fp.write(values.tobytes()) 593 count = values.size * values.itemsize 594 self.fp.write(b'\x00' * (-count % 4)) # pad 595 596 def _read(self): 597 # Check magic bytes and version 598 magic = self.fp.read(3) 599 if not magic == b'CDF': 600 raise TypeError("Error: %s is not a valid NetCDF 3 file" % 601 self.filename) 602 self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0] 603 604 # Read file headers and set data. 605 self._read_numrecs() 606 self._read_dim_array() 607 self._read_gatt_array() 608 self._read_var_array() 609 610 def _read_numrecs(self): 611 self.__dict__['_recs'] = self._unpack_int() 612 613 def _read_dim_array(self): 614 header = self.fp.read(4) 615 if header not in [ZERO, NC_DIMENSION]: 616 raise ValueError("Unexpected header.") 617 count = self._unpack_int() 618 619 for dim in range(count): 620 name = self._unpack_string().decode('latin1') 621 length = self._unpack_int() or None # None for record dimension 622 self.dimensions[name] = length 623 self._dims.append(name) # preserve order 624 625 def _read_gatt_array(self): 626 for k, v in self._read_att_array().items(): 627 self.__setattr__(k, v) 628 629 def _read_att_array(self): 630 header = self.fp.read(4) 631 if header not in [ZERO, NC_ATTRIBUTE]: 632 raise ValueError("Unexpected header.") 633 count = self._unpack_int() 634 635 attributes = {} 636 for attr in range(count): 637 name = self._unpack_string().decode('latin1') 638 attributes[name] = self._read_att_values() 639 return attributes 640 641 def _read_var_array(self): 642 header = self.fp.read(4) 643 if header not in [ZERO, NC_VARIABLE]: 644 raise ValueError("Unexpected header.") 645 646 begin = 0 647 dtypes = {'names': [], 'formats': []} 648 rec_vars = [] 649 count = self._unpack_int() 650 for var in range(count): 651 (name, dimensions, shape, attributes, 652 typecode, size, dtype_, begin_, vsize) = self._read_var() 653 # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html 654 # Note that vsize is the product of the dimension lengths 655 # (omitting the record dimension) and the number of bytes 656 # per value (determined from the type), increased to the 657 # next multiple of 4, for each variable. If a record 658 # variable, this is the amount of space per record. The 659 # netCDF "record size" is calculated as the sum of the 660 # vsize's of all the record variables. 661 # 662 # The vsize field is actually redundant, because its value 663 # may be computed from other information in the header. The 664 # 32-bit vsize field is not large enough to contain the size 665 # of variables that require more than 2^32 - 4 bytes, so 666 # 2^32 - 1 is used in the vsize field for such variables. 667 if shape and shape[0] is None: # record variable 668 rec_vars.append(name) 669 # The netCDF "record size" is calculated as the sum of 670 # the vsize's of all the record variables. 671 self.__dict__['_recsize'] += vsize 672 if begin == 0: 673 begin = begin_ 674 dtypes['names'].append(name) 675 dtypes['formats'].append(str(shape[1:]) + dtype_) 676 677 # Handle padding with a virtual variable. 678 if typecode in 'bch': 679 actual_size = reduce(mul, (1,) + shape[1:]) * size 680 padding = -actual_size % 4 681 if padding: 682 dtypes['names'].append('_padding_%d' % var) 683 dtypes['formats'].append('(%d,)>b' % padding) 684 685 # Data will be set later. 686 data = None 687 else: # not a record variable 688 # Calculate size to avoid problems with vsize (above) 689 a_size = reduce(mul, shape, 1) * size 690 if self.use_mmap: 691 data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_) 692 data.shape = shape 693 else: 694 pos = self.fp.tell() 695 self.fp.seek(begin_) 696 data = frombuffer(self.fp.read(a_size), dtype=dtype_ 697 ).copy() 698 data.shape = shape 699 self.fp.seek(pos) 700 701 # Add variable. 702 self.variables[name] = netcdf_variable( 703 data, typecode, size, shape, dimensions, attributes, 704 maskandscale=self.maskandscale) 705 706 if rec_vars: 707 # Remove padding when only one record variable. 708 if len(rec_vars) == 1: 709 dtypes['names'] = dtypes['names'][:1] 710 dtypes['formats'] = dtypes['formats'][:1] 711 712 # Build rec array. 713 if self.use_mmap: 714 rec_array = self._mm_buf[begin:begin+self._recs*self._recsize].view(dtype=dtypes) 715 rec_array.shape = (self._recs,) 716 else: 717 pos = self.fp.tell() 718 self.fp.seek(begin) 719 rec_array = frombuffer(self.fp.read(self._recs*self._recsize), 720 dtype=dtypes).copy() 721 rec_array.shape = (self._recs,) 722 self.fp.seek(pos) 723 724 for var in rec_vars: 725 self.variables[var].__dict__['data'] = rec_array[var] 726 727 def _read_var(self): 728 name = self._unpack_string().decode('latin1') 729 dimensions = [] 730 shape = [] 731 dims = self._unpack_int() 732 733 for i in range(dims): 734 dimid = self._unpack_int() 735 dimname = self._dims[dimid] 736 dimensions.append(dimname) 737 dim = self.dimensions[dimname] 738 shape.append(dim) 739 dimensions = tuple(dimensions) 740 shape = tuple(shape) 741 742 attributes = self._read_att_array() 743 nc_type = self.fp.read(4) 744 vsize = self._unpack_int() 745 begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]() 746 747 typecode, size = TYPEMAP[nc_type] 748 dtype_ = '>%s' % typecode 749 750 return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize 751 752 def _read_att_values(self): 753 nc_type = self.fp.read(4) 754 n = self._unpack_int() 755 756 typecode, size = TYPEMAP[nc_type] 757 758 count = n*size 759 values = self.fp.read(int(count)) 760 self.fp.read(-count % 4) # read padding 761 762 if typecode != 'c': 763 values = frombuffer(values, dtype='>%s' % typecode).copy() 764 if values.shape == (1,): 765 values = values[0] 766 else: 767 values = values.rstrip(b'\x00') 768 return values 769 770 def _pack_begin(self, begin): 771 if self.version_byte == 1: 772 self._pack_int(begin) 773 elif self.version_byte == 2: 774 self._pack_int64(begin) 775 776 def _pack_int(self, value): 777 self.fp.write(array(value, '>i').tobytes()) 778 _pack_int32 = _pack_int 779 780 def _unpack_int(self): 781 return int(frombuffer(self.fp.read(4), '>i')[0]) 782 _unpack_int32 = _unpack_int 783 784 def _pack_int64(self, value): 785 self.fp.write(array(value, '>q').tobytes()) 786 787 def _unpack_int64(self): 788 return frombuffer(self.fp.read(8), '>q')[0] 789 790 def _pack_string(self, s): 791 count = len(s) 792 self._pack_int(count) 793 self.fp.write(s.encode('latin1')) 794 self.fp.write(b'\x00' * (-count % 4)) # pad 795 796 def _unpack_string(self): 797 count = self._unpack_int() 798 s = self.fp.read(count).rstrip(b'\x00') 799 self.fp.read(-count % 4) # read padding 800 return s 801 802 803class netcdf_variable: 804 """ 805 A data object for netcdf files. 806 807 `netcdf_variable` objects are constructed by calling the method 808 `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable` 809 objects behave much like array objects defined in numpy, except that their 810 data resides in a file. Data is read by indexing and written by assigning 811 to an indexed subset; the entire array can be accessed by the index ``[:]`` 812 or (for scalars) by using the methods `getValue` and `assignValue`. 813 `netcdf_variable` objects also have attribute `shape` with the same meaning 814 as for arrays, but the shape cannot be modified. There is another read-only 815 attribute `dimensions`, whose value is the tuple of dimension names. 816 817 All other attributes correspond to variable attributes defined in 818 the NetCDF file. Variable attributes are created by assigning to an 819 attribute of the `netcdf_variable` object. 820 821 Parameters 822 ---------- 823 data : array_like 824 The data array that holds the values for the variable. 825 Typically, this is initialized as empty, but with the proper shape. 826 typecode : dtype character code 827 Desired data-type for the data array. 828 size : int 829 Desired element size for the data array. 830 shape : sequence of ints 831 The shape of the array. This should match the lengths of the 832 variable's dimensions. 833 dimensions : sequence of strings 834 The names of the dimensions used by the variable. Must be in the 835 same order of the dimension lengths given by `shape`. 836 attributes : dict, optional 837 Attribute values (any type) keyed by string names. These attributes 838 become attributes for the netcdf_variable object. 839 maskandscale : bool, optional 840 Whether to automatically scale and/or mask data based on attributes. 841 Default is False. 842 843 844 Attributes 845 ---------- 846 dimensions : list of str 847 List of names of dimensions used by the variable object. 848 isrec, shape 849 Properties 850 851 See also 852 -------- 853 isrec, shape 854 855 """ 856 def __init__(self, data, typecode, size, shape, dimensions, 857 attributes=None, 858 maskandscale=False): 859 self.data = data 860 self._typecode = typecode 861 self._size = size 862 self._shape = shape 863 self.dimensions = dimensions 864 self.maskandscale = maskandscale 865 866 self._attributes = attributes or {} 867 for k, v in self._attributes.items(): 868 self.__dict__[k] = v 869 870 def __setattr__(self, attr, value): 871 # Store user defined attributes in a separate dict, 872 # so we can save them to file later. 873 try: 874 self._attributes[attr] = value 875 except AttributeError: 876 pass 877 self.__dict__[attr] = value 878 879 def isrec(self): 880 """Returns whether the variable has a record dimension or not. 881 882 A record dimension is a dimension along which additional data could be 883 easily appended in the netcdf data structure without much rewriting of 884 the data file. This attribute is a read-only property of the 885 `netcdf_variable`. 886 887 """ 888 return bool(self.data.shape) and not self._shape[0] 889 isrec = property(isrec) 890 891 def shape(self): 892 """Returns the shape tuple of the data variable. 893 894 This is a read-only attribute and can not be modified in the 895 same manner of other numpy arrays. 896 """ 897 return self.data.shape 898 shape = property(shape) 899 900 def getValue(self): 901 """ 902 Retrieve a scalar value from a `netcdf_variable` of length one. 903 904 Raises 905 ------ 906 ValueError 907 If the netcdf variable is an array of length greater than one, 908 this exception will be raised. 909 910 """ 911 return self.data.item() 912 913 def assignValue(self, value): 914 """ 915 Assign a scalar value to a `netcdf_variable` of length one. 916 917 Parameters 918 ---------- 919 value : scalar 920 Scalar value (of compatible type) to assign to a length-one netcdf 921 variable. This value will be written to file. 922 923 Raises 924 ------ 925 ValueError 926 If the input is not a scalar, or if the destination is not a length-one 927 netcdf variable. 928 929 """ 930 if not self.data.flags.writeable: 931 # Work-around for a bug in NumPy. Calling itemset() on a read-only 932 # memory-mapped array causes a seg. fault. 933 # See NumPy ticket #1622, and SciPy ticket #1202. 934 # This check for `writeable` can be removed when the oldest version 935 # of NumPy still supported by scipy contains the fix for #1622. 936 raise RuntimeError("variable is not writeable") 937 938 self.data.itemset(value) 939 940 def typecode(self): 941 """ 942 Return the typecode of the variable. 943 944 Returns 945 ------- 946 typecode : char 947 The character typecode of the variable (e.g., 'i' for int). 948 949 """ 950 return self._typecode 951 952 def itemsize(self): 953 """ 954 Return the itemsize of the variable. 955 956 Returns 957 ------- 958 itemsize : int 959 The element size of the variable (e.g., 8 for float64). 960 961 """ 962 return self._size 963 964 def __getitem__(self, index): 965 if not self.maskandscale: 966 return self.data[index] 967 968 data = self.data[index].copy() 969 missing_value = self._get_missing_value() 970 data = self._apply_missing_value(data, missing_value) 971 scale_factor = self._attributes.get('scale_factor') 972 add_offset = self._attributes.get('add_offset') 973 if add_offset is not None or scale_factor is not None: 974 data = data.astype(np.float64) 975 if scale_factor is not None: 976 data = data * scale_factor 977 if add_offset is not None: 978 data += add_offset 979 980 return data 981 982 def __setitem__(self, index, data): 983 if self.maskandscale: 984 missing_value = ( 985 self._get_missing_value() or 986 getattr(data, 'fill_value', 999999)) 987 self._attributes.setdefault('missing_value', missing_value) 988 self._attributes.setdefault('_FillValue', missing_value) 989 data = ((data - self._attributes.get('add_offset', 0.0)) / 990 self._attributes.get('scale_factor', 1.0)) 991 data = np.ma.asarray(data).filled(missing_value) 992 if self._typecode not in 'fd' and data.dtype.kind == 'f': 993 data = np.round(data) 994 995 # Expand data for record vars? 996 if self.isrec: 997 if isinstance(index, tuple): 998 rec_index = index[0] 999 else: 1000 rec_index = index 1001 if isinstance(rec_index, slice): 1002 recs = (rec_index.start or 0) + len(data) 1003 else: 1004 recs = rec_index + 1 1005 if recs > len(self.data): 1006 shape = (recs,) + self._shape[1:] 1007 # Resize in-place does not always work since 1008 # the array might not be single-segment 1009 try: 1010 self.data.resize(shape) 1011 except ValueError: 1012 self.__dict__['data'] = np.resize(self.data, shape).astype(self.data.dtype) 1013 self.data[index] = data 1014 1015 def _default_encoded_fill_value(self): 1016 """ 1017 The default encoded fill-value for this Variable's data type. 1018 """ 1019 nc_type = REVERSE[self.typecode(), self.itemsize()] 1020 return FILLMAP[nc_type] 1021 1022 def _get_encoded_fill_value(self): 1023 """ 1024 Returns the encoded fill value for this variable as bytes. 1025 1026 This is taken from either the _FillValue attribute, or the default fill 1027 value for this variable's data type. 1028 """ 1029 if '_FillValue' in self._attributes: 1030 fill_value = np.array(self._attributes['_FillValue'], 1031 dtype=self.data.dtype).tobytes() 1032 if len(fill_value) == self.itemsize(): 1033 return fill_value 1034 else: 1035 return self._default_encoded_fill_value() 1036 else: 1037 return self._default_encoded_fill_value() 1038 1039 def _get_missing_value(self): 1040 """ 1041 Returns the value denoting "no data" for this variable. 1042 1043 If this variable does not have a missing/fill value, returns None. 1044 1045 If both _FillValue and missing_value are given, give precedence to 1046 _FillValue. The netCDF standard gives special meaning to _FillValue; 1047 missing_value is just used for compatibility with old datasets. 1048 """ 1049 1050 if '_FillValue' in self._attributes: 1051 missing_value = self._attributes['_FillValue'] 1052 elif 'missing_value' in self._attributes: 1053 missing_value = self._attributes['missing_value'] 1054 else: 1055 missing_value = None 1056 1057 return missing_value 1058 1059 @staticmethod 1060 def _apply_missing_value(data, missing_value): 1061 """ 1062 Applies the given missing value to the data array. 1063 1064 Returns a numpy.ma array, with any value equal to missing_value masked 1065 out (unless missing_value is None, in which case the original array is 1066 returned). 1067 """ 1068 1069 if missing_value is None: 1070 newdata = data 1071 else: 1072 try: 1073 missing_value_isnan = np.isnan(missing_value) 1074 except (TypeError, NotImplementedError): 1075 # some data types (e.g., characters) cannot be tested for NaN 1076 missing_value_isnan = False 1077 1078 if missing_value_isnan: 1079 mymask = np.isnan(data) 1080 else: 1081 mymask = (data == missing_value) 1082 1083 newdata = np.ma.masked_where(mymask, data) 1084 1085 return newdata 1086 1087 1088NetCDFFile = netcdf_file 1089NetCDFVariable = netcdf_variable 1090