1"""
2NetCDF reader/writer module.
3
4This module is used to read and create NetCDF files. NetCDF files are
5accessed through the `netcdf_file` object. Data written to and from NetCDF
6files are contained in `netcdf_variable` objects. Attributes are given
7as member variables of the `netcdf_file` and `netcdf_variable` objects.
8
9This module implements the Scientific.IO.NetCDF API to read and create
10NetCDF files. The same API is also used in the PyNIO and pynetcdf
11modules, allowing these modules to be used interchangeably when working
12with NetCDF files.
13
14Only NetCDF3 is supported here; for NetCDF4 see
15`netCDF4-python <http://unidata.github.io/netcdf4-python/>`__,
16which has a similar API.
17
18"""
19
20# TODO:
21# * properly implement ``_FillValue``.
22# * fix character variables.
23# * implement PAGESIZE for Python 2.6?
24
25# The Scientific.IO.NetCDF API allows attributes to be added directly to
26# instances of ``netcdf_file`` and ``netcdf_variable``. To differentiate
27# between user-set attributes and instance attributes, user-set attributes
28# are automatically stored in the ``_attributes`` attribute by overloading
29#``__setattr__``. This is the reason why the code sometimes uses
30#``obj.__dict__['key'] = value``, instead of simply ``obj.key = value``;
31# otherwise the key would be inserted into userspace attributes.
32
33
34__all__ = ['netcdf_file', 'netcdf_variable']
35
36
37import warnings
38import weakref
39from operator import mul
40from platform import python_implementation
41
42import mmap as mm
43
44import numpy as np
45from numpy import frombuffer, dtype, empty, array, asarray
46from numpy import little_endian as LITTLE_ENDIAN
47from functools import reduce
48
49
50IS_PYPY = python_implementation() == 'PyPy'
51
52ABSENT = b'\x00\x00\x00\x00\x00\x00\x00\x00'
53ZERO = b'\x00\x00\x00\x00'
54NC_BYTE = b'\x00\x00\x00\x01'
55NC_CHAR = b'\x00\x00\x00\x02'
56NC_SHORT = b'\x00\x00\x00\x03'
57NC_INT = b'\x00\x00\x00\x04'
58NC_FLOAT = b'\x00\x00\x00\x05'
59NC_DOUBLE = b'\x00\x00\x00\x06'
60NC_DIMENSION = b'\x00\x00\x00\n'
61NC_VARIABLE = b'\x00\x00\x00\x0b'
62NC_ATTRIBUTE = b'\x00\x00\x00\x0c'
63FILL_BYTE = b'\x81'
64FILL_CHAR = b'\x00'
65FILL_SHORT = b'\x80\x01'
66FILL_INT = b'\x80\x00\x00\x01'
67FILL_FLOAT = b'\x7C\xF0\x00\x00'
68FILL_DOUBLE = b'\x47\x9E\x00\x00\x00\x00\x00\x00'
69
70TYPEMAP = {NC_BYTE: ('b', 1),
71           NC_CHAR: ('c', 1),
72           NC_SHORT: ('h', 2),
73           NC_INT: ('i', 4),
74           NC_FLOAT: ('f', 4),
75           NC_DOUBLE: ('d', 8)}
76
77FILLMAP = {NC_BYTE: FILL_BYTE,
78           NC_CHAR: FILL_CHAR,
79           NC_SHORT: FILL_SHORT,
80           NC_INT: FILL_INT,
81           NC_FLOAT: FILL_FLOAT,
82           NC_DOUBLE: FILL_DOUBLE}
83
84REVERSE = {('b', 1): NC_BYTE,
85           ('B', 1): NC_CHAR,
86           ('c', 1): NC_CHAR,
87           ('h', 2): NC_SHORT,
88           ('i', 4): NC_INT,
89           ('f', 4): NC_FLOAT,
90           ('d', 8): NC_DOUBLE,
91
92           # these come from asarray(1).dtype.char and asarray('foo').dtype.char,
93           # used when getting the types from generic attributes.
94           ('l', 4): NC_INT,
95           ('S', 1): NC_CHAR}
96
97
98class netcdf_file:
99    """
100    A file object for NetCDF data.
101
102    A `netcdf_file` object has two standard attributes: `dimensions` and
103    `variables`. The values of both are dictionaries, mapping dimension
104    names to their associated lengths and variable names to variables,
105    respectively. Application programs should never modify these
106    dictionaries.
107
108    All other attributes correspond to global attributes defined in the
109    NetCDF file. Global file attributes are created by assigning to an
110    attribute of the `netcdf_file` object.
111
112    Parameters
113    ----------
114    filename : string or file-like
115        string -> filename
116    mode : {'r', 'w', 'a'}, optional
117        read-write-append mode, default is 'r'
118    mmap : None or bool, optional
119        Whether to mmap `filename` when reading.  Default is True
120        when `filename` is a file name, False when `filename` is a
121        file-like object. Note that when mmap is in use, data arrays
122        returned refer directly to the mmapped data on disk, and the
123        file cannot be closed as long as references to it exist.
124    version : {1, 2}, optional
125        version of netcdf to read / write, where 1 means *Classic
126        format* and 2 means *64-bit offset format*.  Default is 1.  See
127        `here <https://www.unidata.ucar.edu/software/netcdf/docs/netcdf_introduction.html#select_format>`__
128        for more info.
129    maskandscale : bool, optional
130        Whether to automatically scale and/or mask data based on attributes.
131        Default is False.
132
133    Notes
134    -----
135    The major advantage of this module over other modules is that it doesn't
136    require the code to be linked to the NetCDF libraries. This module is
137    derived from `pupynere <https://bitbucket.org/robertodealmeida/pupynere/>`_.
138
139    NetCDF files are a self-describing binary data format. The file contains
140    metadata that describes the dimensions and variables in the file. More
141    details about NetCDF files can be found `here
142    <https://www.unidata.ucar.edu/software/netcdf/guide_toc.html>`__. There
143    are three main sections to a NetCDF data structure:
144
145    1. Dimensions
146    2. Variables
147    3. Attributes
148
149    The dimensions section records the name and length of each dimension used
150    by the variables. The variables would then indicate which dimensions it
151    uses and any attributes such as data units, along with containing the data
152    values for the variable. It is good practice to include a
153    variable that is the same name as a dimension to provide the values for
154    that axes. Lastly, the attributes section would contain additional
155    information such as the name of the file creator or the instrument used to
156    collect the data.
157
158    When writing data to a NetCDF file, there is often the need to indicate the
159    'record dimension'. A record dimension is the unbounded dimension for a
160    variable. For example, a temperature variable may have dimensions of
161    latitude, longitude and time. If one wants to add more temperature data to
162    the NetCDF file as time progresses, then the temperature variable should
163    have the time dimension flagged as the record dimension.
164
165    In addition, the NetCDF file header contains the position of the data in
166    the file, so access can be done in an efficient manner without loading
167    unnecessary data into memory. It uses the ``mmap`` module to create
168    Numpy arrays mapped to the data on disk, for the same purpose.
169
170    Note that when `netcdf_file` is used to open a file with mmap=True
171    (default for read-only), arrays returned by it refer to data
172    directly on the disk. The file should not be closed, and cannot be cleanly
173    closed when asked, if such arrays are alive. You may want to copy data arrays
174    obtained from mmapped Netcdf file if they are to be processed after the file
175    is closed, see the example below.
176
177    Examples
178    --------
179    To create a NetCDF file:
180
181    >>> from scipy.io import netcdf
182    >>> f = netcdf.netcdf_file('simple.nc', 'w')
183    >>> f.history = 'Created for a test'
184    >>> f.createDimension('time', 10)
185    >>> time = f.createVariable('time', 'i', ('time',))
186    >>> time[:] = np.arange(10)
187    >>> time.units = 'days since 2008-01-01'
188    >>> f.close()
189
190    Note the assignment of ``arange(10)`` to ``time[:]``.  Exposing the slice
191    of the time variable allows for the data to be set in the object, rather
192    than letting ``arange(10)`` overwrite the ``time`` variable.
193
194    To read the NetCDF file we just created:
195
196    >>> from scipy.io import netcdf
197    >>> f = netcdf.netcdf_file('simple.nc', 'r')
198    >>> print(f.history)
199    b'Created for a test'
200    >>> time = f.variables['time']
201    >>> print(time.units)
202    b'days since 2008-01-01'
203    >>> print(time.shape)
204    (10,)
205    >>> print(time[-1])
206    9
207
208    NetCDF files, when opened read-only, return arrays that refer
209    directly to memory-mapped data on disk:
210
211    >>> data = time[:]
212    >>> data.base.base
213    <mmap.mmap object at 0x7fe753763180>
214
215    If the data is to be processed after the file is closed, it needs
216    to be copied to main memory:
217
218    >>> data = time[:].copy()
219    >>> f.close()
220    >>> data.mean()
221    4.5
222
223    A NetCDF file can also be used as context manager:
224
225    >>> from scipy.io import netcdf
226    >>> with netcdf.netcdf_file('simple.nc', 'r') as f:
227    ...     print(f.history)
228    b'Created for a test'
229
230    """
231    def __init__(self, filename, mode='r', mmap=None, version=1,
232                 maskandscale=False):
233        """Initialize netcdf_file from fileobj (str or file-like)."""
234        if mode not in 'rwa':
235            raise ValueError("Mode must be either 'r', 'w' or 'a'.")
236
237        if hasattr(filename, 'seek'):  # file-like
238            self.fp = filename
239            self.filename = 'None'
240            if mmap is None:
241                mmap = False
242            elif mmap and not hasattr(filename, 'fileno'):
243                raise ValueError('Cannot use file object for mmap')
244        else:  # maybe it's a string
245            self.filename = filename
246            omode = 'r+' if mode == 'a' else mode
247            self.fp = open(self.filename, '%sb' % omode)
248            if mmap is None:
249                # Mmapped files on PyPy cannot be usually closed
250                # before the GC runs, so it's better to use mmap=False
251                # as the default.
252                mmap = (not IS_PYPY)
253
254        if mode != 'r':
255            # Cannot read write-only files
256            mmap = False
257
258        self.use_mmap = mmap
259        self.mode = mode
260        self.version_byte = version
261        self.maskandscale = maskandscale
262
263        self.dimensions = {}
264        self.variables = {}
265
266        self._dims = []
267        self._recs = 0
268        self._recsize = 0
269
270        self._mm = None
271        self._mm_buf = None
272        if self.use_mmap:
273            self._mm = mm.mmap(self.fp.fileno(), 0, access=mm.ACCESS_READ)
274            self._mm_buf = np.frombuffer(self._mm, dtype=np.int8)
275
276        self._attributes = {}
277
278        if mode in 'ra':
279            self._read()
280
281    def __setattr__(self, attr, value):
282        # Store user defined attributes in a separate dict,
283        # so we can save them to file later.
284        try:
285            self._attributes[attr] = value
286        except AttributeError:
287            pass
288        self.__dict__[attr] = value
289
290    def close(self):
291        """Closes the NetCDF file."""
292        if hasattr(self, 'fp') and not self.fp.closed:
293            try:
294                self.flush()
295            finally:
296                self.variables = {}
297                if self._mm_buf is not None:
298                    ref = weakref.ref(self._mm_buf)
299                    self._mm_buf = None
300                    if ref() is None:
301                        # self._mm_buf is gc'd, and we can close the mmap
302                        self._mm.close()
303                    else:
304                        # we cannot close self._mm, since self._mm_buf is
305                        # alive and there may still be arrays referring to it
306                        warnings.warn((
307                            "Cannot close a netcdf_file opened with mmap=True, when "
308                            "netcdf_variables or arrays referring to its data still exist. "
309                            "All data arrays obtained from such files refer directly to "
310                            "data on disk, and must be copied before the file can be cleanly "
311                            "closed. (See netcdf_file docstring for more information on mmap.)"
312                        ), category=RuntimeWarning)
313                self._mm = None
314                self.fp.close()
315    __del__ = close
316
317    def __enter__(self):
318        return self
319
320    def __exit__(self, type, value, traceback):
321        self.close()
322
323    def createDimension(self, name, length):
324        """
325        Adds a dimension to the Dimension section of the NetCDF data structure.
326
327        Note that this function merely adds a new dimension that the variables can
328        reference. The values for the dimension, if desired, should be added as
329        a variable using `createVariable`, referring to this dimension.
330
331        Parameters
332        ----------
333        name : str
334            Name of the dimension (Eg, 'lat' or 'time').
335        length : int
336            Length of the dimension.
337
338        See Also
339        --------
340        createVariable
341
342        """
343        if length is None and self._dims:
344            raise ValueError("Only first dimension may be unlimited!")
345
346        self.dimensions[name] = length
347        self._dims.append(name)
348
349    def createVariable(self, name, type, dimensions):
350        """
351        Create an empty variable for the `netcdf_file` object, specifying its data
352        type and the dimensions it uses.
353
354        Parameters
355        ----------
356        name : str
357            Name of the new variable.
358        type : dtype or str
359            Data type of the variable.
360        dimensions : sequence of str
361            List of the dimension names used by the variable, in the desired order.
362
363        Returns
364        -------
365        variable : netcdf_variable
366            The newly created ``netcdf_variable`` object.
367            This object has also been added to the `netcdf_file` object as well.
368
369        See Also
370        --------
371        createDimension
372
373        Notes
374        -----
375        Any dimensions to be used by the variable should already exist in the
376        NetCDF data structure or should be created by `createDimension` prior to
377        creating the NetCDF variable.
378
379        """
380        shape = tuple([self.dimensions[dim] for dim in dimensions])
381        shape_ = tuple([dim or 0 for dim in shape])  # replace None with 0 for NumPy
382
383        type = dtype(type)
384        typecode, size = type.char, type.itemsize
385        if (typecode, size) not in REVERSE:
386            raise ValueError("NetCDF 3 does not support type %s" % type)
387
388        data = empty(shape_, dtype=type.newbyteorder("B"))  # convert to big endian always for NetCDF 3
389        self.variables[name] = netcdf_variable(
390                data, typecode, size, shape, dimensions,
391                maskandscale=self.maskandscale)
392        return self.variables[name]
393
394    def flush(self):
395        """
396        Perform a sync-to-disk flush if the `netcdf_file` object is in write mode.
397
398        See Also
399        --------
400        sync : Identical function
401
402        """
403        if hasattr(self, 'mode') and self.mode in 'wa':
404            self._write()
405    sync = flush
406
407    def _write(self):
408        self.fp.seek(0)
409        self.fp.write(b'CDF')
410        self.fp.write(array(self.version_byte, '>b').tobytes())
411
412        # Write headers and data.
413        self._write_numrecs()
414        self._write_dim_array()
415        self._write_gatt_array()
416        self._write_var_array()
417
418    def _write_numrecs(self):
419        # Get highest record count from all record variables.
420        for var in self.variables.values():
421            if var.isrec and len(var.data) > self._recs:
422                self.__dict__['_recs'] = len(var.data)
423        self._pack_int(self._recs)
424
425    def _write_dim_array(self):
426        if self.dimensions:
427            self.fp.write(NC_DIMENSION)
428            self._pack_int(len(self.dimensions))
429            for name in self._dims:
430                self._pack_string(name)
431                length = self.dimensions[name]
432                self._pack_int(length or 0)  # replace None with 0 for record dimension
433        else:
434            self.fp.write(ABSENT)
435
436    def _write_gatt_array(self):
437        self._write_att_array(self._attributes)
438
439    def _write_att_array(self, attributes):
440        if attributes:
441            self.fp.write(NC_ATTRIBUTE)
442            self._pack_int(len(attributes))
443            for name, values in attributes.items():
444                self._pack_string(name)
445                self._write_att_values(values)
446        else:
447            self.fp.write(ABSENT)
448
449    def _write_var_array(self):
450        if self.variables:
451            self.fp.write(NC_VARIABLE)
452            self._pack_int(len(self.variables))
453
454            # Sort variable names non-recs first, then recs.
455            def sortkey(n):
456                v = self.variables[n]
457                if v.isrec:
458                    return (-1,)
459                return v._shape
460            variables = sorted(self.variables, key=sortkey, reverse=True)
461
462            # Set the metadata for all variables.
463            for name in variables:
464                self._write_var_metadata(name)
465            # Now that we have the metadata, we know the vsize of
466            # each record variable, so we can calculate recsize.
467            self.__dict__['_recsize'] = sum([
468                    var._vsize for var in self.variables.values()
469                    if var.isrec])
470            # Set the data for all variables.
471            for name in variables:
472                self._write_var_data(name)
473        else:
474            self.fp.write(ABSENT)
475
476    def _write_var_metadata(self, name):
477        var = self.variables[name]
478
479        self._pack_string(name)
480        self._pack_int(len(var.dimensions))
481        for dimname in var.dimensions:
482            dimid = self._dims.index(dimname)
483            self._pack_int(dimid)
484
485        self._write_att_array(var._attributes)
486
487        nc_type = REVERSE[var.typecode(), var.itemsize()]
488        self.fp.write(nc_type)
489
490        if not var.isrec:
491            vsize = var.data.size * var.data.itemsize
492            vsize += -vsize % 4
493        else:  # record variable
494            try:
495                vsize = var.data[0].size * var.data.itemsize
496            except IndexError:
497                vsize = 0
498            rec_vars = len([v for v in self.variables.values()
499                            if v.isrec])
500            if rec_vars > 1:
501                vsize += -vsize % 4
502        self.variables[name].__dict__['_vsize'] = vsize
503        self._pack_int(vsize)
504
505        # Pack a bogus begin, and set the real value later.
506        self.variables[name].__dict__['_begin'] = self.fp.tell()
507        self._pack_begin(0)
508
509    def _write_var_data(self, name):
510        var = self.variables[name]
511
512        # Set begin in file header.
513        the_beguine = self.fp.tell()
514        self.fp.seek(var._begin)
515        self._pack_begin(the_beguine)
516        self.fp.seek(the_beguine)
517
518        # Write data.
519        if not var.isrec:
520            self.fp.write(var.data.tobytes())
521            count = var.data.size * var.data.itemsize
522            self._write_var_padding(var, var._vsize - count)
523        else:  # record variable
524            # Handle rec vars with shape[0] < nrecs.
525            if self._recs > len(var.data):
526                shape = (self._recs,) + var.data.shape[1:]
527                # Resize in-place does not always work since
528                # the array might not be single-segment
529                try:
530                    var.data.resize(shape)
531                except ValueError:
532                    var.__dict__['data'] = np.resize(var.data, shape).astype(var.data.dtype)
533
534            pos0 = pos = self.fp.tell()
535            for rec in var.data:
536                # Apparently scalars cannot be converted to big endian. If we
537                # try to convert a ``=i4`` scalar to, say, '>i4' the dtype
538                # will remain as ``=i4``.
539                if not rec.shape and (rec.dtype.byteorder == '<' or
540                        (rec.dtype.byteorder == '=' and LITTLE_ENDIAN)):
541                    rec = rec.byteswap()
542                self.fp.write(rec.tobytes())
543                # Padding
544                count = rec.size * rec.itemsize
545                self._write_var_padding(var, var._vsize - count)
546                pos += self._recsize
547                self.fp.seek(pos)
548            self.fp.seek(pos0 + var._vsize)
549
550    def _write_var_padding(self, var, size):
551        encoded_fill_value = var._get_encoded_fill_value()
552        num_fills = size // len(encoded_fill_value)
553        self.fp.write(encoded_fill_value * num_fills)
554
555    def _write_att_values(self, values):
556        if hasattr(values, 'dtype'):
557            nc_type = REVERSE[values.dtype.char, values.dtype.itemsize]
558        else:
559            types = [(int, NC_INT), (float, NC_FLOAT), (str, NC_CHAR)]
560
561            # bytes index into scalars in py3k. Check for "string" types
562            if isinstance(values, (str, bytes)):
563                sample = values
564            else:
565                try:
566                    sample = values[0]  # subscriptable?
567                except TypeError:
568                    sample = values     # scalar
569
570            for class_, nc_type in types:
571                if isinstance(sample, class_):
572                    break
573
574        typecode, size = TYPEMAP[nc_type]
575        dtype_ = '>%s' % typecode
576        # asarray() dies with bytes and '>c' in py3k. Change to 'S'
577        dtype_ = 'S' if dtype_ == '>c' else dtype_
578
579        values = asarray(values, dtype=dtype_)
580
581        self.fp.write(nc_type)
582
583        if values.dtype.char == 'S':
584            nelems = values.itemsize
585        else:
586            nelems = values.size
587        self._pack_int(nelems)
588
589        if not values.shape and (values.dtype.byteorder == '<' or
590                (values.dtype.byteorder == '=' and LITTLE_ENDIAN)):
591            values = values.byteswap()
592        self.fp.write(values.tobytes())
593        count = values.size * values.itemsize
594        self.fp.write(b'\x00' * (-count % 4))  # pad
595
596    def _read(self):
597        # Check magic bytes and version
598        magic = self.fp.read(3)
599        if not magic == b'CDF':
600            raise TypeError("Error: %s is not a valid NetCDF 3 file" %
601                            self.filename)
602        self.__dict__['version_byte'] = frombuffer(self.fp.read(1), '>b')[0]
603
604        # Read file headers and set data.
605        self._read_numrecs()
606        self._read_dim_array()
607        self._read_gatt_array()
608        self._read_var_array()
609
610    def _read_numrecs(self):
611        self.__dict__['_recs'] = self._unpack_int()
612
613    def _read_dim_array(self):
614        header = self.fp.read(4)
615        if header not in [ZERO, NC_DIMENSION]:
616            raise ValueError("Unexpected header.")
617        count = self._unpack_int()
618
619        for dim in range(count):
620            name = self._unpack_string().decode('latin1')
621            length = self._unpack_int() or None  # None for record dimension
622            self.dimensions[name] = length
623            self._dims.append(name)  # preserve order
624
625    def _read_gatt_array(self):
626        for k, v in self._read_att_array().items():
627            self.__setattr__(k, v)
628
629    def _read_att_array(self):
630        header = self.fp.read(4)
631        if header not in [ZERO, NC_ATTRIBUTE]:
632            raise ValueError("Unexpected header.")
633        count = self._unpack_int()
634
635        attributes = {}
636        for attr in range(count):
637            name = self._unpack_string().decode('latin1')
638            attributes[name] = self._read_att_values()
639        return attributes
640
641    def _read_var_array(self):
642        header = self.fp.read(4)
643        if header not in [ZERO, NC_VARIABLE]:
644            raise ValueError("Unexpected header.")
645
646        begin = 0
647        dtypes = {'names': [], 'formats': []}
648        rec_vars = []
649        count = self._unpack_int()
650        for var in range(count):
651            (name, dimensions, shape, attributes,
652             typecode, size, dtype_, begin_, vsize) = self._read_var()
653            # https://www.unidata.ucar.edu/software/netcdf/guide_toc.html
654            # Note that vsize is the product of the dimension lengths
655            # (omitting the record dimension) and the number of bytes
656            # per value (determined from the type), increased to the
657            # next multiple of 4, for each variable. If a record
658            # variable, this is the amount of space per record. The
659            # netCDF "record size" is calculated as the sum of the
660            # vsize's of all the record variables.
661            #
662            # The vsize field is actually redundant, because its value
663            # may be computed from other information in the header. The
664            # 32-bit vsize field is not large enough to contain the size
665            # of variables that require more than 2^32 - 4 bytes, so
666            # 2^32 - 1 is used in the vsize field for such variables.
667            if shape and shape[0] is None:  # record variable
668                rec_vars.append(name)
669                # The netCDF "record size" is calculated as the sum of
670                # the vsize's of all the record variables.
671                self.__dict__['_recsize'] += vsize
672                if begin == 0:
673                    begin = begin_
674                dtypes['names'].append(name)
675                dtypes['formats'].append(str(shape[1:]) + dtype_)
676
677                # Handle padding with a virtual variable.
678                if typecode in 'bch':
679                    actual_size = reduce(mul, (1,) + shape[1:]) * size
680                    padding = -actual_size % 4
681                    if padding:
682                        dtypes['names'].append('_padding_%d' % var)
683                        dtypes['formats'].append('(%d,)>b' % padding)
684
685                # Data will be set later.
686                data = None
687            else:  # not a record variable
688                # Calculate size to avoid problems with vsize (above)
689                a_size = reduce(mul, shape, 1) * size
690                if self.use_mmap:
691                    data = self._mm_buf[begin_:begin_+a_size].view(dtype=dtype_)
692                    data.shape = shape
693                else:
694                    pos = self.fp.tell()
695                    self.fp.seek(begin_)
696                    data = frombuffer(self.fp.read(a_size), dtype=dtype_
697                                      ).copy()
698                    data.shape = shape
699                    self.fp.seek(pos)
700
701            # Add variable.
702            self.variables[name] = netcdf_variable(
703                    data, typecode, size, shape, dimensions, attributes,
704                    maskandscale=self.maskandscale)
705
706        if rec_vars:
707            # Remove padding when only one record variable.
708            if len(rec_vars) == 1:
709                dtypes['names'] = dtypes['names'][:1]
710                dtypes['formats'] = dtypes['formats'][:1]
711
712            # Build rec array.
713            if self.use_mmap:
714                rec_array = self._mm_buf[begin:begin+self._recs*self._recsize].view(dtype=dtypes)
715                rec_array.shape = (self._recs,)
716            else:
717                pos = self.fp.tell()
718                self.fp.seek(begin)
719                rec_array = frombuffer(self.fp.read(self._recs*self._recsize),
720                                       dtype=dtypes).copy()
721                rec_array.shape = (self._recs,)
722                self.fp.seek(pos)
723
724            for var in rec_vars:
725                self.variables[var].__dict__['data'] = rec_array[var]
726
727    def _read_var(self):
728        name = self._unpack_string().decode('latin1')
729        dimensions = []
730        shape = []
731        dims = self._unpack_int()
732
733        for i in range(dims):
734            dimid = self._unpack_int()
735            dimname = self._dims[dimid]
736            dimensions.append(dimname)
737            dim = self.dimensions[dimname]
738            shape.append(dim)
739        dimensions = tuple(dimensions)
740        shape = tuple(shape)
741
742        attributes = self._read_att_array()
743        nc_type = self.fp.read(4)
744        vsize = self._unpack_int()
745        begin = [self._unpack_int, self._unpack_int64][self.version_byte-1]()
746
747        typecode, size = TYPEMAP[nc_type]
748        dtype_ = '>%s' % typecode
749
750        return name, dimensions, shape, attributes, typecode, size, dtype_, begin, vsize
751
752    def _read_att_values(self):
753        nc_type = self.fp.read(4)
754        n = self._unpack_int()
755
756        typecode, size = TYPEMAP[nc_type]
757
758        count = n*size
759        values = self.fp.read(int(count))
760        self.fp.read(-count % 4)  # read padding
761
762        if typecode != 'c':
763            values = frombuffer(values, dtype='>%s' % typecode).copy()
764            if values.shape == (1,):
765                values = values[0]
766        else:
767            values = values.rstrip(b'\x00')
768        return values
769
770    def _pack_begin(self, begin):
771        if self.version_byte == 1:
772            self._pack_int(begin)
773        elif self.version_byte == 2:
774            self._pack_int64(begin)
775
776    def _pack_int(self, value):
777        self.fp.write(array(value, '>i').tobytes())
778    _pack_int32 = _pack_int
779
780    def _unpack_int(self):
781        return int(frombuffer(self.fp.read(4), '>i')[0])
782    _unpack_int32 = _unpack_int
783
784    def _pack_int64(self, value):
785        self.fp.write(array(value, '>q').tobytes())
786
787    def _unpack_int64(self):
788        return frombuffer(self.fp.read(8), '>q')[0]
789
790    def _pack_string(self, s):
791        count = len(s)
792        self._pack_int(count)
793        self.fp.write(s.encode('latin1'))
794        self.fp.write(b'\x00' * (-count % 4))  # pad
795
796    def _unpack_string(self):
797        count = self._unpack_int()
798        s = self.fp.read(count).rstrip(b'\x00')
799        self.fp.read(-count % 4)  # read padding
800        return s
801
802
803class netcdf_variable:
804    """
805    A data object for netcdf files.
806
807    `netcdf_variable` objects are constructed by calling the method
808    `netcdf_file.createVariable` on the `netcdf_file` object. `netcdf_variable`
809    objects behave much like array objects defined in numpy, except that their
810    data resides in a file. Data is read by indexing and written by assigning
811    to an indexed subset; the entire array can be accessed by the index ``[:]``
812    or (for scalars) by using the methods `getValue` and `assignValue`.
813    `netcdf_variable` objects also have attribute `shape` with the same meaning
814    as for arrays, but the shape cannot be modified. There is another read-only
815    attribute `dimensions`, whose value is the tuple of dimension names.
816
817    All other attributes correspond to variable attributes defined in
818    the NetCDF file. Variable attributes are created by assigning to an
819    attribute of the `netcdf_variable` object.
820
821    Parameters
822    ----------
823    data : array_like
824        The data array that holds the values for the variable.
825        Typically, this is initialized as empty, but with the proper shape.
826    typecode : dtype character code
827        Desired data-type for the data array.
828    size : int
829        Desired element size for the data array.
830    shape : sequence of ints
831        The shape of the array. This should match the lengths of the
832        variable's dimensions.
833    dimensions : sequence of strings
834        The names of the dimensions used by the variable. Must be in the
835        same order of the dimension lengths given by `shape`.
836    attributes : dict, optional
837        Attribute values (any type) keyed by string names. These attributes
838        become attributes for the netcdf_variable object.
839    maskandscale : bool, optional
840        Whether to automatically scale and/or mask data based on attributes.
841        Default is False.
842
843
844    Attributes
845    ----------
846    dimensions : list of str
847        List of names of dimensions used by the variable object.
848    isrec, shape
849        Properties
850
851    See also
852    --------
853    isrec, shape
854
855    """
856    def __init__(self, data, typecode, size, shape, dimensions,
857                 attributes=None,
858                 maskandscale=False):
859        self.data = data
860        self._typecode = typecode
861        self._size = size
862        self._shape = shape
863        self.dimensions = dimensions
864        self.maskandscale = maskandscale
865
866        self._attributes = attributes or {}
867        for k, v in self._attributes.items():
868            self.__dict__[k] = v
869
870    def __setattr__(self, attr, value):
871        # Store user defined attributes in a separate dict,
872        # so we can save them to file later.
873        try:
874            self._attributes[attr] = value
875        except AttributeError:
876            pass
877        self.__dict__[attr] = value
878
879    def isrec(self):
880        """Returns whether the variable has a record dimension or not.
881
882        A record dimension is a dimension along which additional data could be
883        easily appended in the netcdf data structure without much rewriting of
884        the data file. This attribute is a read-only property of the
885        `netcdf_variable`.
886
887        """
888        return bool(self.data.shape) and not self._shape[0]
889    isrec = property(isrec)
890
891    def shape(self):
892        """Returns the shape tuple of the data variable.
893
894        This is a read-only attribute and can not be modified in the
895        same manner of other numpy arrays.
896        """
897        return self.data.shape
898    shape = property(shape)
899
900    def getValue(self):
901        """
902        Retrieve a scalar value from a `netcdf_variable` of length one.
903
904        Raises
905        ------
906        ValueError
907            If the netcdf variable is an array of length greater than one,
908            this exception will be raised.
909
910        """
911        return self.data.item()
912
913    def assignValue(self, value):
914        """
915        Assign a scalar value to a `netcdf_variable` of length one.
916
917        Parameters
918        ----------
919        value : scalar
920            Scalar value (of compatible type) to assign to a length-one netcdf
921            variable. This value will be written to file.
922
923        Raises
924        ------
925        ValueError
926            If the input is not a scalar, or if the destination is not a length-one
927            netcdf variable.
928
929        """
930        if not self.data.flags.writeable:
931            # Work-around for a bug in NumPy.  Calling itemset() on a read-only
932            # memory-mapped array causes a seg. fault.
933            # See NumPy ticket #1622, and SciPy ticket #1202.
934            # This check for `writeable` can be removed when the oldest version
935            # of NumPy still supported by scipy contains the fix for #1622.
936            raise RuntimeError("variable is not writeable")
937
938        self.data.itemset(value)
939
940    def typecode(self):
941        """
942        Return the typecode of the variable.
943
944        Returns
945        -------
946        typecode : char
947            The character typecode of the variable (e.g., 'i' for int).
948
949        """
950        return self._typecode
951
952    def itemsize(self):
953        """
954        Return the itemsize of the variable.
955
956        Returns
957        -------
958        itemsize : int
959            The element size of the variable (e.g., 8 for float64).
960
961        """
962        return self._size
963
964    def __getitem__(self, index):
965        if not self.maskandscale:
966            return self.data[index]
967
968        data = self.data[index].copy()
969        missing_value = self._get_missing_value()
970        data = self._apply_missing_value(data, missing_value)
971        scale_factor = self._attributes.get('scale_factor')
972        add_offset = self._attributes.get('add_offset')
973        if add_offset is not None or scale_factor is not None:
974            data = data.astype(np.float64)
975        if scale_factor is not None:
976            data = data * scale_factor
977        if add_offset is not None:
978            data += add_offset
979
980        return data
981
982    def __setitem__(self, index, data):
983        if self.maskandscale:
984            missing_value = (
985                    self._get_missing_value() or
986                    getattr(data, 'fill_value', 999999))
987            self._attributes.setdefault('missing_value', missing_value)
988            self._attributes.setdefault('_FillValue', missing_value)
989            data = ((data - self._attributes.get('add_offset', 0.0)) /
990                    self._attributes.get('scale_factor', 1.0))
991            data = np.ma.asarray(data).filled(missing_value)
992            if self._typecode not in 'fd' and data.dtype.kind == 'f':
993                data = np.round(data)
994
995        # Expand data for record vars?
996        if self.isrec:
997            if isinstance(index, tuple):
998                rec_index = index[0]
999            else:
1000                rec_index = index
1001            if isinstance(rec_index, slice):
1002                recs = (rec_index.start or 0) + len(data)
1003            else:
1004                recs = rec_index + 1
1005            if recs > len(self.data):
1006                shape = (recs,) + self._shape[1:]
1007                # Resize in-place does not always work since
1008                # the array might not be single-segment
1009                try:
1010                    self.data.resize(shape)
1011                except ValueError:
1012                    self.__dict__['data'] = np.resize(self.data, shape).astype(self.data.dtype)
1013        self.data[index] = data
1014
1015    def _default_encoded_fill_value(self):
1016        """
1017        The default encoded fill-value for this Variable's data type.
1018        """
1019        nc_type = REVERSE[self.typecode(), self.itemsize()]
1020        return FILLMAP[nc_type]
1021
1022    def _get_encoded_fill_value(self):
1023        """
1024        Returns the encoded fill value for this variable as bytes.
1025
1026        This is taken from either the _FillValue attribute, or the default fill
1027        value for this variable's data type.
1028        """
1029        if '_FillValue' in self._attributes:
1030            fill_value = np.array(self._attributes['_FillValue'],
1031                                  dtype=self.data.dtype).tobytes()
1032            if len(fill_value) == self.itemsize():
1033                return fill_value
1034            else:
1035                return self._default_encoded_fill_value()
1036        else:
1037            return self._default_encoded_fill_value()
1038
1039    def _get_missing_value(self):
1040        """
1041        Returns the value denoting "no data" for this variable.
1042
1043        If this variable does not have a missing/fill value, returns None.
1044
1045        If both _FillValue and missing_value are given, give precedence to
1046        _FillValue. The netCDF standard gives special meaning to _FillValue;
1047        missing_value is  just used for compatibility with old datasets.
1048        """
1049
1050        if '_FillValue' in self._attributes:
1051            missing_value = self._attributes['_FillValue']
1052        elif 'missing_value' in self._attributes:
1053            missing_value = self._attributes['missing_value']
1054        else:
1055            missing_value = None
1056
1057        return missing_value
1058
1059    @staticmethod
1060    def _apply_missing_value(data, missing_value):
1061        """
1062        Applies the given missing value to the data array.
1063
1064        Returns a numpy.ma array, with any value equal to missing_value masked
1065        out (unless missing_value is None, in which case the original array is
1066        returned).
1067        """
1068
1069        if missing_value is None:
1070            newdata = data
1071        else:
1072            try:
1073                missing_value_isnan = np.isnan(missing_value)
1074            except (TypeError, NotImplementedError):
1075                # some data types (e.g., characters) cannot be tested for NaN
1076                missing_value_isnan = False
1077
1078            if missing_value_isnan:
1079                mymask = np.isnan(data)
1080            else:
1081                mymask = (data == missing_value)
1082
1083            newdata = np.ma.masked_where(mymask, data)
1084
1085        return newdata
1086
1087
1088NetCDFFile = netcdf_file
1089NetCDFVariable = netcdf_variable
1090