1# -*- coding: utf-8 -*-
2"""
3MSEED bindings to ObsPy core module.
4"""
5from __future__ import (absolute_import, division, print_function,
6                        unicode_literals)
7from future.builtins import *  # NOQA
8from future.utils import native_str
9
10import ctypes as C  # NOQA
11import io
12import os
13import warnings
14from struct import pack
15
16import numpy as np
17
18from obspy import Stream, Trace, UTCDateTime
19from obspy.core.compatibility import from_buffer
20from obspy.core.util import NATIVE_BYTEORDER
21from . import (util, InternalMSEEDError, ObsPyMSEEDFilesizeTooSmallError,
22               ObsPyMSEEDFilesizeTooLargeError, ObsPyMSEEDError)
23from .headers import (DATATYPES, ENCODINGS, HPTERROR, HPTMODULUS, SAMPLETYPE,
24                      SEED_CONTROL_HEADERS, UNSUPPORTED_ENCODINGS,
25                      VALID_CONTROL_HEADERS, VALID_RECORD_LENGTHS, Selections,
26                      SelectTime, Blkt100S, Blkt1001S, clibmseed)
27
28
29def _is_mseed(filename):
30    """
31    Checks whether a file is Mini-SEED/full SEED or not.
32
33    :type filename: str
34    :param filename: Mini-SEED/full SEED file to be checked.
35    :rtype: bool
36    :return: ``True`` if a Mini-SEED file.
37
38    This method only reads the first seven bytes of the file and checks
39    whether its a Mini-SEED or full SEED file.
40
41    It also is true for fullSEED files because libmseed can read the data
42    part of fullSEED files. If the method finds a fullSEED file it also
43    checks if it has a data part and returns False otherwise.
44
45    Thus it cannot be used to validate a Mini-SEED or SEED file.
46    """
47    # Open filehandler or use an existing file like object.
48    if not hasattr(filename, 'read'):
49        file_size = os.path.getsize(filename)
50        with io.open(filename, 'rb') as fh:
51            return __is_mseed(fh, file_size=file_size)
52    else:
53        initial_pos = filename.tell()
54        try:
55            if hasattr(filename, "getbuffer"):
56                file_size = filename.getbuffer().nbytes
57            try:
58                file_size = os.fstat(filename.fileno()).st_size
59            except Exception:
60                _p = filename.tell()
61                filename.seek(0, 2)
62                file_size = filename.tell()
63                filename.seek(_p, 0)
64            return __is_mseed(filename, file_size)
65        finally:
66            # Reset pointer.
67            filename.seek(initial_pos, 0)
68
69
70def __is_mseed(fp, file_size):  # NOQA
71    """
72    Internal version of _is_mseed working only with open file-like object.
73    """
74    header = fp.read(7)
75    # File has less than 7 characters
76    if len(header) != 7:
77        return False
78    # Sequence number must contains a single number or be empty
79    seqnr = header[0:6].replace(b'\x00', b' ').strip()
80    if not seqnr.isdigit() and seqnr != b'':
81        # This might be a completely empty sequence - in that case jump 128
82        # bytes and try again.
83        fp.seek(-7, 1)
84        try:
85            _t = fp.read(128).decode().strip()
86        except Exception:
87            return False
88        if not _t:
89            return __is_mseed(fp=fp, file_size=file_size)
90        return False
91    # Check for any valid control header types.
92    if header[6:7] in [b'D', b'R', b'Q', b'M']:
93        return True
94    elif header[6:7] == b" ":
95        # If empty, it might be a noise record. Check the rest of 128 bytes
96        # (min record size) and try again.
97        try:
98            _t = fp.read(128 - 7).decode().strip()
99        except Exception:
100            return False
101        if not _t:
102            return __is_mseed(fp=fp, file_size=file_size)
103        return False
104    # Check if Full-SEED
105    elif header[6:7] != b'V':
106        return False
107    # Parse the whole file and check whether it has has a data record.
108    fp.seek(1, 1)
109    _i = 0
110    # search for blockettes 010 or 008
111    while True:
112        if fp.read(3) in [b'010', b'008']:
113            break
114        # the next for bytes are the record length
115        # as we are currently at position 7 (fp.read(3) fp.read(4))
116        # we need to subtract this first before we seek
117        # to the appropriate position
118        try:
119            fp.seek(int(fp.read(4)) - 7, 1)
120        except Exception:
121            return False
122        _i += 1
123        # break after 3 cycles
124        if _i == 3:
125            return False
126
127    # Try to get a record length.
128    fp.seek(8, 1)
129    try:
130        record_length = pow(2, int(fp.read(2)))
131    except Exception:
132        return False
133
134    # Jump to the second record.
135    fp.seek(record_length + 6, 0)
136    # Loop over all records and return True if one record is a data
137    # record
138    while fp.tell() < file_size:
139        flag = fp.read(1)
140        if flag in [b'D', b'R', b'Q', b'M']:
141            return True
142        fp.seek(record_length - 1, 1)
143    return False
144
145
146def _read_mseed(mseed_object, starttime=None, endtime=None, headonly=False,
147                sourcename=None, reclen=None, details=False,
148                header_byteorder=None, verbose=None, **kwargs):
149    """
150    Reads a Mini-SEED file and returns a Stream object.
151
152    .. warning::
153        This function should NOT be called directly, it registers via the
154        ObsPy :func:`~obspy.core.stream.read` function, call this instead.
155
156    :param mseed_object: Filename or open file like object that contains the
157        binary Mini-SEED data. Any object that provides a read() method will be
158        considered to be a file like object.
159    :type starttime: :class:`~obspy.core.utcdatetime.UTCDateTime`
160    :param starttime: Only read data samples after or at the start time.
161    :type endtime: :class:`~obspy.core.utcdatetime.UTCDateTime`
162    :param endtime: Only read data samples before or at the end time.
163    :param headonly: Determines whether or not to unpack the data or just
164        read the headers.
165    :type sourcename: str
166    :param sourcename: Only read data with matching SEED ID (can contain
167        wildcards "?" and "*", e.g. "BW.UH2.*" or "*.??Z"). Defaults to
168        ``None``.
169    :param reclen: If it is None, it will be automatically determined for every
170        record. If it is known, just set it to the record length in bytes which
171        will increase the reading speed slightly.
172    :type details: bool, optional
173    :param details: If ``True`` read additional information: timing quality
174        and availability of calibration information.
175        Note, that the traces are then also split on these additional
176        information. Thus the number of traces in a stream will change.
177        Details are stored in the mseed stats AttribDict of each trace.
178        ``False`` specifies for both cases, that this information is not
179        available. ``blkt1001.timing_quality`` specifies the timing quality
180        from 0 to 100 [%]. ``calibration_type`` specifies the type of available
181        calibration information blockettes:
182
183        - ``1``: Step Calibration (Blockette 300)
184        - ``2``: Sine Calibration (Blockette 310)
185        - ``3``: Pseudo-random Calibration (Blockette 320)
186        - ``4``: Generic Calibration  (Blockette 390)
187        - ``-2``: Calibration Abort (Blockette 395)
188
189    :type header_byteorder: int or str, optional
190    :param header_byteorder: Must be either ``0`` or ``'<'`` for LSBF or
191        little-endian, ``1`` or ``'>'`` for MBF or big-endian. ``'='`` is the
192        native byte order. Used to enforce the header byte order. Useful in
193        some rare cases where the automatic byte order detection fails.
194
195    .. rubric:: Example
196
197    >>> from obspy import read
198    >>> st = read("/path/to/two_channels.mseed")
199    >>> print(st)  # doctest: +ELLIPSIS
200    2 Trace(s) in Stream:
201    BW.UH3..EHE | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples
202    BW.UH3..EHZ | 2010-06-20T00:00:00.279999Z - ... | 200.0 Hz, 386 samples
203
204    >>> from obspy import UTCDateTime
205    >>> st = read("/path/to/two_channels.mseed",
206    ...           starttime=UTCDateTime("2010-06-20T00:00:01"),
207    ...           sourcename="*.?HZ")
208    >>> print(st)  # doctest: +ELLIPSIS
209    1 Trace(s) in Stream:
210    BW.UH3..EHZ | 2010-06-20T00:00:00.999999Z - ... | 200.0 Hz, 242 samples
211
212    Read with ``details=True`` to read more details of the file if present.
213
214    >>> st = read("/path/to/timingquality.mseed", details=True)
215    >>> print(st[0].stats.mseed.blkt1001.timing_quality)
216    55
217
218    ``False`` means that the necessary information could not be found in the
219    file.
220
221    >>> print(st[0].stats.mseed.calibration_type)
222    False
223
224    Note that each change in timing quality from record to record may trigger a
225    new Trace object to be created so the Stream object may contain many Trace
226    objects if ``details=True`` is used.
227
228    >>> print(len(st))
229    101
230    """
231    # Parse the headonly and reclen flags.
232    if headonly is True:
233        unpack_data = 0
234    else:
235        unpack_data = 1
236    if reclen is None:
237        reclen = -1
238    elif reclen not in VALID_RECORD_LENGTHS:
239        msg = 'Invalid record length. Autodetection will be used.'
240        warnings.warn(msg)
241        reclen = -1
242
243    # Determine the byte order.
244    if header_byteorder == "=":
245        header_byteorder = NATIVE_BYTEORDER
246
247    if header_byteorder is None:
248        header_byteorder = -1
249    elif header_byteorder in [0, "0", "<"]:
250        header_byteorder = 0
251    elif header_byteorder in [1, "1", ">"]:
252        header_byteorder = 1
253
254    # Parse some information about the file.
255    if header_byteorder == 0:
256        bo = "<"
257    elif header_byteorder > 0:
258        bo = ">"
259    else:
260        bo = None
261
262    # Determine total size. Either its a file-like object.
263    if hasattr(mseed_object, "tell") and hasattr(mseed_object, "seek"):
264        cur_pos = mseed_object.tell()
265        mseed_object.seek(0, 2)
266        length = mseed_object.tell() - cur_pos
267        mseed_object.seek(cur_pos, 0)
268    # Or a file name.
269    else:
270        length = os.path.getsize(mseed_object)
271
272    if length < 128:
273        msg = "The smallest possible mini-SEED record is made up of 128 " \
274              "bytes. The passed buffer or file contains only %i." % length
275        raise ObsPyMSEEDFilesizeTooSmallError(msg)
276    elif length > 2 ** 31:
277        msg = ("ObsPy can currently not directly read mini-SEED files that "
278               "are larger than 2^31 bytes (2048 MiB). To still read it, "
279               "please read the file in chunks as documented here: "
280               "https://github.com/obspy/obspy/pull/1419"
281               "#issuecomment-221582369")
282        raise ObsPyMSEEDFilesizeTooLargeError(msg)
283
284    info = util.get_record_information(mseed_object, endian=bo)
285
286    # Map the encoding to a readable string value.
287    if "encoding" not in info:
288        # Hopefully detected by libmseed.
289        info["encoding"] = None
290    elif info["encoding"] in ENCODINGS:
291        info['encoding'] = ENCODINGS[info['encoding']][0]
292    elif info["encoding"] in UNSUPPORTED_ENCODINGS:
293        msg = ("Encoding '%s' (%i) is not supported by ObsPy. Please send "
294               "the file to the ObsPy developers so that we can add "
295               "support for it.") % \
296            (UNSUPPORTED_ENCODINGS[info['encoding']], info['encoding'])
297        raise ValueError(msg)
298    else:
299        msg = "Encoding '%i' is not a valid MiniSEED encoding." % \
300            info['encoding']
301        raise ValueError(msg)
302
303    record_length = info["record_length"]
304
305    # Only keep information relevant for the whole file.
306    info = {'filesize': info['filesize']}
307
308    # If it's a file name just read it.
309    if isinstance(mseed_object, (str, native_str)):
310        # Read to NumPy array which is used as a buffer.
311        bfr_np = np.fromfile(mseed_object, dtype=np.int8)
312    elif hasattr(mseed_object, 'read'):
313        bfr_np = from_buffer(mseed_object.read(), dtype=np.int8)
314
315    # Search for data records and pass only the data part to the underlying C
316    # routine.
317    offset = 0
318    # 0 to 9 are defined in a row in the ASCII charset.
319    min_ascii = ord('0')
320
321    # Small function to check whether an array of ASCII values contains only
322    # digits.
323    def isdigit(x):
324        return True if (x - min_ascii).max() <= 9 else False
325
326    while True:
327        # This should never happen
328        if (isdigit(bfr_np[offset:offset + 6]) is False) or \
329                (bfr_np[offset + 6] not in VALID_CONTROL_HEADERS):
330            msg = 'Not a valid (Mini-)SEED file'
331            raise Exception(msg)
332        elif bfr_np[offset + 6] in SEED_CONTROL_HEADERS:
333            offset += record_length
334            continue
335        break
336    bfr_np = bfr_np[offset:]
337    buflen = len(bfr_np)
338
339    # If no selection is given pass None to the C function.
340    if starttime is None and endtime is None and sourcename is None:
341        selections = None
342    else:
343        select_time = SelectTime()
344        selections = Selections()
345        selections.timewindows.contents = select_time
346        if starttime is not None:
347            if not isinstance(starttime, UTCDateTime):
348                msg = 'starttime needs to be a UTCDateTime object'
349                raise ValueError(msg)
350            selections.timewindows.contents.starttime = \
351                util._convert_datetime_to_mstime(starttime)
352        else:
353            # HPTERROR results in no starttime.
354            selections.timewindows.contents.starttime = HPTERROR
355        if endtime is not None:
356            if not isinstance(endtime, UTCDateTime):
357                msg = 'endtime needs to be a UTCDateTime object'
358                raise ValueError(msg)
359            selections.timewindows.contents.endtime = \
360                util._convert_datetime_to_mstime(endtime)
361        else:
362            # HPTERROR results in no starttime.
363            selections.timewindows.contents.endtime = HPTERROR
364        if sourcename is not None:
365            if not isinstance(sourcename, (str, native_str)):
366                msg = 'sourcename needs to be a string'
367                raise ValueError(msg)
368            # libmseed uses underscores as separators and allows filtering
369            # after the dataquality which is disabled here to not confuse
370            # users. (* == all data qualities)
371            selections.srcname = (sourcename.replace('.', '_') + '_*').\
372                encode('ascii', 'ignore')
373        else:
374            selections.srcname = b'*'
375    all_data = []
376
377    # Use a callback function to allocate the memory and keep track of the
378    # data.
379    def allocate_data(samplecount, sampletype):
380        # Enhanced sanity checking for libmseed 2.10 can result in the
381        # sampletype not being set. Just return an empty array in this case.
382        if sampletype == b"\x00":
383            data = np.empty(0)
384        else:
385            data = np.empty(samplecount, dtype=DATATYPES[sampletype])
386        all_data.append(data)
387        return data.ctypes.data
388    # XXX: Do this properly!
389    # Define Python callback function for use in C function. Return a long so
390    # it hopefully works on 32 and 64 bit systems.
391    alloc_data = C.CFUNCTYPE(C.c_longlong, C.c_int, C.c_char)(allocate_data)
392
393    try:
394        verbose = int(verbose)
395    except Exception:
396        verbose = 0
397
398    clibmseed.verbose = bool(verbose)
399    try:
400        lil = clibmseed.readMSEEDBuffer(
401            bfr_np, buflen, selections, C.c_int8(unpack_data),
402            reclen, C.c_int8(verbose), C.c_int8(details), header_byteorder,
403            alloc_data)
404    except InternalMSEEDError as e:
405        msg = e.args[0]
406        if offset and offset in str(e):
407            # Append the offset of the full SEED header if necessary. That way
408            # the C code does not have to deal with it.
409            if offset and "offset" in msg:
410                msg = ("%s\nThe file contains a %i byte dataless part at the "
411                       "beginning. Make sure to add that to the reported "
412                       "offset to get the actual location in the file." % (
413                           msg, offset))
414                raise InternalMSEEDError(msg)
415        else:
416            raise
417    finally:
418        # Make sure to reset the verbosity.
419        clibmseed.verbose = True
420
421    del selections
422
423    traces = []
424    try:
425        current_id = lil.contents
426    # Return stream if not traces are found.
427    except ValueError:
428        clibmseed.lil_free(lil)
429        del lil
430        return Stream()
431
432    while True:
433        # Init header with the essential information.
434        header = {'network': current_id.network.strip(),
435                  'station': current_id.station.strip(),
436                  'location': current_id.location.strip(),
437                  'channel': current_id.channel.strip(),
438                  'mseed': {'dataquality': current_id.dataquality}}
439        # Loop over segments.
440        try:
441            current_segment = current_id.firstSegment.contents
442        except ValueError:
443            break
444        while True:
445            header['sampling_rate'] = current_segment.samprate
446            header['starttime'] = \
447                util._convert_mstime_to_datetime(current_segment.starttime)
448            header['mseed']['number_of_records'] = current_segment.recordcnt
449            header['mseed']['encoding'] = \
450                ENCODINGS[current_segment.encoding][0]
451            header['mseed']['byteorder'] = \
452                "<" if current_segment.byteorder == 0 else ">"
453            header['mseed']['record_length'] = current_segment.reclen
454            if details:
455                timing_quality = current_segment.timing_quality
456                if timing_quality == 0xFF:  # 0xFF is mask for not known timing
457                    timing_quality = False
458                header['mseed']['blkt1001'] = {}
459                header['mseed']['blkt1001']['timing_quality'] = timing_quality
460                header['mseed']['calibration_type'] = \
461                    current_segment.calibration_type \
462                    if current_segment.calibration_type != -1 else False
463
464            if headonly is False:
465                # The data always will be in sequential order.
466                data = all_data.pop(0)
467                header['npts'] = len(data)
468            else:
469                data = np.array([])
470                header['npts'] = current_segment.samplecnt
471            # Make sure to init the number of samples.
472            # Py3k: convert to unicode
473            header['mseed'] = dict((k, v.decode())
474                                   if isinstance(v, bytes) else (k, v)
475                                   for k, v in header['mseed'].items())
476            header = dict((k, util._decode_header_field(k, v))
477                          if isinstance(v, bytes) else (k, v)
478                          for k, v in header.items())
479            trace = Trace(header=header, data=data)
480            # Append global information.
481            for key, value in info.items():
482                setattr(trace.stats.mseed, key, value)
483            traces.append(trace)
484            # A Null pointer access results in a ValueError
485            try:
486                current_segment = current_segment.next.contents
487            except ValueError:
488                break
489        try:
490            current_id = current_id.next.contents
491        except ValueError:
492            break
493
494    clibmseed.lil_free(lil)  # NOQA
495    del lil  # NOQA
496    return Stream(traces=traces)
497
498
499def _np_copy_astype(data, dtype):
500    """
501    Helper function to copy data, replacing `trace.data.copy().astype(dtype)`
502
503    This is done to avoid copying data in memory twice that could happen due to
504    an API change in numpy's `astype` method (`copy` kwarg).
505    This helper workaround function can be dropped once bumping minimum numpy
506    version to >=1.7.0
507    """
508    try:
509        return data.astype(dtype, copy=True)
510    except TypeError:
511        return data.copy().astype(dtype)
512
513
514def _write_mseed(stream, filename, encoding=None, reclen=None, byteorder=None,
515                 sequence_number=None, flush=True, verbose=0, **_kwargs):
516    """
517    Write Mini-SEED file from a Stream object.
518
519    .. warning::
520        This function should NOT be called directly, it registers via the
521        the :meth:`~obspy.core.stream.Stream.write` method of an
522        ObsPy :class:`~obspy.core.stream.Stream` object, call this instead.
523
524    :type stream: :class:`~obspy.core.stream.Stream`
525    :param stream: A Stream object.
526    :type filename: str
527    :param filename: Name of the output file or a file-like object.
528    :type encoding: int or str, optional
529    :param encoding: Should be set to one of the following supported Mini-SEED
530        data encoding formats: ``ASCII`` (``0``)*, ``INT16`` (``1``),
531        ``INT32`` (``3``), ``FLOAT32`` (``4``)*, ``FLOAT64`` (``5``)*,
532        ``STEIM1`` (``10``) and ``STEIM2`` (``11``)*. If no encoding is given
533        it will be derived from the dtype of the data and the appropriate
534        default encoding (depicted with an asterix) will be chosen.
535    :type reclen: int, optional
536    :param reclen: Should be set to the desired data record length in bytes
537        which must be expressible as 2 raised to the power of X where X is
538        between (and including) 8 to 20.
539        Defaults to 4096
540    :type byteorder: int or str, optional
541    :param byteorder: Must be either ``0`` or ``'<'`` for LSBF or
542        little-endian, ``1`` or ``'>'`` for MBF or big-endian. ``'='`` is the
543        native byte order. If ``-1`` it will be passed directly to libmseed
544        which will also default it to big endian. Defaults to big endian.
545    :type sequence_number: int, optional
546    :param sequence_number: Must be an integer ranging between 1 and 999999.
547        Represents the sequence count of the first record of each Trace.
548        Defaults to 1.
549    :type flush: bool, optional
550    :param flush: If ``True``, all data will be packed into records. If
551        ``False`` new records will only be created when there is enough data to
552        completely fill a record. Be careful with this. If in doubt, choose
553        ``True`` which is also the default value.
554    :type verbose: int, optional
555    :param verbose: Controls verbosity, a value of ``0`` will result in no
556        diagnostic output.
557
558    .. note::
559        The ``reclen``, ``encoding``, ``byteorder`` and ``sequence_count``
560        keyword arguments can be set in the ``stats.mseed`` of
561        each :class:`~obspy.core.trace.Trace` as well as ``kwargs`` of this
562        function. If both are given the ``kwargs`` will be used.
563
564        The ``stats.mseed.blkt1001.timing_quality`` value will also be written
565        if it is set.
566
567        The ``stats.mseed.blkt1001.timing_quality`` value will also be written
568        if it is set.
569
570    .. rubric:: Example
571
572    >>> from obspy import read
573    >>> st = read()
574    >>> st.write('filename.mseed', format='MSEED')  # doctest: +SKIP
575    """
576    # Map flush and verbose flags.
577    if flush:
578        flush = 1
579    else:
580        flush = 0
581
582    if not verbose:
583        verbose = 0
584    if verbose is True:
585        verbose = 1
586
587    # Some sanity checks for the keyword arguments.
588    if reclen is not None and reclen not in VALID_RECORD_LENGTHS:
589        msg = 'Invalid record length. The record length must be a value\n' + \
590            'of 2 to the power of X where 8 <= X <= 20.'
591        raise ValueError(msg)
592    if byteorder is not None and byteorder not in [0, 1, -1]:
593        if byteorder == '=':
594            byteorder = NATIVE_BYTEORDER
595        # If not elif because NATIVE_BYTEORDER is '<' or '>'.
596        if byteorder == '<':
597            byteorder = 0
598        elif byteorder == '>':
599            byteorder = 1
600        else:
601            msg = "Invalid byte order. It must be either '<', '>', '=', " + \
602                  "0, 1 or -1"
603            raise ValueError(msg)
604
605    if encoding is not None:
606        encoding = util._convert_and_check_encoding_for_writing(encoding)
607
608    if sequence_number is not None:
609        # Check sequence number type
610        try:
611            sequence_number = int(sequence_number)
612            # Check sequence number value
613            if sequence_number < 1 or sequence_number > 999999:
614                raise ValueError("Sequence number out of range. It must be " +
615                                 " between 1 and 999999.")
616        except (TypeError, ValueError):
617            msg = "Invalid sequence number. It must be an integer ranging " +\
618                  "from 1 to 999999."
619            raise ValueError(msg)
620
621    trace_attributes = []
622    use_blkt_1001 = False
623
624    # The data might need to be modified. To not modify the input data keep
625    # references of which data to finally write.
626    trace_data = []
627    # Loop over every trace and figure out the correct settings.
628    for _i, trace in enumerate(stream):
629        # Create temporary dict for storing information while writing.
630        trace_attr = {}
631        trace_attributes.append(trace_attr)
632
633        # Figure out whether or not to use Blockette 1001. This check is done
634        # once to ensure that Blockette 1001 is either written for every record
635        # in the file or for none. It checks the starttime, the sampling rate
636        # and the timing quality. If starttime or sampling rate has a precision
637        # of more than 100 microseconds, or if timing quality is set, \
638        # Blockette 1001 will be written for every record.
639        starttime = util._convert_datetime_to_mstime(trace.stats.starttime)
640        if starttime % 100 != 0 or (
641                trace.stats.sampling_rate and
642                (1.0 / trace.stats.sampling_rate * HPTMODULUS) % 100 != 0):
643            use_blkt_1001 = True
644
645        if hasattr(trace.stats, 'mseed') and \
646           hasattr(trace.stats['mseed'], 'blkt1001') and \
647           hasattr(trace.stats['mseed']['blkt1001'], 'timing_quality'):
648
649            timing_quality = trace.stats['mseed']['blkt1001']['timing_quality']
650            # Check timing quality type
651            try:
652                timing_quality = int(timing_quality)
653                if timing_quality < 0 or timing_quality > 100:
654                    raise ValueError("Timing quality out of range. It must be "
655                                     "between 0 and 100.")
656            except ValueError:
657                msg = "Invalid timing quality in Stream[%i].stats." % _i + \
658                    "mseed.timing_quality. It must be an integer ranging" + \
659                    " from 0 to 100"
660                raise ValueError(msg)
661
662            trace_attr['timing_quality'] = timing_quality
663            use_blkt_1001 = True
664        else:
665            trace_attr['timing_quality'] = timing_quality = 0
666
667        if sequence_number is not None:
668            trace_attr['sequence_number'] = sequence_number
669        elif hasattr(trace.stats, 'mseed') and \
670                hasattr(trace.stats['mseed'], 'sequence_number'):
671
672            sequence_number = trace.stats['mseed']['sequence_number']
673            # Check sequence number type
674            try:
675                sequence_number = int(sequence_number)
676                # Check sequence number value
677                if sequence_number < 1 or sequence_number > 999999:
678                    raise ValueError("Sequence number out of range in " +
679                                     "Stream[%i].stats. It must be between " +
680                                     "1 and 999999.")
681            except (TypeError, ValueError):
682                msg = "Invalid sequence number in Stream[%i].stats." % _i +\
683                      "mseed.sequence_number. It must be an integer ranging" +\
684                      " from 1 to 999999."
685                raise ValueError(msg)
686            trace_attr['sequence_number'] = sequence_number
687        else:
688            trace_attr['sequence_number'] = sequence_number = 1
689
690        # Set data quality to indeterminate (= D) if it is not already set.
691        try:
692            trace_attr['dataquality'] = \
693                trace.stats['mseed']['dataquality'].upper()
694        except Exception:
695            trace_attr['dataquality'] = 'D'
696        # Sanity check for the dataquality to get a nice Python exception
697        # instead of a C error.
698        if trace_attr['dataquality'] not in ['D', 'R', 'Q', 'M']:
699            msg = 'Invalid dataquality in Stream[%i].stats' % _i + \
700                  '.mseed.dataquality\n' + \
701                  'The dataquality for Mini-SEED must be either D, R, Q ' + \
702                  'or M. See the SEED manual for further information.'
703            raise ValueError(msg)
704
705        # Check that data is of the right type.
706        if not isinstance(trace.data, np.ndarray):
707            msg = "Unsupported data type %s" % type(trace.data) + \
708                  " for Stream[%i].data." % _i
709            raise ValueError(msg)
710
711        # Check if ndarray is contiguous (see #192, #193)
712        if not trace.data.flags.c_contiguous:
713            msg = "Detected non contiguous data array in Stream[%i]" % _i + \
714                  ".data. Trying to fix array."
715            warnings.warn(msg)
716            trace.data = np.ascontiguousarray(trace.data)
717
718        # Handle the record length.
719        if reclen is not None:
720            trace_attr['reclen'] = reclen
721        elif hasattr(trace.stats, 'mseed') and \
722                hasattr(trace.stats.mseed, 'record_length'):
723            if trace.stats.mseed.record_length in VALID_RECORD_LENGTHS:
724                trace_attr['reclen'] = trace.stats.mseed.record_length
725            else:
726                msg = 'Invalid record length in Stream[%i].stats.' % _i + \
727                      'mseed.reclen.\nThe record length must be a value ' + \
728                      'of 2 to the power of X where 8 <= X <= 20.'
729                raise ValueError(msg)
730        else:
731            trace_attr['reclen'] = 4096
732
733        # Handle the byte order.
734        if byteorder is not None:
735            trace_attr['byteorder'] = byteorder
736        elif hasattr(trace.stats, 'mseed') and \
737                hasattr(trace.stats.mseed, 'byteorder'):
738            if trace.stats.mseed.byteorder in [0, 1, -1]:
739                trace_attr['byteorder'] = trace.stats.mseed.byteorder
740            elif trace.stats.mseed.byteorder == '=':
741                if NATIVE_BYTEORDER == '<':
742                    trace_attr['byteorder'] = 0
743                else:
744                    trace_attr['byteorder'] = 1
745            elif trace.stats.mseed.byteorder == '<':
746                trace_attr['byteorder'] = 0
747            elif trace.stats.mseed.byteorder == '>':
748                trace_attr['byteorder'] = 1
749            else:
750                msg = "Invalid byteorder in Stream[%i].stats." % _i + \
751                    "mseed.byteorder. It must be either '<', '>', '='," + \
752                    " 0, 1 or -1"
753                raise ValueError(msg)
754        else:
755            trace_attr['byteorder'] = 1
756        if trace_attr['byteorder'] == -1:
757            if NATIVE_BYTEORDER == '<':
758                trace_attr['byteorder'] = 0
759            else:
760                trace_attr['byteorder'] = 1
761
762        # Handle the encoding.
763        trace_attr['encoding'] = None
764        # If encoding arrives here it is already guaranteed to be a valid
765        # integer encoding.
766        if encoding is not None:
767            # Check if the dtype for all traces is compatible with the enforced
768            # encoding.
769            ident, _, dtype, _ = ENCODINGS[encoding]
770            if trace.data.dtype.type != dtype:
771                msg = """
772                    Wrong dtype for Stream[%i].data for encoding %s.
773                    Please change the dtype of your data or use an appropriate
774                    encoding. See the obspy.io.mseed documentation for more
775                    information.
776                    """ % (_i, ident)
777                raise Exception(msg)
778            trace_attr['encoding'] = encoding
779        elif hasattr(trace.stats, 'mseed') and hasattr(trace.stats.mseed,
780                                                       'encoding'):
781            trace_attr["encoding"] = \
782                util._convert_and_check_encoding_for_writing(
783                    trace.stats.mseed.encoding)
784            # Check if the encoding matches the data's dtype.
785            if trace.data.dtype.type != ENCODINGS[trace_attr['encoding']][2]:
786                msg = 'The encoding specified in ' + \
787                      'trace.stats.mseed.encoding does not match the ' + \
788                      'dtype of the data.\nA suitable encoding will ' + \
789                      'be chosen.'
790                warnings.warn(msg, UserWarning)
791                trace_attr['encoding'] = None
792        # automatically detect encoding if no encoding is given.
793        if trace_attr['encoding'] is None:
794            if trace.data.dtype.type == np.int32:
795                trace_attr['encoding'] = 11
796            elif trace.data.dtype.type == np.float32:
797                trace_attr['encoding'] = 4
798            elif trace.data.dtype.type == np.float64:
799                trace_attr['encoding'] = 5
800            elif trace.data.dtype.type == np.int16:
801                trace_attr['encoding'] = 1
802            elif trace.data.dtype.type == np.dtype(native_str('|S1')).type:
803                trace_attr['encoding'] = 0
804            # int64 data not supported; if possible downcast to int32, else
805            # create error message. After bumping up to numpy 1.9.0 this check
806            # can be replaced by numpy.can_cast()
807            elif trace.data.dtype.type == np.int64:
808                # check if data can be safely downcast to int32
809                ii32 = np.iinfo(np.int32)
810                if abs(trace.max()) <= ii32.max:
811                    trace_data.append(_np_copy_astype(trace.data, np.int32))
812                    trace_attr['encoding'] = 11
813                else:
814                    msg = ("int64 data only supported when writing MSEED if "
815                           "it can be downcast to int32 type data.")
816                    raise ObsPyMSEEDError(msg)
817            else:
818                msg = "Unsupported data type %s in Stream[%i].data" % \
819                    (trace.data.dtype, _i)
820                raise Exception(msg)
821
822        # Convert data if necessary, otherwise store references in list.
823        if trace_attr['encoding'] == 1:
824            # INT16 needs INT32 data type
825            trace_data.append(_np_copy_astype(trace.data, np.int32))
826        else:
827            trace_data.append(trace.data)
828
829    # Do some final sanity checks and raise a warning if a file will be written
830    # with more than one different encoding, record length or byte order.
831    encodings = {_i['encoding'] for _i in trace_attributes}
832    reclens = {_i['reclen'] for _i in trace_attributes}
833    byteorders = {_i['byteorder'] for _i in trace_attributes}
834    msg = 'File will be written with more than one different %s.\n' + \
835          'This might have a negative influence on the compatibility ' + \
836          'with other programs.'
837    if len(encodings) != 1:
838        warnings.warn(msg % 'encodings')
839    if len(reclens) != 1:
840        warnings.warn(msg % 'record lengths')
841    if len(byteorders) != 1:
842        warnings.warn(msg % 'byteorders')
843
844    # Open filehandler or use an existing file like object.
845    if not hasattr(filename, 'write'):
846        f = open(filename, 'wb')
847    else:
848        f = filename
849
850    # Loop over every trace and finally write it to the filehandler.
851    for trace, data, trace_attr in zip(stream, trace_data, trace_attributes):
852        if not len(data):
853            msg = 'Skipping empty trace "%s".' % (trace)
854            warnings.warn(msg)
855            continue
856        # Create C struct MSTrace.
857        mst = MST(trace, data, dataquality=trace_attr['dataquality'])
858
859        # Initialize packedsamples pointer for the mst_pack function
860        packedsamples = C.c_int()
861
862        # Callback function for mst_pack to actually write the file
863        def record_handler(record, reclen, _stream):
864            f.write(record[0:reclen])
865        # Define Python callback function for use in C function
866        rec_handler = C.CFUNCTYPE(C.c_void_p, C.POINTER(C.c_char), C.c_int,
867                                  C.c_void_p)(record_handler)
868
869        # Fill up msr record structure, this is already contained in
870        # mstg, however if blk1001 is set we need it anyway
871        msr = clibmseed.msr_init(None)
872        msr.contents.network = trace.stats.network.encode('ascii', 'strict')
873        msr.contents.station = trace.stats.station.encode('ascii', 'strict')
874        msr.contents.location = trace.stats.location.encode('ascii', 'strict')
875        msr.contents.channel = trace.stats.channel.encode('ascii', 'strict')
876        msr.contents.dataquality = trace_attr['dataquality'].\
877            encode('ascii', 'strict')
878
879        # Set starting sequence number
880        msr.contents.sequence_number = trace_attr['sequence_number']
881
882        # Only use Blockette 1001 if necessary.
883        if use_blkt_1001:
884            # Timing quality has been set in trace_attr
885
886            size = C.sizeof(Blkt1001S)
887            # Only timing quality matters here, other blockette attributes will
888            # be filled by libmseed.msr_normalize_header
889            blkt_value = pack(native_str("BBBB"), trace_attr['timing_quality'],
890                              0, 0, 0)
891            blkt_ptr = C.create_string_buffer(blkt_value, len(blkt_value))
892
893            # Usually returns a pointer to the added blockette in the
894            # blockette link chain and a NULL pointer if it fails.
895            # NULL pointers have a false boolean value according to the
896            # ctypes manual.
897            ret_val = clibmseed.msr_addblockette(msr, blkt_ptr,
898                                                 size, 1001, 0)
899
900            if bool(ret_val) is False:
901                clibmseed.msr_free(C.pointer(msr))
902                del msr
903                raise Exception('Error in msr_addblockette')
904
905        # Only use Blockette 100 if necessary.
906        # Determine if a blockette 100 will be needed to represent the input
907        # sample rate or if the sample rate in the fixed section of the data
908        # header will suffice (see ms_genfactmult in libmseed/genutils.c)
909        use_blkt_100 = False
910
911        _factor = C.c_int16()
912        _multiplier = C.c_int16()
913        _retval = clibmseed.ms_genfactmult(
914            trace.stats.sampling_rate, C.pointer(_factor),
915            C.pointer(_multiplier))
916        # Use blockette 100 if ms_genfactmult() failed.
917        if _retval != 0:
918            use_blkt_100 = True
919        # Otherwise figure out if ms_genfactmult() found exact factors.
920        # Otherwise write blockette 100.
921        else:
922            ms_sr = clibmseed.ms_nomsamprate(_factor.value, _multiplier.value)
923
924            # It is also necessary if the libmseed calculated sampling rate
925            # would result in a loss of accuracy - the floating point
926            # comparision is on purpose here as it will always try to
927            # preserve all accuracy.
928            # Cast to float32 to not add blockette 100 for values
929            # that cannot be represented with 32bits.
930            if np.float32(ms_sr) != np.float32(trace.stats.sampling_rate):
931                use_blkt_100 = True
932
933        if use_blkt_100:
934            size = C.sizeof(Blkt100S)
935            blkt100 = C.c_char(b' ')
936            C.memset(C.pointer(blkt100), 0, size)
937            ret_val = clibmseed.msr_addblockette(
938                msr, C.pointer(blkt100), size, 100, 0)  # NOQA
939            # Usually returns a pointer to the added blockette in the
940            # blockette link chain and a NULL pointer if it fails.
941            # NULL pointers have a false boolean value according to the
942            # ctypes manual.
943            if bool(ret_val) is False:
944                clibmseed.msr_free(C.pointer(msr))  # NOQA
945                del msr  # NOQA
946                raise Exception('Error in msr_addblockette')
947
948        # Pack mstg into a MSEED file using the callback record_handler as
949        # write method.
950        errcode = clibmseed.mst_pack(
951            mst.mst, rec_handler, None, trace_attr['reclen'],
952            trace_attr['encoding'], trace_attr['byteorder'],
953            C.byref(packedsamples), flush, verbose, msr)  # NOQA
954
955        if errcode == 0:
956            msg = ("Did not write any data for trace '%s' even though it "
957                   "contains data values.") % trace
958            raise ValueError(msg)
959        if errcode == -1:
960            clibmseed.msr_free(C.pointer(msr))  # NOQA
961            del mst, msr  # NOQA
962            raise Exception('Error in mst_pack')
963        # Deallocate any allocated memory.
964        clibmseed.msr_free(C.pointer(msr))  # NOQA
965        del mst, msr  # NOQA
966    # Close if its a file handler.
967    if not hasattr(filename, 'write'):
968        f.close()
969
970
971class MST(object):
972    """
973    Class that transforms a ObsPy Trace object to a libmseed internal MSTrace
974    struct.
975    """
976    def __init__(self, trace, data, dataquality):
977        """
978        The init function requires a ObsPy Trace object which will be used to
979        fill self.mstg.
980        """
981        self.mst = clibmseed.mst_init(None)
982        # Figure out the datatypes.
983        sampletype = SAMPLETYPE[data.dtype.type]
984
985        # Set the header values.
986        self.mst.contents.network = trace.stats.network.\
987            encode('ascii', 'strict')
988        self.mst.contents.station = trace.stats.station.\
989            encode('ascii', 'strict')
990        self.mst.contents.location = trace.stats.location.\
991            encode('ascii', 'strict')
992        self.mst.contents.channel = trace.stats.channel.\
993            encode('ascii', 'strict')
994        self.mst.contents.dataquality = dataquality.encode('ascii', 'strict')
995        self.mst.contents.type = b'\x00'
996        self.mst.contents.starttime = \
997            util._convert_datetime_to_mstime(trace.stats.starttime)
998        self.mst.contents.endtime = \
999            util._convert_datetime_to_mstime(trace.stats.endtime)
1000        self.mst.contents.samprate = trace.stats.sampling_rate
1001        self.mst.contents.samplecnt = trace.stats.npts
1002        self.mst.contents.numsamples = trace.stats.npts
1003        self.mst.contents.sampletype = sampletype.encode('ascii', 'strict')
1004
1005        # libmseed expects data in the native byte order.
1006        if data.dtype.byteorder != "=":
1007            data = data.byteswap()
1008
1009        # Copy the data. The copy appears to be necessary so that Python's
1010        # garbage collection does not interfere it.
1011        bytecount = data.itemsize * data.size
1012
1013        self.mst.contents.datasamples = clibmseed.allocate_bytes(bytecount)
1014        C.memmove(self.mst.contents.datasamples, data.ctypes.get_data(),
1015                  bytecount)
1016
1017    def __del__(self):
1018        """
1019        Frees all allocated memory.
1020        """
1021        # This also frees the data of the associated datasamples pointer.
1022        clibmseed.mst_free(C.pointer(self.mst))
1023        del self.mst
1024
1025
1026if __name__ == '__main__':
1027    import doctest
1028    doctest.testmod(exclude_empty=True)
1029