1# -*- coding: utf-8 -*-
2"""
3MiniSEED specific utilities.
4"""
5from __future__ import (absolute_import, division, print_function,
6                        unicode_literals)
7from future.builtins import *  # NOQA
8from future.utils import native_str
9
10import collections
11import ctypes as C  # NOQA
12import os
13import sys
14import warnings
15from datetime import datetime
16from struct import pack, unpack
17
18import numpy as np
19
20from obspy import UTCDateTime
21from obspy.core.compatibility import from_buffer, collections_abc
22from obspy.core.util.decorator import ObsPyDeprecationWarning
23from . import InternalMSEEDParseTimeError
24from .headers import (ENCODINGS, ENDIAN, FIXED_HEADER_ACTIVITY_FLAGS,
25                      FIXED_HEADER_DATA_QUAL_FLAGS,
26                      FIXED_HEADER_IO_CLOCK_FLAGS, HPTMODULUS,
27                      SAMPLESIZES, UNSUPPORTED_ENCODINGS, MSRecord,
28                      MS_NOERROR, clibmseed)
29
30
31def get_start_and_end_time(file_or_file_object):
32    """
33    Returns the start and end time of a MiniSEED file or file-like object.
34
35    :type file_or_file_object: str or file
36    :param file_or_file_object: MiniSEED file name or open file-like object
37        containing a MiniSEED record.
38    :return: tuple (start time of first record, end time of last record)
39
40    This method will return the start time of the first record and the end time
41    of the last record. Keep in mind that it will not return the correct result
42    if the records in the MiniSEED file do not have a chronological ordering.
43
44    The returned end time is the time of the last data sample and not the
45    time that the last sample covers.
46
47    .. rubric:: Example
48
49    >>> from obspy.core.util import get_example_file
50    >>> filename = get_example_file(
51    ...     "BW.BGLD.__.EHE.D.2008.001.first_10_records")
52    >>> get_start_and_end_time(filename)  # doctest: +NORMALIZE_WHITESPACE
53        (UTCDateTime(2007, 12, 31, 23, 59, 59, 915000),
54        UTCDateTime(2008, 1, 1, 0, 0, 20, 510000))
55
56    It also works with an open file pointer. The file pointer itself will not
57    be changed.
58
59    >>> f = open(filename, 'rb')
60    >>> get_start_and_end_time(f)  # doctest: +NORMALIZE_WHITESPACE
61        (UTCDateTime(2007, 12, 31, 23, 59, 59, 915000),
62        UTCDateTime(2008, 1, 1, 0, 0, 20, 510000))
63
64    And also with a MiniSEED file stored in a BytesIO
65
66    >>> import io
67    >>> file_object = io.BytesIO(f.read())
68    >>> get_start_and_end_time(file_object)  # doctest: +NORMALIZE_WHITESPACE
69        (UTCDateTime(2007, 12, 31, 23, 59, 59, 915000),
70        UTCDateTime(2008, 1, 1, 0, 0, 20, 510000))
71    >>> file_object.close()
72
73    If the file pointer does not point to the first record, the start time will
74    refer to the record it points to.
75
76    >>> _ = f.seek(512)
77    >>> get_start_and_end_time(f)  # doctest: +NORMALIZE_WHITESPACE
78        (UTCDateTime(2008, 1, 1, 0, 0, 1, 975000),
79        UTCDateTime(2008, 1, 1, 0, 0, 20, 510000))
80
81    The same is valid for a file-like object.
82
83    >>> file_object = io.BytesIO(f.read())
84    >>> get_start_and_end_time(file_object)  # doctest: +NORMALIZE_WHITESPACE
85        (UTCDateTime(2008, 1, 1, 0, 0, 1, 975000),
86        UTCDateTime(2008, 1, 1, 0, 0, 20, 510000))
87    >>> f.close()
88    """
89    # Get the starttime of the first record.
90    info = get_record_information(file_or_file_object)
91    starttime = info['starttime']
92    # Get the end time of the last record.
93    info = get_record_information(
94        file_or_file_object,
95        (info['number_of_records'] - 1) * info['record_length'])
96    endtime = info['endtime']
97    return starttime, endtime
98
99
100def get_flags(files, starttime=None, endtime=None,
101              io_flags=True, activity_flags=True,
102              data_quality_flags=True, timing_quality=True):
103    """
104    Counts all data quality, I/O, and activity flags of the given MiniSEED
105    file and returns statistics about the timing quality if applicable.
106
107    :param files: MiniSEED file or list of MiniSEED files.
108    :type files: list, str, file-like object
109    :param starttime: Only use records whose end time is larger than this
110        given time.
111    :type starttime: str or :class:`obspy.core.utcdatetime.UTCDateTime`
112    :param endtime: Only use records whose start time is smaller than this
113        given time.
114    :type endtime: str or :class:`obspy.core.utcdatetime.UTCDateTime`
115    :param io_flags: Extract I/O flag counts.
116    :type io_flags: bool
117    :param activity_flags: Extract activity flag counts.
118    :type activity_flags: bool
119    :param data_quality_flags: Extract data quality flag counts.
120    :type data_quality_flags: bool
121    :param timing_quality: Extract timing quality and corresponding statistics.
122    :type timing_quality: bool
123
124    :return: Dictionary with information about the timing quality and the data
125        quality, I/O, and activity flags. It has the following keys:
126        ``"number_of_records_used"``, ``"record_count"``,
127        ``"timing_correction"``, ``"timing_correction_count"``,
128        ``"data_quality_flags_counts"``, ``"activity_flags_counts"``,
129        ``"io_and_clock_flags_counts"``, ``"data_quality_flags_percentages"``,
130        ``"activity_flags_percentages"``, ``"io_and_clock_flags_percentages"``,
131        and ``"timing_quality"``.
132
133    .. rubric:: Flags
134
135    This method will count all set bit flags in the fixed header of a MiniSEED
136    file and return the total count for each flag type. The following flags
137    are extracted:
138
139    **Data quality flags:**
140
141    ========  =================================================
142    Bit       Description
143    ========  =================================================
144    [Bit 0]   Amplifier saturation detected (station dependent)
145    [Bit 1]   Digitizer clipping detected
146    [Bit 2]   Spikes detected
147    [Bit 3]   Glitches detected
148    [Bit 4]   Missing/padded data present
149    [Bit 5]   Telemetry synchronization error
150    [Bit 6]   A digital filter may be charging
151    [Bit 7]   Time tag is questionable
152    ========  =================================================
153
154    **Activity flags:**
155
156    ========  =================================================
157    Bit       Description
158    ========  =================================================
159    [Bit 0]   Calibration signals present
160    [Bit 1]   Time correction applied
161    [Bit 2]   Beginning of an event, station trigger
162    [Bit 3]   End of the event, station detriggers
163    [Bit 4]   A positive leap second happened during this record
164    [Bit 5]   A negative leap second happened during this record
165    [Bit 6]   Event in progress
166    ========  =================================================
167
168    **I/O and clock flags:**
169
170    ========  =================================================
171    Bit       Description
172    ========  =================================================
173    [Bit 0]   Station volume parity error possibly present
174    [Bit 1]   Long record read (possibly no problem)
175    [Bit 2]   Short record read (record padded)
176    [Bit 3]   Start of time series
177    [Bit 4]   End of time series
178    [Bit 5]   Clock locked
179    ========  =================================================
180
181    .. rubric:: Timing quality
182
183    If the file has a Blockette 1001 statistics about the timing quality will
184    be returned if ``timing_quality`` is True. See the doctests for more
185    information.
186
187    This method will read the timing quality in Blockette 1001 for each
188    record in the file if available and return the following statistics:
189    Minima, maxima, average, median and upper and lower quartiles.
190
191    .. rubric:: Examples
192
193    >>> from obspy.core.util import get_example_file
194    >>> filename = get_example_file("qualityflags.mseed")
195    >>> flags = get_flags(filename)
196    >>> for k, v in flags["data_quality_flags_counts"].items():
197    ...     print(k, v)
198    amplifier_saturation 9
199    digitizer_clipping 8
200    spikes 7
201    glitches 6
202    missing_padded_data 5
203    telemetry_sync_error 4
204    digital_filter_charging 3
205    suspect_time_tag 2
206
207    Reading a file with Blockette 1001 will return timing quality statistics if
208    requested.
209
210    >>> filename = get_example_file("timingquality.mseed")
211    >>> flags = get_flags(filename)
212    >>> for k, v in sorted(flags["timing_quality"].items()):
213    ...     print(k, v)  # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE
214    all_values [...]
215    lower_quartile 25.0
216    max 100.0
217    mean 50.0
218    median 50.0
219    min 0.0
220    upper_quartile 75.0
221    """
222    # Splat input files to array
223    if not isinstance(files, list):
224        files = [files]
225
226    starttime = float(UTCDateTime(starttime)) if starttime else None
227    endtime = float(UTCDateTime(endtime)) if endtime else None
228
229    records = []
230
231    # Use clibmseed to get header parameters for all files
232    for file in files:
233
234        # If it's a file name just read it.
235        if isinstance(file, (str, native_str)):
236            # Read to NumPy array which is used as a buffer.
237            bfr_np = np.fromfile(file, dtype=np.int8)
238        elif hasattr(file, 'read'):
239            bfr_np = from_buffer(file.read(), dtype=np.int8)
240
241        offset = 0
242
243        msr = clibmseed.msr_init(C.POINTER(MSRecord)())
244
245        while(True):
246            # Read up to max record length.
247            record = bfr_np[offset: offset + 8192]
248            if len(record) < 48:
249                break
250
251            retcode = clibmseed.msr_parse(record, len(record), C.pointer(msr),
252                                          -1, 0, 0)
253
254            if retcode != MS_NOERROR:
255                break
256
257            offset += msr.contents.reclen
258
259            # Check starttime and endtime of the record
260            # Read the sampling rate from the header (is this unsafe?)
261            # Tried using msr.msr_nomsamprate(msr) but the return is strange
262            r_start = clibmseed.msr_starttime(msr) / HPTMODULUS
263            r_delta = (1 / msr.contents.samprate)
264            r_end = (clibmseed.msr_endtime(msr) / HPTMODULUS) + r_delta
265
266            # Cut off records to start & endtime
267            if starttime is not None:
268                if r_end <= starttime:
269                    continue
270                if r_start < starttime:
271                    r_start = starttime
272
273            if endtime is not None:
274                if r_start >= endtime:
275                    continue
276                if r_end > endtime:
277                    r_end = endtime
278
279            # Get the timing quality in blockette1001 if it exists
280            if msr.contents.Blkt1001:
281                r_tq = msr.contents.Blkt1001.contents.timing_qual
282            else:
283                r_tq = None
284
285            # Collect all records and parameters in the range
286            records.append({
287                'start': r_start,
288                'delta': r_delta,
289                'end': r_end,
290                'tq': r_tq,
291                'tc': msr.contents.fsdh.contents.time_correct,
292                'io': msr.contents.fsdh.contents.io_flags,
293                'dq': msr.contents.fsdh.contents.dq_flags,
294                'ac': msr.contents.fsdh.contents.act_flags
295            })
296
297        # Free memory to be ready for next file.
298        clibmseed.msr_free(C.pointer(msr))
299
300    # It should be faster to sort by record endtime
301    # if we reverse the array first
302    records.reverse()
303    records.sort(key=lambda x: x["end"], reverse=True)
304
305    # Create collections for the flags
306    dq_flags_counts = collections.OrderedDict([
307        ("amplifier_saturation", 0),
308        ("digitizer_clipping", 0),
309        ("spikes", 0),
310        ("glitches", 0),
311        ("missing_padded_data", 0),
312        ("telemetry_sync_error", 0),
313        ("digital_filter_charging", 0),
314        ("suspect_time_tag", 0)
315    ])
316    dq_flags_seconds = collections.OrderedDict([
317        ("amplifier_saturation", 0),
318        ("digitizer_clipping", 0),
319        ("spikes", 0),
320        ("glitches", 0),
321        ("missing_padded_data", 0),
322        ("telemetry_sync_error", 0),
323        ("digital_filter_charging", 0),
324        ("suspect_time_tag", 0)
325    ])
326
327    io_flags_counts = collections.OrderedDict([
328        ("station_volume", 0),
329        ("long_record_read", 0),
330        ("short_record_read", 0),
331        ("start_time_series", 0),
332        ("end_time_series", 0),
333        ("clock_locked", 0)
334    ])
335    io_flags_seconds = collections.OrderedDict([
336        ("station_volume", 0),
337        ("long_record_read", 0),
338        ("short_record_read", 0),
339        ("start_time_series", 0),
340        ("end_time_series", 0),
341        ("clock_locked", 0)
342    ])
343
344    ac_flags_counts = collections.OrderedDict([
345        ("calibration_signal", 0),
346        ("time_correction_applied", 0),
347        ("event_begin", 0),
348        ("event_end", 0),
349        ("positive_leap", 0),
350        ("negative_leap", 0),
351        ("event_in_progress", 0)
352    ])
353    ac_flags_seconds = collections.OrderedDict([
354        ("calibration_signal", 0),
355        ("time_correction_applied", 0),
356        ("event_begin", 0),
357        ("event_end", 0),
358        ("positive_leap", 0),
359        ("negative_leap", 0),
360        ("event_in_progress", 0)
361    ])
362
363    coverage = None
364    used_record_count = 0
365    timing_correction = 0.0
366    timing_correction_count = 0
367    tq = []
368
369    # Go over all sorted records from back to front
370    for record in records:
371
372        # For counts we do not care about overlaps
373        # simply count contribution from all the records
374        if io_flags:
375            for _i, key in enumerate(io_flags_seconds.keys()):
376                if (record["io"] & (1 << _i)) != 0:
377                    io_flags_counts[key] += 1
378
379        if activity_flags:
380            for _i, key in enumerate(ac_flags_seconds.keys()):
381                if (record["ac"] & (1 << _i)) != 0:
382                    ac_flags_counts[key] += 1
383
384        if data_quality_flags:
385            for _i, key in enumerate(dq_flags_seconds.keys()):
386                if (record["dq"] & (1 << _i)) != 0:
387                    dq_flags_counts[key] += 1
388
389        # Coverage is the timewindow that is covered by the records
390        # so bits in overlapping records are not counted
391        # The first record starts a clean window
392        if coverage is None:
393            coverage = [record["start"], record["end"]]
394        else:
395
396            # Account for time tolerance
397            time_tolerance = 0.5 * record['delta']
398            tolerated_end = coverage[0] - time_tolerance
399
400            # Start is beyond coverage, skip the overlapping record
401            if record["start"] >= coverage[0]:
402                continue
403
404            # Fix end to the start of the coverage if it is overlaps
405            # with the coverage window. Or if it is within the allowed
406            # time tolerance
407            if record["end"] > coverage[0] or record["end"] > tolerated_end:
408                record["end"] = coverage[0]
409
410            # Extend the coverage
411            if record["start"] < coverage[0]:
412                coverage[0] = record["start"]
413
414        # Get the record length in seconds
415        record_length_seconds = (record["end"] - record["start"])
416
417        # Skip if the record length is 0 (or negative)
418        if record_length_seconds <= 0.0:
419            continue
420
421        # Overlapping records do not count ot the used_records
422        # used records tracks the amount of timing quality
423        # parameters we expect
424        used_record_count += 1
425
426        # Bitwise AND to count flags and store in orderedDicts
427        if io_flags:
428            for _i, key in enumerate(io_flags_seconds.keys()):
429                if (record["io"] & (1 << _i)) != 0:
430                    io_flags_seconds[key] += record_length_seconds
431
432        if activity_flags:
433            for _i, key in enumerate(ac_flags_seconds.keys()):
434                if (record["ac"] & (1 << _i)) != 0:
435                    ac_flags_seconds[key] += record_length_seconds
436
437        if data_quality_flags:
438            for _i, key in enumerate(dq_flags_seconds.keys()):
439                if (record["dq"] & (1 << _i)) != 0:
440                    dq_flags_seconds[key] += record_length_seconds
441
442        # Get the timing quality parameter and append to array
443        if timing_quality and record["tq"] is not None:
444            tq.append(float(record["tq"]))
445
446        # Check if a timing correction is specified
447        # (not whether it has been applied)
448        if record["tc"] != 0:
449            timing_correction += record_length_seconds
450            timing_correction_count += 1
451
452    # Get the total time analyzed
453    if endtime is not None and starttime is not None:
454        total_time_seconds = endtime - starttime
455    # If zero records agree with the selections, zero seconds have been
456    # analysed.
457    elif coverage is None:
458        total_time_seconds = 0
459    else:
460        total_time_seconds = coverage[1] - coverage[0]
461
462    # Percentage of time of bit flags set
463    if total_time_seconds:
464        if io_flags:
465            for _i, key in enumerate(io_flags_seconds.keys()):
466                io_flags_seconds[key] /= total_time_seconds * 1e-2
467        if data_quality_flags:
468            for _i, key in enumerate(dq_flags_seconds.keys()):
469                dq_flags_seconds[key] /= total_time_seconds * 1e-2
470        if activity_flags:
471            for _i, key in enumerate(ac_flags_seconds.keys()):
472                ac_flags_seconds[key] /= total_time_seconds * 1e-2
473
474        timing_correction /= total_time_seconds * 1e-2
475
476    # Add the timing quality if it is set for all used records
477    if timing_quality:
478        if len(tq) == used_record_count:
479            tq = np.array(tq, dtype=np.float64)
480            tq = {
481                "all_values": tq,
482                "min": tq.min(),
483                "max": tq.max(),
484                "mean": tq.mean(),
485                "median": np.median(tq),
486                "lower_quartile": np.percentile(tq, 25),
487                "upper_quartile": np.percentile(tq, 75)
488            }
489
490        else:
491            tq = {}
492
493    return {
494        'timing_correction': timing_correction,
495        'timing_correction_count': timing_correction_count,
496        'io_and_clock_flags_percentages': io_flags_seconds,
497        'io_and_clock_flags_counts': io_flags_counts,
498        'data_quality_flags_percentages': dq_flags_seconds,
499        'data_quality_flags_counts': dq_flags_counts,
500        'activity_flags_percentages': ac_flags_seconds,
501        'activity_flags_counts': ac_flags_counts,
502        'timing_quality': tq,
503        'record_count': len(records),
504        'number_of_records_used': used_record_count,
505    }
506
507
508def get_record_information(file_or_file_object, offset=0, endian=None):
509    """
510    Returns record information about given files and file-like object.
511
512    :param endian: If given, the byte order will be enforced. Can be either "<"
513        or ">". If None, it will be determined automatically.
514        Defaults to None.
515
516    .. rubric:: Example
517
518    >>> from obspy.core.util import get_example_file
519    >>> filename = get_example_file("test.mseed")
520    >>> ri = get_record_information(filename)
521    >>> for k, v in sorted(ri.items()):
522    ...     print(k, v)
523    activity_flags 0
524    byteorder >
525    channel BHZ
526    data_quality_flags 0
527    encoding 11
528    endtime 2003-05-29T02:15:51.518400Z
529    excess_bytes 0
530    filesize 8192
531    io_and_clock_flags 0
532    location 00
533    network NL
534    npts 5980
535    number_of_records 2
536    record_length 4096
537    samp_rate 40.0
538    starttime 2003-05-29T02:13:22.043400Z
539    station HGN
540    time_correction 0
541    """
542    if isinstance(file_or_file_object, (str, native_str)):
543        with open(file_or_file_object, 'rb') as f:
544            info = _get_record_information(f, offset=offset, endian=endian)
545    else:
546        info = _get_record_information(file_or_file_object, offset=offset,
547                                       endian=endian)
548    return info
549
550
551def _decode_header_field(name, content):
552    """
553    Helper function to decode header fields. Fairly fault tolerant and it
554    will also raise nice warnings in case in encounters anything wild.
555    """
556    try:
557        return content.decode("ascii", errors="strict")
558    except UnicodeError:
559        r = content.decode("ascii", errors="ignore")
560        msg = (u"Failed to decode {name} code as ASCII. "
561               u"Code in file: '{result}' (\ufffd indicates characters "
562               u"that could not be decoded). "
563               u"Will be interpreted as: '{f_result}'. "
564               u"This is an invalid MiniSEED file - please "
565               u"contact your data provider.")
566        warnings.warn(msg.format(
567            name=name,
568            result=content.decode("ascii", errors="replace"),
569            f_result=r))
570        return r
571
572
573def _get_record_information(file_object, offset=0, endian=None):
574    """
575    Searches the first MiniSEED record stored in file_object at the current
576    position and returns some information about it.
577
578    If offset is given, the MiniSEED record is assumed to start at current
579    position + offset in file_object.
580
581    :param endian: If given, the byte order will be enforced. Can be either "<"
582        or ">". If None, it will be determined automatically.
583        Defaults to None.
584    """
585    initial_position = file_object.tell()
586    record_start = initial_position
587    samp_rate = None
588
589    info = {}
590
591    # Apply the offset.
592    if offset:
593        file_object.seek(offset, 1)
594        record_start += offset
595
596    # Get the size of the buffer.
597    file_object.seek(0, 2)
598    info['filesize'] = int(file_object.tell() - record_start)
599    file_object.seek(record_start, 0)
600
601    _code = file_object.read(8)[6:7]
602    # Reset the offset if starting somewhere in the middle of the file.
603    if info['filesize'] % 128 != 0:
604        # if a multiple of minimal record length 256
605        record_start = 0
606    elif _code not in [b'D', b'R', b'Q', b'M', b' ']:
607        # if valid data record start at all starting with D, R, Q or M
608        record_start = 0
609    # Might be a noise record or completely empty.
610    elif _code == b' ':
611        try:
612            _t = file_object.read(120).decode().strip()
613        except Exception:
614            raise ValueError("Invalid MiniSEED file.")
615        if not _t:
616            info = _get_record_information(file_object=file_object,
617                                           endian=endian)
618            file_object.seek(initial_position, 0)
619            return info
620        else:
621            raise ValueError("Invalid MiniSEED file.")
622    file_object.seek(record_start, 0)
623
624    # check if full SEED or MiniSEED
625    if file_object.read(8)[6:7] == b'V':
626        # found a full SEED record - seek first MiniSEED record
627        # search blockette 005, 008 or 010 which contain the record length
628        blockette_id = file_object.read(3)
629        while blockette_id not in [b'010', b'008', b'005']:
630            if not blockette_id.startswith(b'0'):
631                msg = "SEED Volume Index Control Headers: blockette 0xx" + \
632                      " expected, got %s"
633                raise Exception(msg % blockette_id)
634            # get length and jump to end of current blockette
635            blockette_len = int(file_object.read(4))
636            file_object.seek(blockette_len - 7, 1)
637            # read next blockette id
638            blockette_id = file_object.read(3)
639        # Skip the next bytes containing length of the blockette and version
640        file_object.seek(8, 1)
641        # get record length
642        rec_len = pow(2, int(file_object.read(2)))
643        # reset file pointer
644        file_object.seek(record_start, 0)
645        # cycle through file using record length until first data record found
646        while True:
647            data = file_object.read(7)[6:7]
648            if data in [b'D', b'R', b'Q', b'M']:
649                break
650            # stop looking when hitting end of file
651            # the second check should be enough.. but play it safe
652            if not data or record_start > info['filesize']:
653                msg = "No MiniSEED data record found in file."
654                raise ValueError(msg)
655            record_start += rec_len
656            file_object.seek(record_start, 0)
657
658    # Jump to the network, station, location and channel codes.
659    file_object.seek(record_start + 8, 0)
660    data = file_object.read(12)
661
662    info["station"] = _decode_header_field("station", data[:5].strip())
663    info["location"] = _decode_header_field("location", data[5:7].strip())
664    info["channel"] = _decode_header_field("channel", data[7:10].strip())
665    info["network"] = _decode_header_field("network", data[10:12].strip())
666
667    # Use the date to figure out the byte order.
668    file_object.seek(record_start + 20, 0)
669    # Capital letters indicate unsigned quantities.
670    data = file_object.read(28)
671
672    def fmt(s):
673        return native_str('%sHHBBBxHHhhBBBxlxxH' % s)
674
675    def _parse_time(values):
676        if not (1 <= values[1] <= 366):
677            msg = 'julday out of bounds (wrong endian?): {!s}'.format(
678                values[1])
679            raise InternalMSEEDParseTimeError(msg)
680        # The spec says values[5] (.0001 seconds) must be between 0-9999 but
681        # we've  encountered files which have a value of 10000. We interpret
682        # this as an additional second. The approach here is general enough
683        # to work for any value of values[5].
684        msec = values[5] * 100
685        offset = msec // 1000000
686        if offset:
687            warnings.warn(
688                "Record contains a fractional seconds (.0001 secs) of %i - "
689                "the maximum strictly allowed value is 9999. It will be "
690                "interpreted as one or more additional seconds." % values[5],
691                category=UserWarning)
692        try:
693            t = UTCDateTime(
694                year=values[0], julday=values[1],
695                hour=values[2], minute=values[3], second=values[4],
696                microsecond=msec % 1000000) + offset
697        except TypeError:
698            msg = 'Problem decoding time (wrong endian?)'
699            raise InternalMSEEDParseTimeError(msg)
700        return t
701
702    if endian is None:
703        try:
704            endian = ">"
705            values = unpack(fmt(endian), data)
706            starttime = _parse_time(values)
707        except InternalMSEEDParseTimeError:
708            endian = "<"
709            values = unpack(fmt(endian), data)
710            starttime = _parse_time(values)
711    else:
712        values = unpack(fmt(endian), data)
713        try:
714            starttime = _parse_time(values)
715        except InternalMSEEDParseTimeError:
716            msg = ("Invalid starttime found. The passed byte order is likely "
717                   "wrong.")
718            raise ValueError(msg)
719    npts = values[6]
720    info['npts'] = npts
721    samp_rate_factor = values[7]
722    samp_rate_mult = values[8]
723    info['activity_flags'] = values[9]
724    # Bit 1 of the activity flags.
725    time_correction_applied = bool(info['activity_flags'] & 2)
726    info['io_and_clock_flags'] = values[10]
727    info['data_quality_flags'] = values[11]
728    info['time_correction'] = values[12]
729    time_correction = values[12]
730    blkt_offset = values[13]
731
732    # Correct the starttime if applicable.
733    if (time_correction_applied is False) and time_correction:
734        # Time correction is in units of 0.0001 seconds.
735        starttime += time_correction * 0.0001
736
737    # Traverse the blockettes and parse Blockettes 100, 500, 1000 and/or 1001
738    # if any of those is found.
739    while blkt_offset:
740        file_object.seek(record_start + blkt_offset, 0)
741        blkt_type, next_blkt = unpack(native_str('%sHH' % endian),
742                                      file_object.read(4))
743        if next_blkt != 0 and (next_blkt < 4 or next_blkt - 4 <= blkt_offset):
744            msg = ('Invalid blockette offset (%d) less than or equal to '
745                   'current offset (%d)') % (next_blkt, blkt_offset)
746            raise ValueError(msg)
747        blkt_offset = next_blkt
748
749        # Parse in order of likeliness.
750        if blkt_type == 1000:
751            encoding, word_order, record_length = \
752                unpack(native_str('%sBBB' % endian),
753                       file_object.read(3))
754            if word_order not in ENDIAN:
755                msg = ('Invalid word order "%s" in blockette 1000 for '
756                       'record with ID %s.%s.%s.%s at offset %i.') % (
757                    str(word_order), info["network"], info["station"],
758                    info["location"], info["channel"], offset)
759                warnings.warn(msg, UserWarning)
760            elif ENDIAN[word_order] != endian:
761                msg = 'Inconsistent word order.'
762                warnings.warn(msg, UserWarning)
763            info['encoding'] = encoding
764            info['record_length'] = 2 ** record_length
765        elif blkt_type == 1001:
766            info['timing_quality'], mu_sec = \
767                unpack(native_str('%sBb' % endian),
768                       file_object.read(2))
769            starttime += float(mu_sec) / 1E6
770        elif blkt_type == 500:
771            file_object.seek(14, 1)
772            mu_sec = unpack(native_str('%sb' % endian),
773                            file_object.read(1))[0]
774            starttime += float(mu_sec) / 1E6
775        elif blkt_type == 100:
776            samp_rate = unpack(native_str('%sf' % endian),
777                               file_object.read(4))[0]
778
779    # No blockette 1000 found.
780    if "record_length" not in info:
781        file_object.seek(record_start, 0)
782        # Read 16 kb - should be a safe maximal record length.
783        buf = from_buffer(file_object.read(2 ** 14), dtype=np.int8)
784        # This is a messy check - we just delegate to libmseed.
785        reclen = clibmseed.ms_detect(buf, len(buf))
786        if reclen < 0:
787            raise ValueError("Could not detect data record.")
788        elif reclen == 0:
789            # It might be at the end of the file.
790            if len(buf) in [2 ** _i for _i in range(7, 256)]:
791                reclen = len(buf)
792            else:
793                raise ValueError("Could not determine record length.")
794        info["record_length"] = reclen
795
796    # If samprate not set via blockette 100 calculate the sample rate according
797    # to the SEED manual.
798    if not samp_rate:
799        if samp_rate_factor > 0 and samp_rate_mult > 0:
800            samp_rate = float(samp_rate_factor * samp_rate_mult)
801        elif samp_rate_factor > 0 and samp_rate_mult < 0:
802            samp_rate = -1.0 * float(samp_rate_factor) / float(samp_rate_mult)
803        elif samp_rate_factor < 0 and samp_rate_mult > 0:
804            samp_rate = -1.0 * float(samp_rate_mult) / float(samp_rate_factor)
805        elif samp_rate_factor < 0 and samp_rate_mult < 0:
806            samp_rate = 1.0 / float(samp_rate_factor * samp_rate_mult)
807        else:
808            samp_rate = 0
809
810    info['samp_rate'] = samp_rate
811
812    info['starttime'] = starttime
813    # If sample rate is zero set endtime to startime
814    if samp_rate == 0:
815        info['endtime'] = starttime
816    # Endtime is the time of the last sample.
817    else:
818        info['endtime'] = starttime + (npts - 1) / samp_rate
819    info['byteorder'] = endian
820
821    info['number_of_records'] = int(info['filesize'] //
822                                    info['record_length'])
823    info['excess_bytes'] = int(info['filesize'] % info['record_length'])
824
825    # Reset file pointer.
826    file_object.seek(initial_position, 0)
827    return info
828
829
830def _ctypes_array_2_numpy_array(buffer_, buffer_elements, sampletype):
831    """
832    Takes a Ctypes array and its length and type and returns it as a
833    NumPy array.
834
835    :param buffer_: Ctypes c_void_p pointer to buffer.
836    :param buffer_elements: length of the whole buffer
837    :param sampletype: type of sample, on of "a", "i", "f", "d"
838    """
839    # Allocate NumPy array to move memory to
840    numpy_array = np.empty(buffer_elements, dtype=sampletype)
841    datptr = numpy_array.ctypes.get_data()
842    # Manually copy the contents of the C allocated memory area to
843    # the address of the previously created NumPy array
844    C.memmove(datptr, buffer_, buffer_elements * SAMPLESIZES[sampletype])
845    return numpy_array
846
847
848def _convert_msr_to_dict(m):
849    """
850    Internal method used for setting header attributes.
851    """
852    h = {}
853    attributes = ('network', 'station', 'location', 'channel',
854                  'dataquality', 'starttime', 'samprate',
855                  'samplecnt', 'numsamples', 'sampletype')
856    # loop over attributes
857    for _i in attributes:
858        h[_i] = getattr(m, _i)
859    return h
860
861
862def _convert_datetime_to_mstime(dt):
863    """
864    Takes a obspy.util.UTCDateTime object and returns an epoch time in ms.
865
866    :param dt: obspy.util.UTCDateTime object.
867    """
868    rest = (dt._ns % 10**3) >= 500 and 1 or 0
869    return dt._ns // 10**3 + rest
870
871
872def _convert_mstime_to_datetime(timestring):
873    """
874    Takes a MiniSEED timestamp and returns a obspy.util.UTCDateTime object.
875
876    :param timestamp: MiniSEED timestring (Epoch time string in ms).
877    """
878    return UTCDateTime(ns=int(round(timestring * 10**3)))
879
880
881def _unpack_steim_1(data, npts, swapflag=0, verbose=0):
882    """
883    Unpack steim1 compressed data given as numpy array.
884
885    :type data: :class:`numpy.ndarray`
886    :param data: steim compressed data as a numpy array
887    :param npts: number of data points
888    :param swapflag: Swap bytes, defaults to 0
889    :return: Return data as numpy.ndarray of dtype int32
890    """
891    datasize = len(data)
892    samplecnt = npts
893    datasamples = np.empty(npts, dtype=np.int32)
894
895    nsamples = clibmseed.msr_decode_steim1(
896        data.ctypes.data,
897        datasize, samplecnt, datasamples,
898        npts, None, swapflag)
899    if nsamples != npts:
900        raise Exception("Error in unpack_steim1")
901    return datasamples
902
903
904def _unpack_steim_2(data, npts, swapflag=0, verbose=0):
905    """
906    Unpack steim2 compressed data given as numpy array.
907
908    :type data: :class:`numpy.ndarray`
909    :param data: steim compressed data as a numpy array
910    :param npts: number of data points
911    :param swapflag: Swap bytes, defaults to 0
912    :return: Return data as numpy.ndarray of dtype int32
913    """
914    datasize = len(data)
915    samplecnt = npts
916    datasamples = np.empty(npts, dtype=np.int32)
917
918    nsamples = clibmseed.msr_decode_steim2(
919        data.ctypes.data,
920        datasize, samplecnt, datasamples,
921        npts, None, swapflag)
922    if nsamples != npts:
923        raise Exception("Error in unpack_steim2")
924    return datasamples
925
926
927def set_flags_in_fixed_headers(filename, flags):
928    """
929    Updates a given MiniSEED file with some fixed header flags.
930
931    :type filename: string
932    :param filename: Name of the MiniSEED file to be changed
933    :type flags: dict
934    :param flags: The flags to update in the MiniSEED file
935
936        Flags are stored as a nested dictionary::
937
938            { trace_id:
939                { flag_group:
940                    { flag_name: flag_value,
941                                 ...
942                    },
943                    ...
944                },
945                ...
946            }
947
948        with:
949
950        * ``trace_id``
951            A string identifying the trace. A string looking like
952            ``NETWORK.STATION.LOCATION.CHANNEL`` is expected, the values will
953            be compared to those found in the fixed header of every record. An
954            empty field will be interpreted  as "every possible value", so
955            ``"..."`` will apply to every single trace in the file. Padding
956            spaces are ignored.
957        * ``flag_group``
958            Which flag group is to be changed. One of ``'activity_flags'``,
959            ``'io_clock_flags'``, ``'data_qual_flags'`` is expected. Invalid
960            flag groups raise a ``ValueError``.
961        * ``flag_name``
962            The name of the flag. Possible values are matched with
963            ``obspy.io.mseed.headers.FIXED_HEADER_ACTIVITY_FLAGS``,
964            ``FIXED_HEADER_IO_CLOCK_FLAGS`` or ``FIXED_HEADER_DATA_QUAL_FLAGS``
965            depending on the flag_group. Invalid flags raise a ``ValueError``.
966        * ``flag_value``
967            The value you want for this flag. Expected value is a bool (always
968            ``True``/``False``) or a dict to store the moments and durations
969            when this flag is ``True``. Expected syntax for this dict is
970            accurately described in ``obspy.io.mseed.util._check_flag_value``.
971
972    :raises IOError: if the file is not a MiniSEED file
973    :raises ValueError: if one of the flag group, flag name or flag value is
974        incorrect
975
976    Example: to add a *Calibration Signals Present* flag (which belongs to the
977    Activity Flags section of the fixed header) to every record, flags should
978    be::
979
980        { "..." : { "activity_flags" : { "calib_signal" : True }}}
981
982    Example: to add a *Event in Progress* flag (which belongs to the
983    Activity Flags section of the fixed header) from 2009/12/23 06:00:00 to
984    2009/12/23 06:30:00, from 2009/12/24 10:00:00 to 2009/12/24 10:30:00 and
985    at precise times 2009/12/26 18:00:00 and 2009/12/26 18:04:00,
986    flags should be::
987
988        date1 = UTCDateTime("2009-12-23T06:00:00.0")
989        date2 = UTCDateTime("2009-12-23T06:30:00.0")
990        date3 = UTCDateTime("2009-12-24T10:00:00.0")
991        date4 = UTCDateTime("2009-12-24T10:30:00.0")
992        date5 = UTCDateTime("2009-12-26T18:00:00.0")
993        date6 = UTCDateTime("2009-12-26T18:04:00.0")
994        { "..." :
995            { "activity_flags" :
996                { "event_in_progress" :
997                    {"INSTANT" : [date5, date6],
998                    "DURATION" : [(date1, date2), (date3, date4)]}}}}
999
1000    Alternative way to mark duration::
1001
1002        { "..." :
1003            { "activity_flags" :
1004                { "event_in_progress" :
1005                    { "INSTANT" : [date5, date6],
1006                      "DURATION" : [date1, date2, date3, date4]}}}}
1007
1008    """
1009    # import has to be here to break import loop
1010    from .core import _is_mseed
1011    # Basic check
1012    if not os.path.isfile(filename) or not _is_mseed(filename):
1013        raise IOError("File %s is not a valid MiniSEED file" % filename)
1014    filesize = os.path.getsize(filename)
1015
1016    # Nested dictionaries to allow empty strings as wildcards
1017    class NestedDict(dict):
1018        def __missing__(self, key):
1019            value = self[key] = type(self)()
1020            return value
1021    # Define wildcard character
1022    wildcard = ""
1023
1024    # Check channel identifier value
1025    flags_bytes = NestedDict()
1026    for (key, value) in flags.items():
1027        split_key = key.split(".")
1028        if len(split_key) != 4:
1029            msg = "Invalid channel identifier. " +\
1030                  "Expected 'Network.Station.Location.Channel' " +\
1031                  "(empty fields allowed), got '%s'."
1032            raise ValueError(msg % key)
1033
1034        # Remove padding spaces and store in new dict
1035        net = split_key[0].strip()
1036        sta = split_key[1].strip()
1037        loc = split_key[2].strip()
1038        cha = split_key[3].strip()
1039
1040        # Check flag value for invalid data
1041        for flag_group in value:
1042            # Check invalid flag group, and prepare check for invalid flag name
1043            if flag_group == 'activity_flags':
1044                record_to_check = FIXED_HEADER_ACTIVITY_FLAGS
1045            elif flag_group == 'io_clock_flags':
1046                record_to_check = FIXED_HEADER_IO_CLOCK_FLAGS
1047            elif flag_group == 'data_qual_flags':
1048                record_to_check = FIXED_HEADER_DATA_QUAL_FLAGS
1049            else:
1050                msg = "Invalid flag group %s. One of 'activity_flags', " +\
1051                      "'io_clock_flags', 'data_qual_flags' is expected."
1052                raise ValueError(msg % flag_group)
1053
1054            for flag_name in value[flag_group]:
1055                # Check invalid flag name
1056                if flag_name not in record_to_check.values():
1057                    msg = "Invalid flag name %s. One of %s is expected."
1058                    raise ValueError(msg % (flag_name,
1059                                            str(record_to_check.values())))
1060
1061                # Check flag values and store them in an "easy to parse" way:
1062                # either bool or list of tuples (start, end)
1063                flag_value = value[flag_group][flag_name]
1064                corrected_flag = _check_flag_value(flag_value)
1065                flags_bytes[net][sta][loc][cha][flag_group][flag_name] = \
1066                    corrected_flag
1067
1068    # Open file
1069    with open(filename, 'r+b') as mseed_file:
1070        # Run through all records
1071        while mseed_file.tell() < filesize:
1072            record_start = mseed_file.tell()
1073
1074            # Ignore sequence number and data header
1075            mseed_file.seek(8, os.SEEK_CUR)
1076            # Read identifier
1077            sta = mseed_file.read(5).strip().decode()
1078            loc = mseed_file.read(2).strip().decode()
1079            chan = mseed_file.read(3).strip().decode()
1080            net = mseed_file.read(2).strip().decode()
1081
1082            # Search the nested dict for the network identifier
1083            if net in flags_bytes:
1084                dict_to_use = flags_bytes[net]
1085            elif wildcard in flags_bytes:
1086                dict_to_use = flags_bytes[wildcard]
1087            else:
1088                dict_to_use = None
1089
1090            # Search the nested dict for the station identifier
1091            if dict_to_use is not None and sta in dict_to_use:
1092                dict_to_use = dict_to_use[sta]
1093            elif dict_to_use is not None and wildcard in dict_to_use:
1094                dict_to_use = dict_to_use[wildcard]
1095            else:
1096                dict_to_use = None
1097
1098            # Search the nested dict for the location identifier
1099            if dict_to_use is not None and loc in dict_to_use:
1100                dict_to_use = dict_to_use[loc]
1101            elif dict_to_use is not None and wildcard in dict_to_use:
1102                dict_to_use = dict_to_use[wildcard]
1103            else:
1104                dict_to_use = None
1105
1106            # Search the nested dict for the channel identifier
1107            if dict_to_use is not None and chan in dict_to_use:
1108                flags_value = dict_to_use[chan]
1109            elif dict_to_use is not None and wildcard in dict_to_use:
1110                flags_value = dict_to_use[wildcard]
1111            else:
1112                flags_value = None
1113
1114            if flags_value is not None:
1115                # Calculate the real start and end of the record
1116                recstart = mseed_file.read(10)
1117                (yr, doy, hr, mn, sec, _, mil) = unpack(native_str(">HHBBBBH"),
1118                                                        recstart)
1119                # Transformation to UTCDatetime()
1120                recstart = UTCDateTime(year=yr, julday=doy, hour=hr, minute=mn,
1121                                       second=sec, microsecond=mil * 100)
1122                # Read data to date begin and end of record
1123                (nb_samples, fact, mult) = unpack(native_str(">Hhh"),
1124                                                  mseed_file.read(6))
1125
1126                # Manage time correction
1127                act_flags = unpack(native_str(">B"), mseed_file.read(1))[0]
1128                time_correction_applied = bool(act_flags & 2)
1129                (_, _, _, time_correction) = unpack(native_str(">BBBl"),
1130                                                    mseed_file.read(7))
1131                if (time_correction_applied is False) and time_correction:
1132                    # Time correction is in units of 0.0001 seconds.
1133                    recstart += time_correction * 0.0001
1134
1135                # Manage blockette's datation informations
1136                # Search for blockette 100's "Actual sample rate" field
1137                samp_rate = _search_flag_in_blockette(mseed_file, 4, 100, 4, 1)
1138                if samp_rate is not None:
1139                    samp_rate = unpack(native_str(">b"), samp_rate)[0]
1140                # Search for blockette 1001's "microsec" field
1141                microsec = _search_flag_in_blockette(mseed_file, 4, 1001, 5, 1)
1142                if microsec is not None:
1143                    microsec = unpack(native_str(">b"), microsec)[0]
1144                else:
1145                    microsec = 0
1146
1147                realstarttime = recstart + microsec * 0.000001
1148
1149                # If samprate not set via blockette 100 calculate the sample
1150                # rate according to the SEED manual.
1151                if samp_rate is None:
1152                    if (fact > 0) and (mult) > 0:
1153                        samp_rate = float(fact * mult)
1154                    elif (fact > 0) and (mult) < 0:
1155                        samp_rate = -1.0 * float(fact) / float(mult)
1156                    elif (fact < 0) and (mult) > 0:
1157                        samp_rate = -1.0 * float(mult) / float(fact)
1158                    elif (fact < 0) and (mult) < 0:
1159                        samp_rate = -1.0 / float(fact * mult)
1160                    else:
1161                        # if everything is unset or 0 set sample rate to 1
1162                        samp_rate = 1
1163
1164                # date of the last sample is recstart+samp_rate*(nb_samples-1)
1165                # We assume here that a record with samples [0, 1, ..., n]
1166                # has a period [ date_0, date_n+1 [  AND NOT [ date_0, date_n ]
1167                realendtime = recstart + samp_rate * (nb_samples)
1168
1169                # Convert flags to bytes : activity
1170                if 'activity_flags' in flags_value:
1171                    act_flag = _convert_flags_to_raw_byte(
1172                        FIXED_HEADER_ACTIVITY_FLAGS,
1173                        flags_value['activity_flags'],
1174                        realstarttime, realendtime)
1175                else:
1176                    act_flag = 0
1177
1178                # Convert flags to bytes : i/o and clock
1179                if 'io_clock_flags' in flags_value:
1180                    io_clock_flag = _convert_flags_to_raw_byte(
1181                        FIXED_HEADER_IO_CLOCK_FLAGS,
1182                        flags_value['io_clock_flags'],
1183                        realstarttime, realendtime)
1184                else:
1185                    io_clock_flag = 0
1186
1187                # Convert flags to bytes : data quality flags
1188                if 'data_qual_flags' in flags_value:
1189                    data_qual_flag = _convert_flags_to_raw_byte(
1190                        FIXED_HEADER_DATA_QUAL_FLAGS,
1191                        flags_value['data_qual_flags'],
1192                        realstarttime, realendtime)
1193                else:
1194                    data_qual_flag = 0
1195
1196                flagsbytes = pack("BBB", act_flag,
1197                                  io_clock_flag, data_qual_flag)
1198                # Go back right before the fixed header flags
1199                mseed_file.seek(-8, os.SEEK_CUR)
1200                # Update flags*
1201                mseed_file.write(flagsbytes)
1202                # Seek to first blockette
1203                mseed_file.seek(9, os.SEEK_CUR)
1204            else:
1205                # Seek directly to first blockette
1206                mseed_file.seek(28, os.SEEK_CUR)
1207
1208            # Read record length in blockette 1000 to seek to the next record
1209            reclen_pow = _search_flag_in_blockette(mseed_file, 0, 1000, 6, 1)
1210
1211            if reclen_pow is None:
1212                msg = "Invalid MiniSEED file. No blockette 1000 was found."
1213                raise IOError(msg)
1214            else:
1215                reclen_pow = unpack(native_str("B"), reclen_pow)[0]
1216                reclen = 2**reclen_pow
1217                mseed_file.seek(record_start + reclen, os.SEEK_SET)
1218
1219
1220def _check_flag_value(flag_value):
1221    """
1222    Search for a given flag in a given blockette for the current record.
1223
1224    This is a utility function for set_flags_in_fixed_headers and is not
1225    designed to be called by someone else.
1226
1227    This function checks for valid entries for a flag. A flag can be either
1228    * ``bool`` value to be always True or False for all the records
1229    * ``datetime`` or ``UTCDateTime`` value to add a single 'INSTANT' datation
1230    (see below)
1231    * ``dict`` to allow complex flag datation
1232    ** The dict keys may be the keyword INSTANT to mark arbitrarly short
1233    duration flags, or the keyword DURATION to mark events that span across
1234    time.
1235    ** The dict values are:
1236    *** for the INSTANT value, a single UTCDateTime or datetime object, or a
1237    list of these datation objects
1238    *** for the DURATION value, either a list of
1239    [start1, end1, start2, end2, ...] or a list of tuples
1240    [(start1, end1), (start2, end2), ...]
1241
1242
1243    This function then returns all datation events as a list of tuples
1244    [(start1, end1), ...] to ease the work of _convert_flags_to_raw_byte. Bool
1245    values are unchanged, instant events become a tuple
1246    (event_date, event_date).
1247
1248    If the flag value is incorrect, a ValueError is raised with a (hopefully)
1249    explicit enough message.
1250
1251    :type flag_value: bool or dict
1252    :param flag_value: the flag value to check.
1253    :return: corrected value of the flag.
1254    :raises: If the flag is not the one expected, a ``ValueError`` is raised
1255    """
1256
1257    if isinstance(flag_value, bool):
1258        # bool allowed
1259        corrected_flag = flag_value
1260
1261    elif isinstance(flag_value, datetime) or \
1262            isinstance(flag_value, UTCDateTime):
1263        # A single instant value is allowed
1264        utc_val = UTCDateTime(flag_value)
1265        corrected_flag = [(utc_val, utc_val)]
1266
1267    elif isinstance(flag_value, collections_abc.Mapping):
1268        # dict allowed if it has the right format
1269        corrected_flag = []
1270        for flag_key in flag_value:
1271            if flag_key == "INSTANT":
1272                # Expected: list of UTCDateTime
1273                inst_values = flag_value[flag_key]
1274                if isinstance(inst_values, datetime) or \
1275                   isinstance(inst_values, UTCDateTime):
1276                    # Single value : ensure it's UTCDateTime and store it
1277                    utc_val = UTCDateTime(inst_values)
1278                    corrected_flag.append((utc_val, utc_val))
1279                elif isinstance(inst_values, collections_abc.Sequence):
1280                    # Several instant values : check their types
1281                    # and add each of them
1282                    for value in inst_values:
1283                        if isinstance(value, datetime) or \
1284                           isinstance(value, UTCDateTime):
1285                            utc_val = UTCDateTime(value)
1286                            corrected_flag.append((utc_val, utc_val))
1287                        else:
1288                            msg = "Unexpected type for flag duration " +\
1289                                  "'INSTANT' %s"
1290                            raise ValueError(msg % str(type(inst_values)))
1291                else:
1292                    msg = "Unexpected type for flag duration 'INSTANT' %s"
1293                    raise ValueError(msg % str(type(inst_values)))
1294
1295            elif flag_key == "DURATION":
1296                # Expecting either a list of tuples (start, end) or
1297                # a list of (start1, end1, start1, end1)
1298                dur_values = flag_value[flag_key]
1299                if isinstance(dur_values, collections_abc.Sequence):
1300                    if len(dur_values) != 0:
1301                        # Check first item
1302                        if isinstance(dur_values[0], datetime) or \
1303                           isinstance(dur_values[0], UTCDateTime):
1304                            # List of [start1, end1, start2, end2, etc]
1305                            # Check len
1306                            if len(dur_values) % 2 != 0:
1307                                msg = "Expected even length of duration " +\
1308                                      "values, got %s"
1309                                raise ValueError(msg % len(dur_values))
1310
1311                            # Add values
1312                            duration_iter = iter(dur_values)
1313                            for value in duration_iter:
1314                                start = value
1315                                end = dur_values[dur_values.index(value) + 1]
1316
1317                                # Check start type
1318                                if not isinstance(start, datetime) and \
1319                                   not isinstance(start, UTCDateTime):
1320                                    msg = "Incorrect type for duration " +\
1321                                          "start %s"
1322                                    raise ValueError(msg % str(type(start)))
1323
1324                                # Check end type
1325                                if not isinstance(end, datetime) and \
1326                                   not isinstance(end, UTCDateTime):
1327                                    msg = "Incorrect type for duration " +\
1328                                          "end %s"
1329                                    raise ValueError(msg % str(type(end)))
1330
1331                                # Check duration validity
1332                                start = UTCDateTime(start)
1333                                end = UTCDateTime(end)
1334                                if start <= end:
1335                                    corrected_flag.append((start, end))
1336                                else:
1337                                    msg = "Flag datation: expected end of " +\
1338                                          "duration after its start"
1339                                    raise ValueError(msg)
1340                                next(duration_iter)
1341
1342                        elif isinstance(dur_values[0],
1343                                        collections_abc.Sequence):
1344                            # List of tuples (start, end)
1345                            for value in dur_values:
1346                                if not isinstance(value,
1347                                                  collections_abc.Sequence):
1348                                    msg = "Incorrect type %s for flag duration"
1349                                    raise ValueError(msg % str(type(value)))
1350                                elif len(value) != 2:
1351                                    msg = "Incorrect len %s for flag duration"
1352                                    raise ValueError(msg % len(value))
1353                                else:
1354                                    start = value[0]
1355                                    end = value[1]
1356
1357                                    # Check start type
1358                                    if not isinstance(start, datetime) and \
1359                                       not isinstance(start, UTCDateTime):
1360                                        msg = "Incorrect type for duration " +\
1361                                              "start %s"
1362                                        raise ValueError(msg %
1363                                                         str(type(start)))
1364
1365                                    # Check end type
1366                                    if not isinstance(end, datetime) and \
1367                                       not isinstance(end, UTCDateTime):
1368                                        msg = "Incorrect type for duration " +\
1369                                              "end %s"
1370                                        raise ValueError(msg % str(type(end)))
1371                                    if start <= end:
1372                                        corrected_flag.append((start, end))
1373                                    else:
1374                                        msg = "Flag datation: expected end " +\
1375                                              "of duration after its start"
1376                                        raise ValueError(msg)
1377
1378                    # Else: len(dur_values) == 0, empty duration list:
1379                    # do nothing
1380                else:
1381                    msg = "Incorrect DURATION value: expected a list of " +\
1382                          "tuples (start, end), got %s"
1383                    raise ValueError(msg % str(type(dur_values)))
1384
1385            else:
1386                msg = "Invalid key %s for flag value. One of " +\
1387                      "'INSTANT', 'DURATION' is expected."
1388                raise ValueError(msg % flag_key)
1389    else:
1390        msg = "Invalid type %s for flag value. Allowed values " +\
1391              "are bool or dict"
1392        raise ValueError(msg % str(type(flag_value)))
1393
1394    return corrected_flag
1395
1396
1397def _search_flag_in_blockette(mseed_file_desc, first_blockette_offset,
1398                              blockette_number, field_offset, field_length):
1399    """
1400    Search for a given flag in a given blockette for the current record.
1401
1402    This is a utility function for set_flags_in_fixed_headers and is not
1403    designed to be called by someone else.
1404
1405    This function uses the file descriptor``mseed_file_desc``, seeks
1406    ``first_blockette_offset`` to go to the first blockette, reads through all
1407    the blockettes until it finds the one with number ``blockette_number``,
1408    then skips ``field_offset`` bytes to read ``field_length`` bytes and
1409    returns them. If the blockette does not exist, it returns None
1410
1411    Please note that this function does not decommute the binary value into an
1412    exploitable data (int, float, string, ...)
1413    :type mseed_file_desc: File object
1414    :param mseed_file_desc: a File descriptor to the current miniseed file.
1415    The value of mseed_file_desc.tell() is set back by this funcion before
1416    returning, use in multithread applications at your own risk.
1417    :type first_blockette_offset: int
1418    :param first_blockette_offset: tells the function where the first blockette
1419    of the record is compared to the mseed_file_desc current position in the
1420    file. A positive value means the blockette is after the current position.
1421    :type blockette_number: int
1422    :param blockette_number: the blockette number to search for
1423    :type field_offset: int
1424    :param field_offset: how many bytes we have to skip before attaining the
1425    wanted field. Please note that it also counts blockette number and next
1426    blockette index's field.
1427    :type field_length: int
1428    :param field_length: length of the wanted field, in bytes
1429    :return: bytes containing the field's value in this record
1430
1431    """
1432    previous_position = mseed_file_desc.tell()
1433
1434    try:
1435        # Go to first blockette
1436        mseed_file_desc.seek(first_blockette_offset, os.SEEK_CUR)
1437        mseed_record_start = mseed_file_desc.tell() - 48
1438        read_data = mseed_file_desc.read(4)
1439        # Read info in the first blockette
1440        [cur_blkt_number, next_blkt_offset] = unpack(native_str(">HH"),
1441                                                     read_data)
1442
1443        while cur_blkt_number != blockette_number \
1444                and next_blkt_offset != 0:
1445            # Nothing here, read next blockette
1446            mseed_file_desc.seek(mseed_record_start + next_blkt_offset,
1447                                 os.SEEK_SET)
1448            read_data = mseed_file_desc.read(4)
1449            [cur_blkt_number, next_blkt_offset] = unpack(native_str(">HH"),
1450                                                         read_data)
1451
1452        if cur_blkt_number == blockette_number:
1453            # Blockette found: we want to skip ``field_offset`` bytes but we
1454            # have already read 4 of the offset to get informations about the
1455            # current blockette, so we remove them from skipped data
1456            mseed_file_desc.seek(field_offset - 4, os.SEEK_CUR)
1457            returned_bytes = mseed_file_desc.read(field_length)
1458        else:
1459            returned_bytes = None
1460
1461    finally:
1462        mseed_file_desc.seek(previous_position, os.SEEK_SET)
1463
1464    return returned_bytes
1465
1466
1467def _convert_flags_to_raw_byte(expected_flags, user_flags, recstart, recend):
1468    """
1469    Converts a flag dictionary to a byte, ready to be encoded in a MiniSEED
1470    header.
1471
1472    This is a utility function for set_flags_in_fixed_headers and is not
1473    designed to be called by someone else.
1474
1475    expected_signals describes all the possible bit names for the user flags
1476    and their place in the result byte. Expected: dict { exponent: bit_name }.
1477    The fixed header flags are available in obspy.io.mseed.headers as
1478    FIXED_HEADER_ACTIVITY_FLAGS, FIXED_HEADER_DATA_QUAL_FLAGS and
1479    FIXED_HEADER_IO_CLOCK_FLAGS.
1480
1481    This expects a user_flags as a dictionary { bit_name : value }. bit_name is
1482    compared to the expected_signals, and its value is converted to bool.
1483    Missing values are considered false.
1484
1485    :type expected_flags: dict {int: str}
1486    :param expected_flags: every possible flag in this field, with its offset
1487    :type user_flags: dict {str: bool}
1488    :param user_flags: user defined flags and its value
1489    :type recstart: UTCDateTime
1490    :param recstart: date of the first sample of the current record
1491    :type recstart: UTCDateTime
1492    :param recend: date of the last sample of the current record
1493    :return: raw int value for the flag group
1494    """
1495
1496    flag_byte = 0
1497
1498    for (bit, key) in expected_flags.items():
1499        use_in_this_record = False
1500        if key in user_flags:
1501            if isinstance(user_flags[key], bool) and user_flags[key]:
1502                # Boolean value, we accept it for all records
1503                use_in_this_record = True
1504            elif isinstance(user_flags[key], collections_abc.Sequence):
1505                # List of tuples (start, end)
1506                use_in_this_record = False
1507                for tuple_value in user_flags[key]:
1508                    # Check wether this record is concerned
1509                    event_start = tuple_value[0]
1510                    event_end = tuple_value[1]
1511
1512                    if(event_start < recend) and (recstart <= event_end):
1513                        use_in_this_record = True
1514                        break
1515
1516        if use_in_this_record:
1517            flag_byte += 2**bit
1518
1519    return flag_byte
1520
1521
1522def shift_time_of_file(input_file, output_file, timeshift):
1523    """
1524    Takes a MiniSEED file and shifts the time of every record by the given
1525    amount.
1526
1527    The same could be achieved by reading the MiniSEED file with ObsPy,
1528    modifying the starttime and writing it again. The problem with this
1529    approach is that some record specific flags and special blockettes might
1530    not be conserved. This function directly operates on the file and simply
1531    changes some header fields, not touching the rest, thus preserving it.
1532
1533    Will only work correctly if all records have the same record length which
1534    usually should be the case.
1535
1536    All times are in 0.0001 seconds, that is in 1/10000 seconds. NOT ms but one
1537    order of magnitude smaller! This is due to the way time corrections are
1538    stored in the MiniSEED format.
1539
1540    :type input_file: str
1541    :param input_file: The input filename.
1542    :type output_file: str
1543    :param output_file: The output filename.
1544    :type timeshift: int
1545    :param timeshift: The time-shift to be applied in 0.0001, e.g. 1E-4
1546        seconds. Use an integer number.
1547
1548    Please do NOT use identical input and output files because if something
1549    goes wrong, your data WILL be corrupted/destroyed. Also always check the
1550    resulting output file.
1551
1552    .. rubric:: Technical details
1553
1554    The function will loop over every record and change the "Time correction"
1555    field in the fixed section of the MiniSEED data header by the specified
1556    amount. Unfortunately a further flag (bit 1 in the "Activity flags" field)
1557    determines whether or not the time correction has already been applied to
1558    the record start time. If it has not, all is fine and changing the "Time
1559    correction" field is enough. Otherwise the actual time also needs to be
1560    changed.
1561
1562    One further detail: If bit 1 in the "Activity flags" field is 1 (True) and
1563    the "Time correction" field is 0, then the bit will be set to 0 and only
1564    the time correction will be changed thus avoiding the need to change the
1565    record start time which is preferable.
1566    """
1567    timeshift = int(timeshift)
1568    # A timeshift of zero makes no sense.
1569    if timeshift == 0:
1570        msg = "The timeshift must to be not equal to 0."
1571        raise ValueError(msg)
1572
1573    # Get the necessary information from the file.
1574    info = get_record_information(input_file)
1575    record_length = info["record_length"]
1576
1577    byteorder = info["byteorder"]
1578    sys_byteorder = "<" if (sys.byteorder == "little") else ">"
1579    do_swap = False if (byteorder == sys_byteorder) else True
1580
1581    # This is in this scenario somewhat easier to use than BytesIO because one
1582    # can directly modify the data array.
1583    data = np.fromfile(input_file, dtype=np.uint8)
1584    array_length = len(data)
1585    record_offset = 0
1586    # Loop over every record.
1587    while True:
1588        remaining_bytes = array_length - record_offset
1589        if remaining_bytes < 48:
1590            if remaining_bytes > 0:
1591                msg = "%i excessive byte(s) in the file. " % remaining_bytes
1592                msg += "They will be appended to the output file."
1593                warnings.warn(msg)
1594            break
1595        # Use a slice for the current record.
1596        current_record = data[record_offset: record_offset + record_length]
1597        record_offset += record_length
1598
1599        activity_flags = current_record[36]
1600        is_time_correction_applied = bool(activity_flags & 2)
1601
1602        current_time_shift = current_record[40:44]
1603        current_time_shift.dtype = np.int32
1604        if do_swap:
1605            current_time_shift = current_time_shift.byteswap(False)
1606        current_time_shift = current_time_shift[0]
1607
1608        # If the time correction has been applied, but there is no actual
1609        # time correction, then simply set the time correction applied
1610        # field to false and process normally.
1611        # This should rarely be the case.
1612        if current_time_shift == 0 and is_time_correction_applied:
1613            # This sets bit 2 of the activity flags to 0.
1614            current_record[36] = current_record[36] & (~2)
1615            is_time_correction_applied = False
1616        # This is the case if the time correction has been applied. This
1617        # requires some more work by changing both, the actual time and the
1618        # time correction field.
1619        elif is_time_correction_applied:
1620            msg = "The timeshift can only be applied by actually changing the "
1621            msg += "time. This is experimental. Please make sure the output "
1622            msg += "file is correct."
1623            warnings.warn(msg)
1624            # The whole process is not particularly fast or optimized but
1625            # instead intends to avoid errors.
1626            # Get the time variables.
1627            time = current_record[20:30]
1628            year = time[0:2]
1629            julday = time[2:4]
1630            hour = time[4:5]
1631            minute = time[5:6]
1632            second = time[6:7]
1633            msecs = time[8:10]
1634            # Change dtype of multibyte values.
1635            year.dtype = np.uint16
1636            julday.dtype = np.uint16
1637            msecs.dtype = np.uint16
1638            if do_swap:
1639                year = year.byteswap(False)
1640                julday = julday.byteswap(False)
1641                msecs = msecs.byteswap(False)
1642            dtime = UTCDateTime(year=year[0], julday=julday[0], hour=hour[0],
1643                                minute=minute[0], second=second[0],
1644                                microsecond=msecs[0] * 100)
1645            dtime += (float(timeshift) / 10000)
1646            year[0] = dtime.year
1647            julday[0] = dtime.julday
1648            hour[0] = dtime.hour
1649            minute[0] = dtime.minute
1650            second[0] = dtime.second
1651            msecs[0] = dtime.microsecond / 100
1652            # Swap again.
1653            if do_swap:
1654                year = year.byteswap(False)
1655                julday = julday.byteswap(False)
1656                msecs = msecs.byteswap(False)
1657            # Change dtypes back.
1658            year.dtype = np.uint8
1659            julday.dtype = np.uint8
1660            msecs.dtype = np.uint8
1661            # Write to current record.
1662            time[0:2] = year[:]
1663            time[2:4] = julday[:]
1664            time[4] = hour[:]
1665            time[5] = minute[:]
1666            time[6] = second[:]
1667            time[8:10] = msecs[:]
1668            current_record[20:30] = time[:]
1669
1670        # Now modify the time correction flag.
1671        current_time_shift += timeshift
1672        current_time_shift = np.array([current_time_shift], np.int32)
1673        if do_swap:
1674            current_time_shift = current_time_shift.byteswap(False)
1675        current_time_shift.dtype = np.uint8
1676        current_record[40:44] = current_time_shift[:]
1677
1678    # Write to the output file.
1679    data.tofile(output_file)
1680
1681
1682def _convert_and_check_encoding_for_writing(encoding):
1683    """
1684    Helper function to handle and test encodings.
1685
1686    If encoding is a string, it will be converted to the appropriate
1687    integer. It will furthermore be checked if the specified encoding can be
1688    written using libmseed. Appropriate errors will be raised if necessary.
1689    """
1690    # Check if encoding kwarg is set and catch invalid encodings.
1691    encoding_strings = {v[0]: k for k, v in ENCODINGS.items()}
1692
1693    try:
1694        encoding = int(encoding)
1695    except Exception:
1696        pass
1697
1698    if isinstance(encoding, int):
1699        if (encoding in ENCODINGS and ENCODINGS[encoding][3] is False) or \
1700                encoding in UNSUPPORTED_ENCODINGS:
1701            msg = ("Encoding %i cannot be written with ObsPy. Please "
1702                   "use another encoding.") % encoding
1703            raise ValueError(msg)
1704        elif encoding not in ENCODINGS:
1705            raise ValueError("Unknown encoding: %i." % encoding)
1706    else:
1707        if encoding not in encoding_strings:
1708            raise ValueError("Unknown encoding: '%s'." % str(encoding))
1709        elif ENCODINGS[encoding_strings[encoding]][3] is False:
1710            msg = ("Encoding '%s' cannot be written with ObsPy. Please "
1711                   "use another encoding.") % encoding
1712            raise ValueError(msg)
1713        encoding = encoding_strings[encoding]
1714    return encoding
1715
1716
1717def get_timing_and_data_quality(file_or_file_object):
1718    warnings.warn("The obspy.io.mseed.util.get_timing_and_data_quality() "
1719                  "function is deprecated and will be removed with the next "
1720                  "release. Please use the "
1721                  "improved obspy.io.mseed.util.get_flags() method instead.",
1722                  ObsPyDeprecationWarning)
1723    flags = get_flags(files=file_or_file_object, io_flags=False,
1724                      activity_flags=False, data_quality_flags=True,
1725                      timing_quality=True)
1726
1727    ret_val = {}
1728    ret_val["data_quality_flags"] = \
1729        list(flags["data_quality_flags_counts"].values())
1730
1731    if flags["timing_quality"]:
1732        tq = flags["timing_quality"]
1733        ret_val["timing_quality_average"] = tq["mean"]
1734        ret_val["timing_quality_lower_quantile"] = tq["lower_quartile"]
1735        ret_val["timing_quality_max"] = tq["max"]
1736        ret_val["timing_quality_median"] = tq["median"]
1737        ret_val["timing_quality_min"] = tq["min"]
1738        ret_val["timing_quality_upper_quantile"] = tq["lower_quartile"]
1739
1740    return ret_val
1741
1742
1743if __name__ == '__main__':
1744    import doctest
1745    doctest.testmod(exclude_empty=True)
1746