1# -*- coding: utf-8 -*-
2"""
3REFTEK130 read support, core routines.
4"""
5from __future__ import (absolute_import, division, print_function,
6                        unicode_literals)
7from future.builtins import *  # NOQA
8from future.utils import native_str
9
10import copy
11import io
12import os
13import warnings
14
15import numpy as np
16
17from obspy import Trace, Stream, UTCDateTime
18from obspy.core.util.obspy_types import ObsPyException
19
20from .packet import (Packet, EHPacket, _initial_unpack_packets, PACKET_TYPES,
21                     PACKET_TYPES_IMPLEMENTED, PACKET_FINAL_DTYPE,
22                     Reftek130UnpackPacketError, _unpack_C0_C2_data)
23
24
25NOW = UTCDateTime()
26
27
class Reftek130Exception(ObsPyException):
    """
    Raised for problems encountered while reading REFTEK130 files.
    """
    pass
30
31
32def _is_reftek130(filename):
33    """
34    Checks whether a file is REFTEK130 format or not.
35
36    :type filename: str
37    :param filename: REFTEK130 file to be checked.
38    :rtype: bool
39    :return: ``True`` if a REFTEK130 file.
40
41    Checks if overall length of file is consistent (i.e. multiple of 1024
42    bytes) and checks for valid packet type identifiers in the first 20
43    expected packet positions.
44    """
45    if not os.path.isfile(filename):
46        return False
47    filesize = os.stat(filename).st_size
48    # check if overall file size is a multiple of 1024
49    if filesize < 1024 or filesize % 1024 != 0:
50        return False
51
52    with open(filename, 'rb') as fp:
53        # check first 20 expected packets' type header field
54        for i in range(20):
55            packet_type = fp.read(2).decode("ASCII", "replace")
56            if not packet_type:
57                # reached end of file..
58                break
59            if packet_type not in PACKET_TYPES:
60                return False
61            fp.seek(1022, os.SEEK_CUR)
62    return True
63
64
def _read_reftek130(filename, network="", location="", component_codes=None,
                    headonly=False, verbose=False,
                    sort_permuted_package_sequence=False, **kwargs):
    """
    Read a REFTEK130 file into an ObsPy Stream.

    :type filename: str
    :param filename: REFTEK130 file to be read.
    :type network: str
    :param network: Network code to fill in for all data (network code is not
        stored in EH/ET/DT packets).
    :type location: str
    :param location: Location code to fill in for all data (location code is
        not stored in EH/ET/DT packets).
    :type component_codes: list
    :param component_codes: Iterable of single-character component codes (e.g.
        ``['Z', 'N', 'E']``) to be appended to two-character stream name parsed
        from event header packet (e.g. ``'HH'``) for each of the channels in
        the data (e.g. to make the channel codes in a three channel data file
        to ``'HHZ'``, ``'HHN'``, ``'HHE'`` in the created stream object).
    :type headonly: bool
    :param headonly: Determines whether or not to unpack the data or just
        read the headers.
    :type verbose: bool
    :param verbose: Determines whether the packet listing is printed while
        converting to a stream.
    :type sort_permuted_package_sequence: bool
    :param sort_permuted_package_sequence: Determines whether or not the
        package list is sorted when a permuted package sequence is encountered.
        This should only be used if problems occur with files that have a
        permuted package sequence (showing the related warning message).
    :rtype: :class:`~obspy.core.stream.Stream`
    :raises Reftek130Exception: if the file can not be unpacked as Reftek130
        packets.
    """
    # Reftek 130 data format stores only the last two digits of the year.  We
    # currently assume that 00-49 are years 2000-2049 and 50-99 are years
    # 2050-2099. We deliberately raise an exception once the current year is
    # past 2050 (just in case someone really still uses this code then.. ;-)
    # At that point the year would probably have to be explicitly specified
    # when reading data to be on the safe side.
    if NOW.year > 2050:
        raise NotImplementedError()
    try:
        rt130 = Reftek130.from_file(filename)
        st = rt130.to_stream(
            network=network, location=location,
            component_codes=component_codes, headonly=headonly,
            verbose=verbose,
            sort_permuted_package_sequence=sort_permuted_package_sequence)
        # method=-1: only join traces that fit together seamlessly,
        # without filling any gaps
        st.merge(-1)
        st.sort()
        return st
    except Reftek130UnpackPacketError:
        msg = ("Unable to read file '{}' as a Reftek130 file. Please contact "
               "developers if you think this is a valid Reftek130 file.")
        raise Reftek130Exception(msg.format(filename))
117
118
class Reftek130(object):
    """
    In-memory representation of the packets of a single REFTEK130 file.

    Packets are held in a structured numpy array (``PACKET_FINAL_DTYPE``)
    and can be assembled into an ObsPy :class:`~obspy.core.stream.Stream`
    via :meth:`to_stream`.
    """
    # first line of the __str__ output, filled with packet count and filename
    _info_header = "Reftek130 ({:d} packets{})"
    # legend printed above the one-line-per-packet listing in compact mode
    _info_compact_header = [
        "Packet Sequence  Byte Count  Data Fmt  Sampling Rate      Time",
        "  | Packet Type   |  Event #  | Station | Channel #         |",
        "  |   |  Unit ID  |    | Data Stream #  |   |  # of samples |",
        "  |   |   |  Exper.#   |   |  |  |      |   |    |          |"]
    _info_compact_footer = ("(detailed packet information with: "
                            "'print(Reftek130.__str__(compact=False))')")

    def __init__(self):
        """
        Set up an empty Reftek130 object (no packets, no filename).
        """
        self._data = np.array([], dtype=PACKET_FINAL_DTYPE)
        self._filename = None

    def __str__(self, compact=True):
        """
        Return a multi-line string summary of all packets.

        :type compact: bool
        :param compact: If ``True``, print a one-line summary per packet
            (framed by a legend and a footer), otherwise print the full
            details of every packet.
        """
        filename = self._filename and ', file: {}'.format(self._filename) or ''
        info = [self._info_header.format(len(self._data), filename)]
        if compact:
            info += copy.deepcopy(self._info_compact_header)
            # (a leftover no-op re-format of info[0] was removed here; the
            # header is already fully formatted above and re-formatting
            # could raise if the filename contained curly braces)
            for data in self._data:
                info.append(Packet.from_data(data).__str__(compact=True))
            info.append(self._info_compact_footer)
        else:
            for data in self._data:
                info.append(str(Packet.from_data(data)))
        return "\n".join(info)

    @staticmethod
    def from_file(filename):
        """
        Read a REFTEK130 file into a new :class:`Reftek130` object.

        :type filename: str
        :param filename: REFTEK130 file to read.
        :rtype: :class:`Reftek130`
        """
        with io.open(filename, "rb") as fh:
            string = fh.read()
        rt = Reftek130()
        rt._data = _initial_unpack_packets(string)
        rt._filename = filename
        return rt

    def check_packet_sequence_and_sort(self, sort_permuted_package_sequence):
        """
        Checks if packet sequence is ordered. If not, shows a warning and sorts
        packets by packet sequence if ``sort_permuted_package_sequence=True``.
        This should ensure that data (DT) packets are properly enclosed by the
        appropriate event header/trailer (EH/ET) packets.
        """
        # packet_sequence values are 0..9999, so int16 safely holds the diffs
        diff = np.diff(self._data['packet_sequence'].astype(np.int16))
        # rollover from 9999 to 0 is not a packet sequence jump..
        jump = (diff < 1) & (diff != -9999)
        if np.any(jump):
            # NOTE(review): the warning always says "sorting", but sorting
            # only actually happens when sort_permuted_package_sequence=True
            msg = ("Detected permuted packet sequence, sorting.")
            warnings.warn(msg)
            if sort_permuted_package_sequence:
                self._data.sort(order=[
                    native_str(key) for key in ("packet_sequence", "time")])

    def check_packet_sequence_contiguous(self):
        """
        Checks if packet sequence is contiguous, i.e. without missing packets
        in between. Shows a warning if that is not the case.
        """
        diff = np.diff(self._data['packet_sequence'].astype(np.int16))
        if np.any(diff > 1):
            msg = ("Detected a non-contiguous packet sequence!")
            warnings.warn(msg)

    def drop_not_implemented_packet_types(self):
        """
        Checks if there are packets of a type that is currently not implemented
        and drop them showing a warning message.
        """
        is_implemented = np.in1d(
            self._data['packet_type'],
            [x.encode() for x in PACKET_TYPES_IMPLEMENTED])
        # if all packets are of a type that is implemented, then nothing to do
        if np.all(is_implemented):
            return
        # otherwise reduce packet list to what is implemented and warn
        not_implemented = np.invert(is_implemented)
        count_not_implemented = not_implemented.sum()
        types_not_implemented = np.unique(
            self._data['packet_type'][not_implemented])
        msg = ("Encountered some packets of types that are not "
               "implemented yet (types: {}). Dropped {:d} packets "
               "overall.")
        msg = msg.format(types_not_implemented.tolist(),
                         count_not_implemented)
        warnings.warn(msg)
        self._data = self._data[is_implemented]

    def to_stream(self, network="", location="", component_codes=None,
                  headonly=False, verbose=False,
                  sort_permuted_package_sequence=False):
        """
        Assemble the unpacked packets into an ObsPy Stream.

        :type network: str
        :param network: Network code to fill in for all traces (network code
            is not stored in EH/ET/DT packets).
        :type location: str
        :param location: Location code to fill in for all traces (location
            code is not stored in EH/ET/DT packets).
        :type component_codes: list
        :param component_codes: Iterable of single-character component codes
            to be appended to the two-character stream name parsed from the
            event header packet for each of the channels in the data.
        :type headonly: bool
        :param headonly: Determines whether or not to unpack the data or just
            read the headers.
        :type verbose: bool
        :param verbose: Whether to print the packet listing before conversion.
        :type sort_permuted_package_sequence: bool
        :param sort_permuted_package_sequence: Whether to sort the packet list
            if a permuted packet sequence is encountered.
        :rtype: :class:`~obspy.core.stream.Stream`
        :raises Reftek130Exception: if no packet data is available, if data
            packets lack a corresponding header/trailer packet, or if a trace
            ends up with an inconsistent endtime or number of samples.
        """
        if verbose:
            print(self)
        if not len(self._data):
            msg = "No packet data in Reftek130 object (file: {})"
            raise Reftek130Exception(msg.format(self._filename))
        self.check_packet_sequence_and_sort(sort_permuted_package_sequence)
        self.check_packet_sequence_contiguous()
        self.drop_not_implemented_packet_types()
        if not len(self._data):
            msg = ("No packet data left in Reftek130 object after dropping "
                   "non-implemented packets (file: {})").format(self._filename)
            raise Reftek130Exception(msg)
        st = Stream()
        # one packet sequence (EH..DT..ET) per event number
        for event_number in np.unique(self._data['event_number']):
            data = self._data[self._data['event_number'] == event_number]
            # we should have exactly one EH and one ET packet, truncated data
            # sometimes misses the header or trailer packet.
            eh_packets = data[data['packet_type'] == b"EH"]
            et_packets = data[data['packet_type'] == b"ET"]
            if len(eh_packets) == 0 and len(et_packets) == 0:
                msg = ("Reftek data contains data packets without "
                       "corresponding header or trailer packet.")
                raise Reftek130Exception(msg)
            if len(eh_packets) > 1 or len(et_packets) > 1:
                msg = ("Reftek data contains data packets with multiple "
                       "corresponding header or trailer packets.")
                raise Reftek130Exception(msg)
            if len(eh_packets) != 1:
                msg = ("No event header (EH) packets in packet sequence. "
                       "File might be truncated.")
                warnings.warn(msg)
            if len(et_packets) != 1:
                msg = ("No event trailer (ET) packets in packet sequence. "
                       "File might be truncated.")
                warnings.warn(msg)
            # use either the EH or ET packet, they have the same content (only
            # trigger stop time is not in EH)
            if len(eh_packets):
                eh = EHPacket(eh_packets[0])
            else:
                eh = EHPacket(et_packets[0])
            # only C0, C2, 16, 32 encodings supported right now
            if eh.data_format == b"C0":
                encoding = 'C0'
            elif eh.data_format == b"C2":
                encoding = 'C2'
            elif eh.data_format == b"16":
                msg = ("Reftek130 encoding '16' is implemented but untested, "
                       "please provide example data for testing at "
                       "https://github.com/obspy/obspy/issues/new.")
                warnings.warn(msg)
                encoding = '16'
            elif eh.data_format == b"32":
                encoding = '32'
            else:
                msg = ("Reftek data encoding '{}' not implemented yet. Please "
                       "open an issue on GitHub and provide a small (< 50kb) "
                       "test file.").format(eh.data_format)
                raise NotImplementedError(msg)
            # header shared by all traces of this event
            header = {
                "network": network,
                "station": (eh.station_name +
                            eh.station_name_extension).strip(),
                "location": location, "sampling_rate": eh.sampling_rate,
                "reftek130": eh._to_dict()}
            delta = 1.0 / eh.sampling_rate
            # packet times are stored as integer nanoseconds
            delta_nanoseconds = int(delta * 1e9)
            inds_dt = data['packet_type'] == b"DT"
            data_channels = np.unique(data[inds_dt]['channel_number'])
            for channel_number in data_channels:
                inds = data['channel_number'] == channel_number
                # channel number of EH/ET packets also equals zero (one of the
                # three unused bytes in the extended header of EH/ET packets)
                inds &= data['packet_type'] == b"DT"
                packets = data[inds]

                # split into contiguous blocks, i.e. find gaps. packet sequence
                # was sorted already..
                endtimes = (
                    packets[:-1]["time"] +
                    packets[:-1]["number_of_samples"].astype(np.int64) *
                    delta_nanoseconds)
                # check if next starttime matches seamless to last chunk
                # 1e-3 seconds == 1e6 nanoseconds is the smallest time
                # difference reftek130 format can represent, so anything larger
                # or equal means a gap/overlap.
                time_diffs_milliseconds_abs = np.abs(
                    packets[1:]["time"] - endtimes) / 1000000
                gaps = time_diffs_milliseconds_abs >= 1
                if np.any(gaps):
                    gap_split_indices = np.nonzero(gaps)[0] + 1
                    contiguous = np.array_split(packets, gap_split_indices)
                else:
                    contiguous = [packets]

                # one trace per gapless run of packets
                for packets_ in contiguous:
                    starttime = packets_[0]['time']

                    if headonly:
                        sample_data = np.array([], dtype=np.int32)
                        npts = packets_["number_of_samples"].sum()
                    else:
                        if encoding in ('C0', 'C2'):
                            sample_data = _unpack_C0_C2_data(packets_,
                                                             encoding)
                        elif encoding in ('16', '32'):
                            dtype = {'16': np.int16, '32': np.int32}[encoding]
                            # just fix endianness and use correct dtype
                            sample_data = np.require(
                                packets_['payload'],
                                requirements=['C_CONTIGUOUS'])
                            # either int16 or int32
                            sample_data = sample_data.flatten().view(dtype)
                            # switch endianness, rt130 stores in big endian
                            sample_data = sample_data.byteswap()
                        npts = len(sample_data)

                    tr = Trace(data=sample_data, header=copy.deepcopy(header))
                    # channel number is not included in the EH/ET packet
                    # payload, so add it to stats as well..
                    tr.stats.reftek130['channel_number'] = channel_number
                    if headonly:
                        tr.stats.npts = npts
                    tr.stats.starttime = UTCDateTime(ns=starttime)
                    # if component codes were explicitly provided, use them
                    # together with the stream label
                    if component_codes is not None:
                        tr.stats.channel = (eh.stream_name.strip() +
                                            component_codes[channel_number])
                    # otherwise check if channel code is set for the given
                    # channel (seems to be not the case usually)
                    elif eh.channel_code[channel_number] is not None:
                        tr.stats.channel = eh.channel_code[channel_number]
                    # otherwise fall back to using the stream label together
                    # with the number of the channel in the file (starting with
                    # 0, as Z-1-2 is common use for data streams not oriented
                    # against North)
                    else:
                        msg = ("No channel code specified in the data file "
                               "and no component codes specified. Using "
                               "stream label and number of channel in file as "
                               "channel codes.")
                        warnings.warn(msg)
                        tr.stats.channel = (
                            eh.stream_name.strip() + str(channel_number))
                    # check if endtime of trace is consistent
                    t_last = packets_[-1]['time']
                    npts_last = packets_[-1]['number_of_samples']
                    try:
                        if not headonly:
                            assert npts == len(sample_data)
                        if npts_last:
                            assert tr.stats.endtime == UTCDateTime(
                                ns=t_last) + (npts_last - 1) * delta
                        if npts:
                            assert tr.stats.endtime == (
                                tr.stats.starttime + (npts - 1) * delta)
                    except AssertionError:
                        msg = ("Reftek file has a trace with an inconsistent "
                               "endtime or number of samples. Please open an "
                               "issue on GitHub and provide your file for "
                               "testing.")
                        raise Reftek130Exception(msg)
                    st += tr

        return st
382
383
if __name__ == '__main__':
    # run the module's doctests when executed as a script
    import doctest
    doctest.testmod(exclude_empty=True)
387