1# -*- coding: utf-8 -*- 2""" 3REFTEK130 read support, core routines. 4""" 5from __future__ import (absolute_import, division, print_function, 6 unicode_literals) 7from future.builtins import * # NOQA 8from future.utils import native_str 9 10import copy 11import io 12import os 13import warnings 14 15import numpy as np 16 17from obspy import Trace, Stream, UTCDateTime 18from obspy.core.util.obspy_types import ObsPyException 19 20from .packet import (Packet, EHPacket, _initial_unpack_packets, PACKET_TYPES, 21 PACKET_TYPES_IMPLEMENTED, PACKET_FINAL_DTYPE, 22 Reftek130UnpackPacketError, _unpack_C0_C2_data) 23 24 25NOW = UTCDateTime() 26 27 28class Reftek130Exception(ObsPyException): 29 pass 30 31 32def _is_reftek130(filename): 33 """ 34 Checks whether a file is REFTEK130 format or not. 35 36 :type filename: str 37 :param filename: REFTEK130 file to be checked. 38 :rtype: bool 39 :return: ``True`` if a REFTEK130 file. 40 41 Checks if overall length of file is consistent (i.e. multiple of 1024 42 bytes) and checks for valid packet type identifiers in the first 20 43 expected packet positions. 44 """ 45 if not os.path.isfile(filename): 46 return False 47 filesize = os.stat(filename).st_size 48 # check if overall file size is a multiple of 1024 49 if filesize < 1024 or filesize % 1024 != 0: 50 return False 51 52 with open(filename, 'rb') as fp: 53 # check first 20 expected packets' type header field 54 for i in range(20): 55 packet_type = fp.read(2).decode("ASCII", "replace") 56 if not packet_type: 57 # reached end of file.. 58 break 59 if packet_type not in PACKET_TYPES: 60 return False 61 fp.seek(1022, os.SEEK_CUR) 62 return True 63 64 65def _read_reftek130(filename, network="", location="", component_codes=None, 66 headonly=False, verbose=False, 67 sort_permuted_package_sequence=False, **kwargs): 68 """ 69 Read a REFTEK130 file into an ObsPy Stream. 70 71 :type filename: str 72 :param filename: REFTEK130 file to be read. 73 :type network: str 74 :param network: Network code to fill in for all data (network code is not 75 stored in EH/ET/DT packets). 76 :type location: str 77 :param location: Location code to fill in for all data (network code is not 78 stored in EH/ET/DT packets). 79 :type component_codes: list 80 :param component_codes: Iterable of single-character component codes (e.g. 81 ``['Z', 'N', 'E']``) to be appended to two-character stream name parsed 82 from event header packet (e.g. ``'HH'``) for each of the channels in 83 the data (e.g. to make the channel codes in a three channel data file 84 to ``'HHZ'``, ``'HHN'``, ``'HHE'`` in the created stream object). 85 :type headonly: bool 86 :param headonly: Determines whether or not to unpack the data or just 87 read the headers. 88 :type sort_permuted_package_sequence: bool 89 :param sort_permuted_package_sequence: Determines whether or not the 90 package list is sorted when a permuted package sequence is encountered. 91 This should only be used if problems occur with files that have a 92 permuted package sequence (showing the related warning message). 93 :rtype: :class:`~obspy.core.stream.Stream` 94 """ 95 # Reftek 130 data format stores only the last two digits of the year. We 96 # currently assume that 00-49 are years 2000-2049 and 50-99 are years 97 # 2050-2099. We deliberately raise an exception if the current year will 98 # become 2050 (just in case someone really still uses this code then.. ;-) 99 # At that point the year would probably have to be explicitly specified 100 # when reading data to be on the safe side. 101 if NOW.year > 2050: 102 raise NotImplementedError() 103 try: 104 rt130 = Reftek130.from_file(filename) 105 st = rt130.to_stream( 106 network=network, location=location, 107 component_codes=component_codes, headonly=headonly, 108 verbose=verbose, 109 sort_permuted_package_sequence=sort_permuted_package_sequence) 110 st.merge(-1) 111 st.sort() 112 return st 113 except Reftek130UnpackPacketError: 114 msg = ("Unable to read file '{}' as a Reftek130 file. Please contact " 115 "developers if you think this is a valid Reftek130 file.") 116 raise Reftek130Exception(msg.format(filename)) 117 118 119class Reftek130(object): 120 _info_header = "Reftek130 ({:d} packets{})" 121 _info_compact_header = [ 122 "Packet Sequence Byte Count Data Fmt Sampling Rate Time", 123 " | Packet Type | Event # | Station | Channel # |", 124 " | | Unit ID | | Data Stream # | | # of samples |", 125 " | | | Exper.# | | | | | | | |"] 126 _info_compact_footer = ("(detailed packet information with: " 127 "'print(Reftek130.__str__(compact=False))')") 128 129 def __init__(self): 130 self._data = np.array([], dtype=PACKET_FINAL_DTYPE) 131 self._filename = None 132 133 def __str__(self, compact=True): 134 filename = self._filename and ', file: {}'.format(self._filename) or '' 135 info = [self._info_header.format(len(self._data), filename)] 136 if compact: 137 info += copy.deepcopy(self._info_compact_header) 138 info[0] = info[0].format(len(self._data)) 139 for data in self._data: 140 info.append(Packet.from_data(data).__str__(compact=True)) 141 info.append(self._info_compact_footer) 142 else: 143 for data in self._data: 144 info.append(str(Packet.from_data(data))) 145 return "\n".join(info) 146 147 @staticmethod 148 def from_file(filename): 149 with io.open(filename, "rb") as fh: 150 string = fh.read() 151 rt = Reftek130() 152 rt._data = _initial_unpack_packets(string) 153 rt._filename = filename 154 return rt 155 156 def check_packet_sequence_and_sort(self, sort_permuted_package_sequence): 157 """ 158 Checks if packet sequence is ordered. If not, shows a warning and sorts 159 packets by packet sequence if ``sort_permuted_package_sequence=True``. 160 This should ensure that data (DT) packets are properly enclosed by the 161 appropriate event header/trailer (EH/ET) packets. 162 """ 163 diff = np.diff(self._data['packet_sequence'].astype(np.int16)) 164 # rollover from 9999 to 0 is not a packet sequence jump.. 165 jump = (diff < 1) & (diff != -9999) 166 if np.any(jump): 167 msg = ("Detected permuted packet sequence, sorting.") 168 warnings.warn(msg) 169 if sort_permuted_package_sequence: 170 self._data.sort(order=[ 171 native_str(key) for key in ("packet_sequence", "time")]) 172 173 def check_packet_sequence_contiguous(self): 174 """ 175 Checks if packet sequence is contiguous, i.e. without missing packets 176 in between. Currently raises if that is the case because this case is 177 not covered by test data yet. 178 """ 179 diff = np.diff(self._data['packet_sequence'].astype(np.int16)) 180 if np.any(diff > 1): 181 msg = ("Detected a non-contiguous packet sequence!") 182 warnings.warn(msg) 183 184 def drop_not_implemented_packet_types(self): 185 """ 186 Checks if there are packets of a type that is currently not implemented 187 and drop them showing a warning message. 188 """ 189 is_implemented = np.in1d( 190 self._data['packet_type'], 191 [x.encode() for x in PACKET_TYPES_IMPLEMENTED]) 192 # if all packets are of a type that is implemented, the nothing to do.. 193 if np.all(is_implemented): 194 return 195 # otherwise reduce packet list to what is implemented and warn 196 not_implemented = np.invert(is_implemented) 197 count_not_implemented = not_implemented.sum() 198 types_not_implemented = np.unique( 199 self._data['packet_type'][not_implemented]) 200 msg = ("Encountered some packets of types that are not " 201 "implemented yet (types: {}). Dropped {:d} packets " 202 "overall.") 203 msg = msg.format(types_not_implemented.tolist(), 204 count_not_implemented) 205 warnings.warn(msg) 206 self._data = self._data[is_implemented] 207 208 def to_stream(self, network="", location="", component_codes=None, 209 headonly=False, verbose=False, 210 sort_permuted_package_sequence=False): 211 """ 212 :type headonly: bool 213 :param headonly: Determines whether or not to unpack the data or just 214 read the headers. 215 """ 216 if verbose: 217 print(self) 218 if not len(self._data): 219 msg = "No packet data in Reftek130 object (file: {})" 220 raise Reftek130Exception(msg.format(self._filename)) 221 self.check_packet_sequence_and_sort(sort_permuted_package_sequence) 222 self.check_packet_sequence_contiguous() 223 self.drop_not_implemented_packet_types() 224 if not len(self._data): 225 msg = ("No packet data left in Reftek130 object after dropping " 226 "non-implemented packets (file: {})").format(self._filename) 227 raise Reftek130Exception(msg) 228 st = Stream() 229 for event_number in np.unique(self._data['event_number']): 230 data = self._data[self._data['event_number'] == event_number] 231 # we should have exactly one EH and one ET packet, truncated data 232 # sometimes misses the header or trailer packet. 233 eh_packets = data[data['packet_type'] == b"EH"] 234 et_packets = data[data['packet_type'] == b"ET"] 235 if len(eh_packets) == 0 and len(et_packets) == 0: 236 msg = ("Reftek data contains data packets without " 237 "corresponding header or trailer packet.") 238 raise Reftek130Exception(msg) 239 if len(eh_packets) > 1 or len(et_packets) > 1: 240 msg = ("Reftek data contains data packets with multiple " 241 "corresponding header or trailer packets.") 242 raise Reftek130Exception(msg) 243 if len(eh_packets) != 1: 244 msg = ("No event header (EH) packets in packet sequence. " 245 "File might be truncated.") 246 warnings.warn(msg) 247 if len(et_packets) != 1: 248 msg = ("No event trailer (ET) packets in packet sequence. " 249 "File might be truncated.") 250 warnings.warn(msg) 251 # use either the EH or ET packet, they have the same content (only 252 # trigger stop time is not in EH) 253 if len(eh_packets): 254 eh = EHPacket(eh_packets[0]) 255 else: 256 eh = EHPacket(et_packets[0]) 257 # only C0, C2, 16, 32 encodings supported right now 258 if eh.data_format == b"C0": 259 encoding = 'C0' 260 elif eh.data_format == b"C2": 261 encoding = 'C2' 262 elif eh.data_format == b"16": 263 msg = ("Reftek130 encoding '16' is implemented but untested, " 264 "please provide example data for testing at " 265 "https://github.com/obspy/obspy/issues/new.") 266 warnings.warn(msg) 267 encoding = '16' 268 elif eh.data_format == b"32": 269 encoding = '32' 270 else: 271 msg = ("Reftek data encoding '{}' not implemented yet. Please " 272 "open an issue on GitHub and provide a small (< 50kb) " 273 "test file.").format(eh.data_format) 274 raise NotImplementedError(msg) 275 header = { 276 "network": network, 277 "station": (eh.station_name + 278 eh.station_name_extension).strip(), 279 "location": location, "sampling_rate": eh.sampling_rate, 280 "reftek130": eh._to_dict()} 281 delta = 1.0 / eh.sampling_rate 282 delta_nanoseconds = int(delta * 1e9) 283 inds_dt = data['packet_type'] == b"DT" 284 data_channels = np.unique(data[inds_dt]['channel_number']) 285 for channel_number in data_channels: 286 inds = data['channel_number'] == channel_number 287 # channel number of EH/ET packets also equals zero (one of the 288 # three unused bytes in the extended header of EH/ET packets) 289 inds &= data['packet_type'] == b"DT" 290 packets = data[inds] 291 292 # split into contiguous blocks, i.e. find gaps. packet sequence 293 # was sorted already.. 294 endtimes = ( 295 packets[:-1]["time"] + 296 packets[:-1]["number_of_samples"].astype(np.int64) * 297 delta_nanoseconds) 298 # check if next starttime matches seamless to last chunk 299 # 1e-3 seconds == 1e6 nanoseconds is the smallest time 300 # difference reftek130 format can represent, so anything larger 301 # or equal means a gap/overlap. 302 time_diffs_milliseconds_abs = np.abs( 303 packets[1:]["time"] - endtimes) / 1000000 304 gaps = time_diffs_milliseconds_abs >= 1 305 if np.any(gaps): 306 gap_split_indices = np.nonzero(gaps)[0] + 1 307 contiguous = np.array_split(packets, gap_split_indices) 308 else: 309 contiguous = [packets] 310 311 for packets_ in contiguous: 312 starttime = packets_[0]['time'] 313 314 if headonly: 315 sample_data = np.array([], dtype=np.int32) 316 npts = packets_["number_of_samples"].sum() 317 else: 318 if encoding in ('C0', 'C2'): 319 sample_data = _unpack_C0_C2_data(packets_, 320 encoding) 321 elif encoding in ('16', '32'): 322 dtype = {'16': np.int16, '32': np.int32}[encoding] 323 # just fix endianness and use correct dtype 324 sample_data = np.require( 325 packets_['payload'], 326 requirements=['C_CONTIGUOUS']) 327 # either int16 or int32 328 sample_data = sample_data.flatten().view(dtype) 329 # switch endianness, rt130 stores in big endian 330 sample_data = sample_data.byteswap() 331 npts = len(sample_data) 332 333 tr = Trace(data=sample_data, header=copy.deepcopy(header)) 334 # channel number is not included in the EH/ET packet 335 # payload, so add it to stats as well.. 336 tr.stats.reftek130['channel_number'] = channel_number 337 if headonly: 338 tr.stats.npts = npts 339 tr.stats.starttime = UTCDateTime(ns=starttime) 340 # if component codes were explicitly provided, use them 341 # together with the stream label 342 if component_codes is not None: 343 tr.stats.channel = (eh.stream_name.strip() + 344 component_codes[channel_number]) 345 # otherwise check if channel code is set for the given 346 # channel (seems to be not the case usually) 347 elif eh.channel_code[channel_number] is not None: 348 tr.stats.channel = eh.channel_code[channel_number] 349 # otherwise fall back to using the stream label together 350 # with the number of the channel in the file (starting with 351 # 0, as Z-1-2 is common use for data streams not oriented 352 # against North) 353 else: 354 msg = ("No channel code specified in the data file " 355 "and no component codes specified. Using " 356 "stream label and number of channel in file as " 357 "channel codes.") 358 warnings.warn(msg) 359 tr.stats.channel = ( 360 eh.stream_name.strip() + str(channel_number)) 361 # check if endtime of trace is consistent 362 t_last = packets_[-1]['time'] 363 npts_last = packets_[-1]['number_of_samples'] 364 try: 365 if not headonly: 366 assert npts == len(sample_data) 367 if npts_last: 368 assert tr.stats.endtime == UTCDateTime( 369 ns=t_last) + (npts_last - 1) * delta 370 if npts: 371 assert tr.stats.endtime == ( 372 tr.stats.starttime + (npts - 1) * delta) 373 except AssertionError: 374 msg = ("Reftek file has a trace with an inconsistent " 375 "endtime or number of samples. Please open an " 376 "issue on GitHub and provide your file for" 377 "testing.") 378 raise Reftek130Exception(msg) 379 st += tr 380 381 return st 382 383 384if __name__ == '__main__': 385 import doctest 386 doctest.testmod(exclude_empty=True) 387