1# -*- coding: utf-8 -*- 2""" 3MiniSEED specific utilities. 4""" 5from __future__ import (absolute_import, division, print_function, 6 unicode_literals) 7from future.builtins import * # NOQA 8from future.utils import native_str 9 10import collections 11import ctypes as C # NOQA 12import os 13import sys 14import warnings 15from datetime import datetime 16from struct import pack, unpack 17 18import numpy as np 19 20from obspy import UTCDateTime 21from obspy.core.compatibility import from_buffer, collections_abc 22from obspy.core.util.decorator import ObsPyDeprecationWarning 23from . import InternalMSEEDParseTimeError 24from .headers import (ENCODINGS, ENDIAN, FIXED_HEADER_ACTIVITY_FLAGS, 25 FIXED_HEADER_DATA_QUAL_FLAGS, 26 FIXED_HEADER_IO_CLOCK_FLAGS, HPTMODULUS, 27 SAMPLESIZES, UNSUPPORTED_ENCODINGS, MSRecord, 28 MS_NOERROR, clibmseed) 29 30 31def get_start_and_end_time(file_or_file_object): 32 """ 33 Returns the start and end time of a MiniSEED file or file-like object. 34 35 :type file_or_file_object: str or file 36 :param file_or_file_object: MiniSEED file name or open file-like object 37 containing a MiniSEED record. 38 :return: tuple (start time of first record, end time of last record) 39 40 This method will return the start time of the first record and the end time 41 of the last record. Keep in mind that it will not return the correct result 42 if the records in the MiniSEED file do not have a chronological ordering. 43 44 The returned end time is the time of the last data sample and not the 45 time that the last sample covers. 46 47 .. rubric:: Example 48 49 >>> from obspy.core.util import get_example_file 50 >>> filename = get_example_file( 51 ... "BW.BGLD.__.EHE.D.2008.001.first_10_records") 52 >>> get_start_and_end_time(filename) # doctest: +NORMALIZE_WHITESPACE 53 (UTCDateTime(2007, 12, 31, 23, 59, 59, 915000), 54 UTCDateTime(2008, 1, 1, 0, 0, 20, 510000)) 55 56 It also works with an open file pointer. The file pointer itself will not 57 be changed. 58 59 >>> f = open(filename, 'rb') 60 >>> get_start_and_end_time(f) # doctest: +NORMALIZE_WHITESPACE 61 (UTCDateTime(2007, 12, 31, 23, 59, 59, 915000), 62 UTCDateTime(2008, 1, 1, 0, 0, 20, 510000)) 63 64 And also with a MiniSEED file stored in a BytesIO 65 66 >>> import io 67 >>> file_object = io.BytesIO(f.read()) 68 >>> get_start_and_end_time(file_object) # doctest: +NORMALIZE_WHITESPACE 69 (UTCDateTime(2007, 12, 31, 23, 59, 59, 915000), 70 UTCDateTime(2008, 1, 1, 0, 0, 20, 510000)) 71 >>> file_object.close() 72 73 If the file pointer does not point to the first record, the start time will 74 refer to the record it points to. 75 76 >>> _ = f.seek(512) 77 >>> get_start_and_end_time(f) # doctest: +NORMALIZE_WHITESPACE 78 (UTCDateTime(2008, 1, 1, 0, 0, 1, 975000), 79 UTCDateTime(2008, 1, 1, 0, 0, 20, 510000)) 80 81 The same is valid for a file-like object. 82 83 >>> file_object = io.BytesIO(f.read()) 84 >>> get_start_and_end_time(file_object) # doctest: +NORMALIZE_WHITESPACE 85 (UTCDateTime(2008, 1, 1, 0, 0, 1, 975000), 86 UTCDateTime(2008, 1, 1, 0, 0, 20, 510000)) 87 >>> f.close() 88 """ 89 # Get the starttime of the first record. 90 info = get_record_information(file_or_file_object) 91 starttime = info['starttime'] 92 # Get the end time of the last record. 93 info = get_record_information( 94 file_or_file_object, 95 (info['number_of_records'] - 1) * info['record_length']) 96 endtime = info['endtime'] 97 return starttime, endtime 98 99 100def get_flags(files, starttime=None, endtime=None, 101 io_flags=True, activity_flags=True, 102 data_quality_flags=True, timing_quality=True): 103 """ 104 Counts all data quality, I/O, and activity flags of the given MiniSEED 105 file and returns statistics about the timing quality if applicable. 106 107 :param files: MiniSEED file or list of MiniSEED files. 108 :type files: list, str, file-like object 109 :param starttime: Only use records whose end time is larger than this 110 given time. 111 :type starttime: str or :class:`obspy.core.utcdatetime.UTCDateTime` 112 :param endtime: Only use records whose start time is smaller than this 113 given time. 114 :type endtime: str or :class:`obspy.core.utcdatetime.UTCDateTime` 115 :param io_flags: Extract I/O flag counts. 116 :type io_flags: bool 117 :param activity_flags: Extract activity flag counts. 118 :type activity_flags: bool 119 :param data_quality_flags: Extract data quality flag counts. 120 :type data_quality_flags: bool 121 :param timing_quality: Extract timing quality and corresponding statistics. 122 :type timing_quality: bool 123 124 :return: Dictionary with information about the timing quality and the data 125 quality, I/O, and activity flags. It has the following keys: 126 ``"number_of_records_used"``, ``"record_count"``, 127 ``"timing_correction"``, ``"timing_correction_count"``, 128 ``"data_quality_flags_counts"``, ``"activity_flags_counts"``, 129 ``"io_and_clock_flags_counts"``, ``"data_quality_flags_percentages"``, 130 ``"activity_flags_percentages"``, ``"io_and_clock_flags_percentages"``, 131 and ``"timing_quality"``. 132 133 .. rubric:: Flags 134 135 This method will count all set bit flags in the fixed header of a MiniSEED 136 file and return the total count for each flag type. The following flags 137 are extracted: 138 139 **Data quality flags:** 140 141 ======== ================================================= 142 Bit Description 143 ======== ================================================= 144 [Bit 0] Amplifier saturation detected (station dependent) 145 [Bit 1] Digitizer clipping detected 146 [Bit 2] Spikes detected 147 [Bit 3] Glitches detected 148 [Bit 4] Missing/padded data present 149 [Bit 5] Telemetry synchronization error 150 [Bit 6] A digital filter may be charging 151 [Bit 7] Time tag is questionable 152 ======== ================================================= 153 154 **Activity flags:** 155 156 ======== ================================================= 157 Bit Description 158 ======== ================================================= 159 [Bit 0] Calibration signals present 160 [Bit 1] Time correction applied 161 [Bit 2] Beginning of an event, station trigger 162 [Bit 3] End of the event, station detriggers 163 [Bit 4] A positive leap second happened during this record 164 [Bit 5] A negative leap second happened during this record 165 [Bit 6] Event in progress 166 ======== ================================================= 167 168 **I/O and clock flags:** 169 170 ======== ================================================= 171 Bit Description 172 ======== ================================================= 173 [Bit 0] Station volume parity error possibly present 174 [Bit 1] Long record read (possibly no problem) 175 [Bit 2] Short record read (record padded) 176 [Bit 3] Start of time series 177 [Bit 4] End of time series 178 [Bit 5] Clock locked 179 ======== ================================================= 180 181 .. rubric:: Timing quality 182 183 If the file has a Blockette 1001 statistics about the timing quality will 184 be returned if ``timing_quality`` is True. See the doctests for more 185 information. 186 187 This method will read the timing quality in Blockette 1001 for each 188 record in the file if available and return the following statistics: 189 Minima, maxima, average, median and upper and lower quartiles. 190 191 .. rubric:: Examples 192 193 >>> from obspy.core.util import get_example_file 194 >>> filename = get_example_file("qualityflags.mseed") 195 >>> flags = get_flags(filename) 196 >>> for k, v in flags["data_quality_flags_counts"].items(): 197 ... print(k, v) 198 amplifier_saturation 9 199 digitizer_clipping 8 200 spikes 7 201 glitches 6 202 missing_padded_data 5 203 telemetry_sync_error 4 204 digital_filter_charging 3 205 suspect_time_tag 2 206 207 Reading a file with Blockette 1001 will return timing quality statistics if 208 requested. 209 210 >>> filename = get_example_file("timingquality.mseed") 211 >>> flags = get_flags(filename) 212 >>> for k, v in sorted(flags["timing_quality"].items()): 213 ... print(k, v) # doctest: +ELLIPSIS +NORMALIZE_WHITESPACE 214 all_values [...] 215 lower_quartile 25.0 216 max 100.0 217 mean 50.0 218 median 50.0 219 min 0.0 220 upper_quartile 75.0 221 """ 222 # Splat input files to array 223 if not isinstance(files, list): 224 files = [files] 225 226 starttime = float(UTCDateTime(starttime)) if starttime else None 227 endtime = float(UTCDateTime(endtime)) if endtime else None 228 229 records = [] 230 231 # Use clibmseed to get header parameters for all files 232 for file in files: 233 234 # If it's a file name just read it. 235 if isinstance(file, (str, native_str)): 236 # Read to NumPy array which is used as a buffer. 237 bfr_np = np.fromfile(file, dtype=np.int8) 238 elif hasattr(file, 'read'): 239 bfr_np = from_buffer(file.read(), dtype=np.int8) 240 241 offset = 0 242 243 msr = clibmseed.msr_init(C.POINTER(MSRecord)()) 244 245 while(True): 246 # Read up to max record length. 247 record = bfr_np[offset: offset + 8192] 248 if len(record) < 48: 249 break 250 251 retcode = clibmseed.msr_parse(record, len(record), C.pointer(msr), 252 -1, 0, 0) 253 254 if retcode != MS_NOERROR: 255 break 256 257 offset += msr.contents.reclen 258 259 # Check starttime and endtime of the record 260 # Read the sampling rate from the header (is this unsafe?) 261 # Tried using msr.msr_nomsamprate(msr) but the return is strange 262 r_start = clibmseed.msr_starttime(msr) / HPTMODULUS 263 r_delta = (1 / msr.contents.samprate) 264 r_end = (clibmseed.msr_endtime(msr) / HPTMODULUS) + r_delta 265 266 # Cut off records to start & endtime 267 if starttime is not None: 268 if r_end <= starttime: 269 continue 270 if r_start < starttime: 271 r_start = starttime 272 273 if endtime is not None: 274 if r_start >= endtime: 275 continue 276 if r_end > endtime: 277 r_end = endtime 278 279 # Get the timing quality in blockette1001 if it exists 280 if msr.contents.Blkt1001: 281 r_tq = msr.contents.Blkt1001.contents.timing_qual 282 else: 283 r_tq = None 284 285 # Collect all records and parameters in the range 286 records.append({ 287 'start': r_start, 288 'delta': r_delta, 289 'end': r_end, 290 'tq': r_tq, 291 'tc': msr.contents.fsdh.contents.time_correct, 292 'io': msr.contents.fsdh.contents.io_flags, 293 'dq': msr.contents.fsdh.contents.dq_flags, 294 'ac': msr.contents.fsdh.contents.act_flags 295 }) 296 297 # Free memory to be ready for next file. 298 clibmseed.msr_free(C.pointer(msr)) 299 300 # It should be faster to sort by record endtime 301 # if we reverse the array first 302 records.reverse() 303 records.sort(key=lambda x: x["end"], reverse=True) 304 305 # Create collections for the flags 306 dq_flags_counts = collections.OrderedDict([ 307 ("amplifier_saturation", 0), 308 ("digitizer_clipping", 0), 309 ("spikes", 0), 310 ("glitches", 0), 311 ("missing_padded_data", 0), 312 ("telemetry_sync_error", 0), 313 ("digital_filter_charging", 0), 314 ("suspect_time_tag", 0) 315 ]) 316 dq_flags_seconds = collections.OrderedDict([ 317 ("amplifier_saturation", 0), 318 ("digitizer_clipping", 0), 319 ("spikes", 0), 320 ("glitches", 0), 321 ("missing_padded_data", 0), 322 ("telemetry_sync_error", 0), 323 ("digital_filter_charging", 0), 324 ("suspect_time_tag", 0) 325 ]) 326 327 io_flags_counts = collections.OrderedDict([ 328 ("station_volume", 0), 329 ("long_record_read", 0), 330 ("short_record_read", 0), 331 ("start_time_series", 0), 332 ("end_time_series", 0), 333 ("clock_locked", 0) 334 ]) 335 io_flags_seconds = collections.OrderedDict([ 336 ("station_volume", 0), 337 ("long_record_read", 0), 338 ("short_record_read", 0), 339 ("start_time_series", 0), 340 ("end_time_series", 0), 341 ("clock_locked", 0) 342 ]) 343 344 ac_flags_counts = collections.OrderedDict([ 345 ("calibration_signal", 0), 346 ("time_correction_applied", 0), 347 ("event_begin", 0), 348 ("event_end", 0), 349 ("positive_leap", 0), 350 ("negative_leap", 0), 351 ("event_in_progress", 0) 352 ]) 353 ac_flags_seconds = collections.OrderedDict([ 354 ("calibration_signal", 0), 355 ("time_correction_applied", 0), 356 ("event_begin", 0), 357 ("event_end", 0), 358 ("positive_leap", 0), 359 ("negative_leap", 0), 360 ("event_in_progress", 0) 361 ]) 362 363 coverage = None 364 used_record_count = 0 365 timing_correction = 0.0 366 timing_correction_count = 0 367 tq = [] 368 369 # Go over all sorted records from back to front 370 for record in records: 371 372 # For counts we do not care about overlaps 373 # simply count contribution from all the records 374 if io_flags: 375 for _i, key in enumerate(io_flags_seconds.keys()): 376 if (record["io"] & (1 << _i)) != 0: 377 io_flags_counts[key] += 1 378 379 if activity_flags: 380 for _i, key in enumerate(ac_flags_seconds.keys()): 381 if (record["ac"] & (1 << _i)) != 0: 382 ac_flags_counts[key] += 1 383 384 if data_quality_flags: 385 for _i, key in enumerate(dq_flags_seconds.keys()): 386 if (record["dq"] & (1 << _i)) != 0: 387 dq_flags_counts[key] += 1 388 389 # Coverage is the timewindow that is covered by the records 390 # so bits in overlapping records are not counted 391 # The first record starts a clean window 392 if coverage is None: 393 coverage = [record["start"], record["end"]] 394 else: 395 396 # Account for time tolerance 397 time_tolerance = 0.5 * record['delta'] 398 tolerated_end = coverage[0] - time_tolerance 399 400 # Start is beyond coverage, skip the overlapping record 401 if record["start"] >= coverage[0]: 402 continue 403 404 # Fix end to the start of the coverage if it is overlaps 405 # with the coverage window. Or if it is within the allowed 406 # time tolerance 407 if record["end"] > coverage[0] or record["end"] > tolerated_end: 408 record["end"] = coverage[0] 409 410 # Extend the coverage 411 if record["start"] < coverage[0]: 412 coverage[0] = record["start"] 413 414 # Get the record length in seconds 415 record_length_seconds = (record["end"] - record["start"]) 416 417 # Skip if the record length is 0 (or negative) 418 if record_length_seconds <= 0.0: 419 continue 420 421 # Overlapping records do not count ot the used_records 422 # used records tracks the amount of timing quality 423 # parameters we expect 424 used_record_count += 1 425 426 # Bitwise AND to count flags and store in orderedDicts 427 if io_flags: 428 for _i, key in enumerate(io_flags_seconds.keys()): 429 if (record["io"] & (1 << _i)) != 0: 430 io_flags_seconds[key] += record_length_seconds 431 432 if activity_flags: 433 for _i, key in enumerate(ac_flags_seconds.keys()): 434 if (record["ac"] & (1 << _i)) != 0: 435 ac_flags_seconds[key] += record_length_seconds 436 437 if data_quality_flags: 438 for _i, key in enumerate(dq_flags_seconds.keys()): 439 if (record["dq"] & (1 << _i)) != 0: 440 dq_flags_seconds[key] += record_length_seconds 441 442 # Get the timing quality parameter and append to array 443 if timing_quality and record["tq"] is not None: 444 tq.append(float(record["tq"])) 445 446 # Check if a timing correction is specified 447 # (not whether it has been applied) 448 if record["tc"] != 0: 449 timing_correction += record_length_seconds 450 timing_correction_count += 1 451 452 # Get the total time analyzed 453 if endtime is not None and starttime is not None: 454 total_time_seconds = endtime - starttime 455 # If zero records agree with the selections, zero seconds have been 456 # analysed. 457 elif coverage is None: 458 total_time_seconds = 0 459 else: 460 total_time_seconds = coverage[1] - coverage[0] 461 462 # Percentage of time of bit flags set 463 if total_time_seconds: 464 if io_flags: 465 for _i, key in enumerate(io_flags_seconds.keys()): 466 io_flags_seconds[key] /= total_time_seconds * 1e-2 467 if data_quality_flags: 468 for _i, key in enumerate(dq_flags_seconds.keys()): 469 dq_flags_seconds[key] /= total_time_seconds * 1e-2 470 if activity_flags: 471 for _i, key in enumerate(ac_flags_seconds.keys()): 472 ac_flags_seconds[key] /= total_time_seconds * 1e-2 473 474 timing_correction /= total_time_seconds * 1e-2 475 476 # Add the timing quality if it is set for all used records 477 if timing_quality: 478 if len(tq) == used_record_count: 479 tq = np.array(tq, dtype=np.float64) 480 tq = { 481 "all_values": tq, 482 "min": tq.min(), 483 "max": tq.max(), 484 "mean": tq.mean(), 485 "median": np.median(tq), 486 "lower_quartile": np.percentile(tq, 25), 487 "upper_quartile": np.percentile(tq, 75) 488 } 489 490 else: 491 tq = {} 492 493 return { 494 'timing_correction': timing_correction, 495 'timing_correction_count': timing_correction_count, 496 'io_and_clock_flags_percentages': io_flags_seconds, 497 'io_and_clock_flags_counts': io_flags_counts, 498 'data_quality_flags_percentages': dq_flags_seconds, 499 'data_quality_flags_counts': dq_flags_counts, 500 'activity_flags_percentages': ac_flags_seconds, 501 'activity_flags_counts': ac_flags_counts, 502 'timing_quality': tq, 503 'record_count': len(records), 504 'number_of_records_used': used_record_count, 505 } 506 507 508def get_record_information(file_or_file_object, offset=0, endian=None): 509 """ 510 Returns record information about given files and file-like object. 511 512 :param endian: If given, the byte order will be enforced. Can be either "<" 513 or ">". If None, it will be determined automatically. 514 Defaults to None. 515 516 .. rubric:: Example 517 518 >>> from obspy.core.util import get_example_file 519 >>> filename = get_example_file("test.mseed") 520 >>> ri = get_record_information(filename) 521 >>> for k, v in sorted(ri.items()): 522 ... print(k, v) 523 activity_flags 0 524 byteorder > 525 channel BHZ 526 data_quality_flags 0 527 encoding 11 528 endtime 2003-05-29T02:15:51.518400Z 529 excess_bytes 0 530 filesize 8192 531 io_and_clock_flags 0 532 location 00 533 network NL 534 npts 5980 535 number_of_records 2 536 record_length 4096 537 samp_rate 40.0 538 starttime 2003-05-29T02:13:22.043400Z 539 station HGN 540 time_correction 0 541 """ 542 if isinstance(file_or_file_object, (str, native_str)): 543 with open(file_or_file_object, 'rb') as f: 544 info = _get_record_information(f, offset=offset, endian=endian) 545 else: 546 info = _get_record_information(file_or_file_object, offset=offset, 547 endian=endian) 548 return info 549 550 551def _decode_header_field(name, content): 552 """ 553 Helper function to decode header fields. Fairly fault tolerant and it 554 will also raise nice warnings in case in encounters anything wild. 555 """ 556 try: 557 return content.decode("ascii", errors="strict") 558 except UnicodeError: 559 r = content.decode("ascii", errors="ignore") 560 msg = (u"Failed to decode {name} code as ASCII. " 561 u"Code in file: '{result}' (\ufffd indicates characters " 562 u"that could not be decoded). " 563 u"Will be interpreted as: '{f_result}'. " 564 u"This is an invalid MiniSEED file - please " 565 u"contact your data provider.") 566 warnings.warn(msg.format( 567 name=name, 568 result=content.decode("ascii", errors="replace"), 569 f_result=r)) 570 return r 571 572 573def _get_record_information(file_object, offset=0, endian=None): 574 """ 575 Searches the first MiniSEED record stored in file_object at the current 576 position and returns some information about it. 577 578 If offset is given, the MiniSEED record is assumed to start at current 579 position + offset in file_object. 580 581 :param endian: If given, the byte order will be enforced. Can be either "<" 582 or ">". If None, it will be determined automatically. 583 Defaults to None. 584 """ 585 initial_position = file_object.tell() 586 record_start = initial_position 587 samp_rate = None 588 589 info = {} 590 591 # Apply the offset. 592 if offset: 593 file_object.seek(offset, 1) 594 record_start += offset 595 596 # Get the size of the buffer. 597 file_object.seek(0, 2) 598 info['filesize'] = int(file_object.tell() - record_start) 599 file_object.seek(record_start, 0) 600 601 _code = file_object.read(8)[6:7] 602 # Reset the offset if starting somewhere in the middle of the file. 603 if info['filesize'] % 128 != 0: 604 # if a multiple of minimal record length 256 605 record_start = 0 606 elif _code not in [b'D', b'R', b'Q', b'M', b' ']: 607 # if valid data record start at all starting with D, R, Q or M 608 record_start = 0 609 # Might be a noise record or completely empty. 610 elif _code == b' ': 611 try: 612 _t = file_object.read(120).decode().strip() 613 except Exception: 614 raise ValueError("Invalid MiniSEED file.") 615 if not _t: 616 info = _get_record_information(file_object=file_object, 617 endian=endian) 618 file_object.seek(initial_position, 0) 619 return info 620 else: 621 raise ValueError("Invalid MiniSEED file.") 622 file_object.seek(record_start, 0) 623 624 # check if full SEED or MiniSEED 625 if file_object.read(8)[6:7] == b'V': 626 # found a full SEED record - seek first MiniSEED record 627 # search blockette 005, 008 or 010 which contain the record length 628 blockette_id = file_object.read(3) 629 while blockette_id not in [b'010', b'008', b'005']: 630 if not blockette_id.startswith(b'0'): 631 msg = "SEED Volume Index Control Headers: blockette 0xx" + \ 632 " expected, got %s" 633 raise Exception(msg % blockette_id) 634 # get length and jump to end of current blockette 635 blockette_len = int(file_object.read(4)) 636 file_object.seek(blockette_len - 7, 1) 637 # read next blockette id 638 blockette_id = file_object.read(3) 639 # Skip the next bytes containing length of the blockette and version 640 file_object.seek(8, 1) 641 # get record length 642 rec_len = pow(2, int(file_object.read(2))) 643 # reset file pointer 644 file_object.seek(record_start, 0) 645 # cycle through file using record length until first data record found 646 while True: 647 data = file_object.read(7)[6:7] 648 if data in [b'D', b'R', b'Q', b'M']: 649 break 650 # stop looking when hitting end of file 651 # the second check should be enough.. but play it safe 652 if not data or record_start > info['filesize']: 653 msg = "No MiniSEED data record found in file." 654 raise ValueError(msg) 655 record_start += rec_len 656 file_object.seek(record_start, 0) 657 658 # Jump to the network, station, location and channel codes. 659 file_object.seek(record_start + 8, 0) 660 data = file_object.read(12) 661 662 info["station"] = _decode_header_field("station", data[:5].strip()) 663 info["location"] = _decode_header_field("location", data[5:7].strip()) 664 info["channel"] = _decode_header_field("channel", data[7:10].strip()) 665 info["network"] = _decode_header_field("network", data[10:12].strip()) 666 667 # Use the date to figure out the byte order. 668 file_object.seek(record_start + 20, 0) 669 # Capital letters indicate unsigned quantities. 670 data = file_object.read(28) 671 672 def fmt(s): 673 return native_str('%sHHBBBxHHhhBBBxlxxH' % s) 674 675 def _parse_time(values): 676 if not (1 <= values[1] <= 366): 677 msg = 'julday out of bounds (wrong endian?): {!s}'.format( 678 values[1]) 679 raise InternalMSEEDParseTimeError(msg) 680 # The spec says values[5] (.0001 seconds) must be between 0-9999 but 681 # we've encountered files which have a value of 10000. We interpret 682 # this as an additional second. The approach here is general enough 683 # to work for any value of values[5]. 684 msec = values[5] * 100 685 offset = msec // 1000000 686 if offset: 687 warnings.warn( 688 "Record contains a fractional seconds (.0001 secs) of %i - " 689 "the maximum strictly allowed value is 9999. It will be " 690 "interpreted as one or more additional seconds." % values[5], 691 category=UserWarning) 692 try: 693 t = UTCDateTime( 694 year=values[0], julday=values[1], 695 hour=values[2], minute=values[3], second=values[4], 696 microsecond=msec % 1000000) + offset 697 except TypeError: 698 msg = 'Problem decoding time (wrong endian?)' 699 raise InternalMSEEDParseTimeError(msg) 700 return t 701 702 if endian is None: 703 try: 704 endian = ">" 705 values = unpack(fmt(endian), data) 706 starttime = _parse_time(values) 707 except InternalMSEEDParseTimeError: 708 endian = "<" 709 values = unpack(fmt(endian), data) 710 starttime = _parse_time(values) 711 else: 712 values = unpack(fmt(endian), data) 713 try: 714 starttime = _parse_time(values) 715 except InternalMSEEDParseTimeError: 716 msg = ("Invalid starttime found. The passed byte order is likely " 717 "wrong.") 718 raise ValueError(msg) 719 npts = values[6] 720 info['npts'] = npts 721 samp_rate_factor = values[7] 722 samp_rate_mult = values[8] 723 info['activity_flags'] = values[9] 724 # Bit 1 of the activity flags. 725 time_correction_applied = bool(info['activity_flags'] & 2) 726 info['io_and_clock_flags'] = values[10] 727 info['data_quality_flags'] = values[11] 728 info['time_correction'] = values[12] 729 time_correction = values[12] 730 blkt_offset = values[13] 731 732 # Correct the starttime if applicable. 733 if (time_correction_applied is False) and time_correction: 734 # Time correction is in units of 0.0001 seconds. 735 starttime += time_correction * 0.0001 736 737 # Traverse the blockettes and parse Blockettes 100, 500, 1000 and/or 1001 738 # if any of those is found. 739 while blkt_offset: 740 file_object.seek(record_start + blkt_offset, 0) 741 blkt_type, next_blkt = unpack(native_str('%sHH' % endian), 742 file_object.read(4)) 743 if next_blkt != 0 and (next_blkt < 4 or next_blkt - 4 <= blkt_offset): 744 msg = ('Invalid blockette offset (%d) less than or equal to ' 745 'current offset (%d)') % (next_blkt, blkt_offset) 746 raise ValueError(msg) 747 blkt_offset = next_blkt 748 749 # Parse in order of likeliness. 750 if blkt_type == 1000: 751 encoding, word_order, record_length = \ 752 unpack(native_str('%sBBB' % endian), 753 file_object.read(3)) 754 if word_order not in ENDIAN: 755 msg = ('Invalid word order "%s" in blockette 1000 for ' 756 'record with ID %s.%s.%s.%s at offset %i.') % ( 757 str(word_order), info["network"], info["station"], 758 info["location"], info["channel"], offset) 759 warnings.warn(msg, UserWarning) 760 elif ENDIAN[word_order] != endian: 761 msg = 'Inconsistent word order.' 762 warnings.warn(msg, UserWarning) 763 info['encoding'] = encoding 764 info['record_length'] = 2 ** record_length 765 elif blkt_type == 1001: 766 info['timing_quality'], mu_sec = \ 767 unpack(native_str('%sBb' % endian), 768 file_object.read(2)) 769 starttime += float(mu_sec) / 1E6 770 elif blkt_type == 500: 771 file_object.seek(14, 1) 772 mu_sec = unpack(native_str('%sb' % endian), 773 file_object.read(1))[0] 774 starttime += float(mu_sec) / 1E6 775 elif blkt_type == 100: 776 samp_rate = unpack(native_str('%sf' % endian), 777 file_object.read(4))[0] 778 779 # No blockette 1000 found. 780 if "record_length" not in info: 781 file_object.seek(record_start, 0) 782 # Read 16 kb - should be a safe maximal record length. 783 buf = from_buffer(file_object.read(2 ** 14), dtype=np.int8) 784 # This is a messy check - we just delegate to libmseed. 785 reclen = clibmseed.ms_detect(buf, len(buf)) 786 if reclen < 0: 787 raise ValueError("Could not detect data record.") 788 elif reclen == 0: 789 # It might be at the end of the file. 790 if len(buf) in [2 ** _i for _i in range(7, 256)]: 791 reclen = len(buf) 792 else: 793 raise ValueError("Could not determine record length.") 794 info["record_length"] = reclen 795 796 # If samprate not set via blockette 100 calculate the sample rate according 797 # to the SEED manual. 798 if not samp_rate: 799 if samp_rate_factor > 0 and samp_rate_mult > 0: 800 samp_rate = float(samp_rate_factor * samp_rate_mult) 801 elif samp_rate_factor > 0 and samp_rate_mult < 0: 802 samp_rate = -1.0 * float(samp_rate_factor) / float(samp_rate_mult) 803 elif samp_rate_factor < 0 and samp_rate_mult > 0: 804 samp_rate = -1.0 * float(samp_rate_mult) / float(samp_rate_factor) 805 elif samp_rate_factor < 0 and samp_rate_mult < 0: 806 samp_rate = 1.0 / float(samp_rate_factor * samp_rate_mult) 807 else: 808 samp_rate = 0 809 810 info['samp_rate'] = samp_rate 811 812 info['starttime'] = starttime 813 # If sample rate is zero set endtime to startime 814 if samp_rate == 0: 815 info['endtime'] = starttime 816 # Endtime is the time of the last sample. 817 else: 818 info['endtime'] = starttime + (npts - 1) / samp_rate 819 info['byteorder'] = endian 820 821 info['number_of_records'] = int(info['filesize'] // 822 info['record_length']) 823 info['excess_bytes'] = int(info['filesize'] % info['record_length']) 824 825 # Reset file pointer. 826 file_object.seek(initial_position, 0) 827 return info 828 829 830def _ctypes_array_2_numpy_array(buffer_, buffer_elements, sampletype): 831 """ 832 Takes a Ctypes array and its length and type and returns it as a 833 NumPy array. 834 835 :param buffer_: Ctypes c_void_p pointer to buffer. 836 :param buffer_elements: length of the whole buffer 837 :param sampletype: type of sample, on of "a", "i", "f", "d" 838 """ 839 # Allocate NumPy array to move memory to 840 numpy_array = np.empty(buffer_elements, dtype=sampletype) 841 datptr = numpy_array.ctypes.get_data() 842 # Manually copy the contents of the C allocated memory area to 843 # the address of the previously created NumPy array 844 C.memmove(datptr, buffer_, buffer_elements * SAMPLESIZES[sampletype]) 845 return numpy_array 846 847 848def _convert_msr_to_dict(m): 849 """ 850 Internal method used for setting header attributes. 851 """ 852 h = {} 853 attributes = ('network', 'station', 'location', 'channel', 854 'dataquality', 'starttime', 'samprate', 855 'samplecnt', 'numsamples', 'sampletype') 856 # loop over attributes 857 for _i in attributes: 858 h[_i] = getattr(m, _i) 859 return h 860 861 862def _convert_datetime_to_mstime(dt): 863 """ 864 Takes a obspy.util.UTCDateTime object and returns an epoch time in ms. 865 866 :param dt: obspy.util.UTCDateTime object. 867 """ 868 rest = (dt._ns % 10**3) >= 500 and 1 or 0 869 return dt._ns // 10**3 + rest 870 871 872def _convert_mstime_to_datetime(timestring): 873 """ 874 Takes a MiniSEED timestamp and returns a obspy.util.UTCDateTime object. 875 876 :param timestamp: MiniSEED timestring (Epoch time string in ms). 877 """ 878 return UTCDateTime(ns=int(round(timestring * 10**3))) 879 880 881def _unpack_steim_1(data, npts, swapflag=0, verbose=0): 882 """ 883 Unpack steim1 compressed data given as numpy array. 884 885 :type data: :class:`numpy.ndarray` 886 :param data: steim compressed data as a numpy array 887 :param npts: number of data points 888 :param swapflag: Swap bytes, defaults to 0 889 :return: Return data as numpy.ndarray of dtype int32 890 """ 891 datasize = len(data) 892 samplecnt = npts 893 datasamples = np.empty(npts, dtype=np.int32) 894 895 nsamples = clibmseed.msr_decode_steim1( 896 data.ctypes.data, 897 datasize, samplecnt, datasamples, 898 npts, None, swapflag) 899 if nsamples != npts: 900 raise Exception("Error in unpack_steim1") 901 return datasamples 902 903 904def _unpack_steim_2(data, npts, swapflag=0, verbose=0): 905 """ 906 Unpack steim2 compressed data given as numpy array. 907 908 :type data: :class:`numpy.ndarray` 909 :param data: steim compressed data as a numpy array 910 :param npts: number of data points 911 :param swapflag: Swap bytes, defaults to 0 912 :return: Return data as numpy.ndarray of dtype int32 913 """ 914 datasize = len(data) 915 samplecnt = npts 916 datasamples = np.empty(npts, dtype=np.int32) 917 918 nsamples = clibmseed.msr_decode_steim2( 919 data.ctypes.data, 920 datasize, samplecnt, datasamples, 921 npts, None, swapflag) 922 if nsamples != npts: 923 raise Exception("Error in unpack_steim2") 924 return datasamples 925 926 927def set_flags_in_fixed_headers(filename, flags): 928 """ 929 Updates a given MiniSEED file with some fixed header flags. 930 931 :type filename: string 932 :param filename: Name of the MiniSEED file to be changed 933 :type flags: dict 934 :param flags: The flags to update in the MiniSEED file 935 936 Flags are stored as a nested dictionary:: 937 938 { trace_id: 939 { flag_group: 940 { flag_name: flag_value, 941 ... 942 }, 943 ... 944 }, 945 ... 946 } 947 948 with: 949 950 * ``trace_id`` 951 A string identifying the trace. A string looking like 952 ``NETWORK.STATION.LOCATION.CHANNEL`` is expected, the values will 953 be compared to those found in the fixed header of every record. An 954 empty field will be interpreted as "every possible value", so 955 ``"..."`` will apply to every single trace in the file. Padding 956 spaces are ignored. 957 * ``flag_group`` 958 Which flag group is to be changed. One of ``'activity_flags'``, 959 ``'io_clock_flags'``, ``'data_qual_flags'`` is expected. Invalid 960 flag groups raise a ``ValueError``. 961 * ``flag_name`` 962 The name of the flag. Possible values are matched with 963 ``obspy.io.mseed.headers.FIXED_HEADER_ACTIVITY_FLAGS``, 964 ``FIXED_HEADER_IO_CLOCK_FLAGS`` or ``FIXED_HEADER_DATA_QUAL_FLAGS`` 965 depending on the flag_group. Invalid flags raise a ``ValueError``. 966 * ``flag_value`` 967 The value you want for this flag. Expected value is a bool (always 968 ``True``/``False``) or a dict to store the moments and durations 969 when this flag is ``True``. Expected syntax for this dict is 970 accurately described in ``obspy.io.mseed.util._check_flag_value``. 971 972 :raises IOError: if the file is not a MiniSEED file 973 :raises ValueError: if one of the flag group, flag name or flag value is 974 incorrect 975 976 Example: to add a *Calibration Signals Present* flag (which belongs to the 977 Activity Flags section of the fixed header) to every record, flags should 978 be:: 979 980 { "..." : { "activity_flags" : { "calib_signal" : True }}} 981 982 Example: to add a *Event in Progress* flag (which belongs to the 983 Activity Flags section of the fixed header) from 2009/12/23 06:00:00 to 984 2009/12/23 06:30:00, from 2009/12/24 10:00:00 to 2009/12/24 10:30:00 and 985 at precise times 2009/12/26 18:00:00 and 2009/12/26 18:04:00, 986 flags should be:: 987 988 date1 = UTCDateTime("2009-12-23T06:00:00.0") 989 date2 = UTCDateTime("2009-12-23T06:30:00.0") 990 date3 = UTCDateTime("2009-12-24T10:00:00.0") 991 date4 = UTCDateTime("2009-12-24T10:30:00.0") 992 date5 = UTCDateTime("2009-12-26T18:00:00.0") 993 date6 = UTCDateTime("2009-12-26T18:04:00.0") 994 { "..." : 995 { "activity_flags" : 996 { "event_in_progress" : 997 {"INSTANT" : [date5, date6], 998 "DURATION" : [(date1, date2), (date3, date4)]}}}} 999 1000 Alternative way to mark duration:: 1001 1002 { "..." : 1003 { "activity_flags" : 1004 { "event_in_progress" : 1005 { "INSTANT" : [date5, date6], 1006 "DURATION" : [date1, date2, date3, date4]}}}} 1007 1008 """ 1009 # import has to be here to break import loop 1010 from .core import _is_mseed 1011 # Basic check 1012 if not os.path.isfile(filename) or not _is_mseed(filename): 1013 raise IOError("File %s is not a valid MiniSEED file" % filename) 1014 filesize = os.path.getsize(filename) 1015 1016 # Nested dictionaries to allow empty strings as wildcards 1017 class NestedDict(dict): 1018 def __missing__(self, key): 1019 value = self[key] = type(self)() 1020 return value 1021 # Define wildcard character 1022 wildcard = "" 1023 1024 # Check channel identifier value 1025 flags_bytes = NestedDict() 1026 for (key, value) in flags.items(): 1027 split_key = key.split(".") 1028 if len(split_key) != 4: 1029 msg = "Invalid channel identifier. " +\ 1030 "Expected 'Network.Station.Location.Channel' " +\ 1031 "(empty fields allowed), got '%s'." 1032 raise ValueError(msg % key) 1033 1034 # Remove padding spaces and store in new dict 1035 net = split_key[0].strip() 1036 sta = split_key[1].strip() 1037 loc = split_key[2].strip() 1038 cha = split_key[3].strip() 1039 1040 # Check flag value for invalid data 1041 for flag_group in value: 1042 # Check invalid flag group, and prepare check for invalid flag name 1043 if flag_group == 'activity_flags': 1044 record_to_check = FIXED_HEADER_ACTIVITY_FLAGS 1045 elif flag_group == 'io_clock_flags': 1046 record_to_check = FIXED_HEADER_IO_CLOCK_FLAGS 1047 elif flag_group == 'data_qual_flags': 1048 record_to_check = FIXED_HEADER_DATA_QUAL_FLAGS 1049 else: 1050 msg = "Invalid flag group %s. One of 'activity_flags', " +\ 1051 "'io_clock_flags', 'data_qual_flags' is expected." 1052 raise ValueError(msg % flag_group) 1053 1054 for flag_name in value[flag_group]: 1055 # Check invalid flag name 1056 if flag_name not in record_to_check.values(): 1057 msg = "Invalid flag name %s. One of %s is expected." 1058 raise ValueError(msg % (flag_name, 1059 str(record_to_check.values()))) 1060 1061 # Check flag values and store them in an "easy to parse" way: 1062 # either bool or list of tuples (start, end) 1063 flag_value = value[flag_group][flag_name] 1064 corrected_flag = _check_flag_value(flag_value) 1065 flags_bytes[net][sta][loc][cha][flag_group][flag_name] = \ 1066 corrected_flag 1067 1068 # Open file 1069 with open(filename, 'r+b') as mseed_file: 1070 # Run through all records 1071 while mseed_file.tell() < filesize: 1072 record_start = mseed_file.tell() 1073 1074 # Ignore sequence number and data header 1075 mseed_file.seek(8, os.SEEK_CUR) 1076 # Read identifier 1077 sta = mseed_file.read(5).strip().decode() 1078 loc = mseed_file.read(2).strip().decode() 1079 chan = mseed_file.read(3).strip().decode() 1080 net = mseed_file.read(2).strip().decode() 1081 1082 # Search the nested dict for the network identifier 1083 if net in flags_bytes: 1084 dict_to_use = flags_bytes[net] 1085 elif wildcard in flags_bytes: 1086 dict_to_use = flags_bytes[wildcard] 1087 else: 1088 dict_to_use = None 1089 1090 # Search the nested dict for the station identifier 1091 if dict_to_use is not None and sta in dict_to_use: 1092 dict_to_use = dict_to_use[sta] 1093 elif dict_to_use is not None and wildcard in dict_to_use: 1094 dict_to_use = dict_to_use[wildcard] 1095 else: 1096 dict_to_use = None 1097 1098 # Search the nested dict for the location identifier 1099 if dict_to_use is not None and loc in dict_to_use: 1100 dict_to_use = dict_to_use[loc] 1101 elif dict_to_use is not None and wildcard in dict_to_use: 1102 dict_to_use = dict_to_use[wildcard] 1103 else: 1104 dict_to_use = None 1105 1106 # Search the nested dict for the channel identifier 1107 if dict_to_use is not None and chan in dict_to_use: 1108 flags_value = dict_to_use[chan] 1109 elif dict_to_use is not None and wildcard in dict_to_use: 1110 flags_value = dict_to_use[wildcard] 1111 else: 1112 flags_value = None 1113 1114 if flags_value is not None: 1115 # Calculate the real start and end of the record 1116 recstart = mseed_file.read(10) 1117 (yr, doy, hr, mn, sec, _, mil) = unpack(native_str(">HHBBBBH"), 1118 recstart) 1119 # Transformation to UTCDatetime() 1120 recstart = UTCDateTime(year=yr, julday=doy, hour=hr, minute=mn, 1121 second=sec, microsecond=mil * 100) 1122 # Read data to date begin and end of record 1123 (nb_samples, fact, mult) = unpack(native_str(">Hhh"), 1124 mseed_file.read(6)) 1125 1126 # Manage time correction 1127 act_flags = unpack(native_str(">B"), mseed_file.read(1))[0] 1128 time_correction_applied = bool(act_flags & 2) 1129 (_, _, _, time_correction) = unpack(native_str(">BBBl"), 1130 mseed_file.read(7)) 1131 if (time_correction_applied is False) and time_correction: 1132 # Time correction is in units of 0.0001 seconds. 1133 recstart += time_correction * 0.0001 1134 1135 # Manage blockette's datation informations 1136 # Search for blockette 100's "Actual sample rate" field 1137 samp_rate = _search_flag_in_blockette(mseed_file, 4, 100, 4, 1) 1138 if samp_rate is not None: 1139 samp_rate = unpack(native_str(">b"), samp_rate)[0] 1140 # Search for blockette 1001's "microsec" field 1141 microsec = _search_flag_in_blockette(mseed_file, 4, 1001, 5, 1) 1142 if microsec is not None: 1143 microsec = unpack(native_str(">b"), microsec)[0] 1144 else: 1145 microsec = 0 1146 1147 realstarttime = recstart + microsec * 0.000001 1148 1149 # If samprate not set via blockette 100 calculate the sample 1150 # rate according to the SEED manual. 1151 if samp_rate is None: 1152 if (fact > 0) and (mult) > 0: 1153 samp_rate = float(fact * mult) 1154 elif (fact > 0) and (mult) < 0: 1155 samp_rate = -1.0 * float(fact) / float(mult) 1156 elif (fact < 0) and (mult) > 0: 1157 samp_rate = -1.0 * float(mult) / float(fact) 1158 elif (fact < 0) and (mult) < 0: 1159 samp_rate = -1.0 / float(fact * mult) 1160 else: 1161 # if everything is unset or 0 set sample rate to 1 1162 samp_rate = 1 1163 1164 # date of the last sample is recstart+samp_rate*(nb_samples-1) 1165 # We assume here that a record with samples [0, 1, ..., n] 1166 # has a period [ date_0, date_n+1 [ AND NOT [ date_0, date_n ] 1167 realendtime = recstart + samp_rate * (nb_samples) 1168 1169 # Convert flags to bytes : activity 1170 if 'activity_flags' in flags_value: 1171 act_flag = _convert_flags_to_raw_byte( 1172 FIXED_HEADER_ACTIVITY_FLAGS, 1173 flags_value['activity_flags'], 1174 realstarttime, realendtime) 1175 else: 1176 act_flag = 0 1177 1178 # Convert flags to bytes : i/o and clock 1179 if 'io_clock_flags' in flags_value: 1180 io_clock_flag = _convert_flags_to_raw_byte( 1181 FIXED_HEADER_IO_CLOCK_FLAGS, 1182 flags_value['io_clock_flags'], 1183 realstarttime, realendtime) 1184 else: 1185 io_clock_flag = 0 1186 1187 # Convert flags to bytes : data quality flags 1188 if 'data_qual_flags' in flags_value: 1189 data_qual_flag = _convert_flags_to_raw_byte( 1190 FIXED_HEADER_DATA_QUAL_FLAGS, 1191 flags_value['data_qual_flags'], 1192 realstarttime, realendtime) 1193 else: 1194 data_qual_flag = 0 1195 1196 flagsbytes = pack("BBB", act_flag, 1197 io_clock_flag, data_qual_flag) 1198 # Go back right before the fixed header flags 1199 mseed_file.seek(-8, os.SEEK_CUR) 1200 # Update flags* 1201 mseed_file.write(flagsbytes) 1202 # Seek to first blockette 1203 mseed_file.seek(9, os.SEEK_CUR) 1204 else: 1205 # Seek directly to first blockette 1206 mseed_file.seek(28, os.SEEK_CUR) 1207 1208 # Read record length in blockette 1000 to seek to the next record 1209 reclen_pow = _search_flag_in_blockette(mseed_file, 0, 1000, 6, 1) 1210 1211 if reclen_pow is None: 1212 msg = "Invalid MiniSEED file. No blockette 1000 was found." 1213 raise IOError(msg) 1214 else: 1215 reclen_pow = unpack(native_str("B"), reclen_pow)[0] 1216 reclen = 2**reclen_pow 1217 mseed_file.seek(record_start + reclen, os.SEEK_SET) 1218 1219 1220def _check_flag_value(flag_value): 1221 """ 1222 Search for a given flag in a given blockette for the current record. 1223 1224 This is a utility function for set_flags_in_fixed_headers and is not 1225 designed to be called by someone else. 1226 1227 This function checks for valid entries for a flag. A flag can be either 1228 * ``bool`` value to be always True or False for all the records 1229 * ``datetime`` or ``UTCDateTime`` value to add a single 'INSTANT' datation 1230 (see below) 1231 * ``dict`` to allow complex flag datation 1232 ** The dict keys may be the keyword INSTANT to mark arbitrarly short 1233 duration flags, or the keyword DURATION to mark events that span across 1234 time. 1235 ** The dict values are: 1236 *** for the INSTANT value, a single UTCDateTime or datetime object, or a 1237 list of these datation objects 1238 *** for the DURATION value, either a list of 1239 [start1, end1, start2, end2, ...] or a list of tuples 1240 [(start1, end1), (start2, end2), ...] 1241 1242 1243 This function then returns all datation events as a list of tuples 1244 [(start1, end1), ...] to ease the work of _convert_flags_to_raw_byte. Bool 1245 values are unchanged, instant events become a tuple 1246 (event_date, event_date). 1247 1248 If the flag value is incorrect, a ValueError is raised with a (hopefully) 1249 explicit enough message. 1250 1251 :type flag_value: bool or dict 1252 :param flag_value: the flag value to check. 1253 :return: corrected value of the flag. 1254 :raises: If the flag is not the one expected, a ``ValueError`` is raised 1255 """ 1256 1257 if isinstance(flag_value, bool): 1258 # bool allowed 1259 corrected_flag = flag_value 1260 1261 elif isinstance(flag_value, datetime) or \ 1262 isinstance(flag_value, UTCDateTime): 1263 # A single instant value is allowed 1264 utc_val = UTCDateTime(flag_value) 1265 corrected_flag = [(utc_val, utc_val)] 1266 1267 elif isinstance(flag_value, collections_abc.Mapping): 1268 # dict allowed if it has the right format 1269 corrected_flag = [] 1270 for flag_key in flag_value: 1271 if flag_key == "INSTANT": 1272 # Expected: list of UTCDateTime 1273 inst_values = flag_value[flag_key] 1274 if isinstance(inst_values, datetime) or \ 1275 isinstance(inst_values, UTCDateTime): 1276 # Single value : ensure it's UTCDateTime and store it 1277 utc_val = UTCDateTime(inst_values) 1278 corrected_flag.append((utc_val, utc_val)) 1279 elif isinstance(inst_values, collections_abc.Sequence): 1280 # Several instant values : check their types 1281 # and add each of them 1282 for value in inst_values: 1283 if isinstance(value, datetime) or \ 1284 isinstance(value, UTCDateTime): 1285 utc_val = UTCDateTime(value) 1286 corrected_flag.append((utc_val, utc_val)) 1287 else: 1288 msg = "Unexpected type for flag duration " +\ 1289 "'INSTANT' %s" 1290 raise ValueError(msg % str(type(inst_values))) 1291 else: 1292 msg = "Unexpected type for flag duration 'INSTANT' %s" 1293 raise ValueError(msg % str(type(inst_values))) 1294 1295 elif flag_key == "DURATION": 1296 # Expecting either a list of tuples (start, end) or 1297 # a list of (start1, end1, start1, end1) 1298 dur_values = flag_value[flag_key] 1299 if isinstance(dur_values, collections_abc.Sequence): 1300 if len(dur_values) != 0: 1301 # Check first item 1302 if isinstance(dur_values[0], datetime) or \ 1303 isinstance(dur_values[0], UTCDateTime): 1304 # List of [start1, end1, start2, end2, etc] 1305 # Check len 1306 if len(dur_values) % 2 != 0: 1307 msg = "Expected even length of duration " +\ 1308 "values, got %s" 1309 raise ValueError(msg % len(dur_values)) 1310 1311 # Add values 1312 duration_iter = iter(dur_values) 1313 for value in duration_iter: 1314 start = value 1315 end = dur_values[dur_values.index(value) + 1] 1316 1317 # Check start type 1318 if not isinstance(start, datetime) and \ 1319 not isinstance(start, UTCDateTime): 1320 msg = "Incorrect type for duration " +\ 1321 "start %s" 1322 raise ValueError(msg % str(type(start))) 1323 1324 # Check end type 1325 if not isinstance(end, datetime) and \ 1326 not isinstance(end, UTCDateTime): 1327 msg = "Incorrect type for duration " +\ 1328 "end %s" 1329 raise ValueError(msg % str(type(end))) 1330 1331 # Check duration validity 1332 start = UTCDateTime(start) 1333 end = UTCDateTime(end) 1334 if start <= end: 1335 corrected_flag.append((start, end)) 1336 else: 1337 msg = "Flag datation: expected end of " +\ 1338 "duration after its start" 1339 raise ValueError(msg) 1340 next(duration_iter) 1341 1342 elif isinstance(dur_values[0], 1343 collections_abc.Sequence): 1344 # List of tuples (start, end) 1345 for value in dur_values: 1346 if not isinstance(value, 1347 collections_abc.Sequence): 1348 msg = "Incorrect type %s for flag duration" 1349 raise ValueError(msg % str(type(value))) 1350 elif len(value) != 2: 1351 msg = "Incorrect len %s for flag duration" 1352 raise ValueError(msg % len(value)) 1353 else: 1354 start = value[0] 1355 end = value[1] 1356 1357 # Check start type 1358 if not isinstance(start, datetime) and \ 1359 not isinstance(start, UTCDateTime): 1360 msg = "Incorrect type for duration " +\ 1361 "start %s" 1362 raise ValueError(msg % 1363 str(type(start))) 1364 1365 # Check end type 1366 if not isinstance(end, datetime) and \ 1367 not isinstance(end, UTCDateTime): 1368 msg = "Incorrect type for duration " +\ 1369 "end %s" 1370 raise ValueError(msg % str(type(end))) 1371 if start <= end: 1372 corrected_flag.append((start, end)) 1373 else: 1374 msg = "Flag datation: expected end " +\ 1375 "of duration after its start" 1376 raise ValueError(msg) 1377 1378 # Else: len(dur_values) == 0, empty duration list: 1379 # do nothing 1380 else: 1381 msg = "Incorrect DURATION value: expected a list of " +\ 1382 "tuples (start, end), got %s" 1383 raise ValueError(msg % str(type(dur_values))) 1384 1385 else: 1386 msg = "Invalid key %s for flag value. One of " +\ 1387 "'INSTANT', 'DURATION' is expected." 1388 raise ValueError(msg % flag_key) 1389 else: 1390 msg = "Invalid type %s for flag value. Allowed values " +\ 1391 "are bool or dict" 1392 raise ValueError(msg % str(type(flag_value))) 1393 1394 return corrected_flag 1395 1396 1397def _search_flag_in_blockette(mseed_file_desc, first_blockette_offset, 1398 blockette_number, field_offset, field_length): 1399 """ 1400 Search for a given flag in a given blockette for the current record. 1401 1402 This is a utility function for set_flags_in_fixed_headers and is not 1403 designed to be called by someone else. 1404 1405 This function uses the file descriptor``mseed_file_desc``, seeks 1406 ``first_blockette_offset`` to go to the first blockette, reads through all 1407 the blockettes until it finds the one with number ``blockette_number``, 1408 then skips ``field_offset`` bytes to read ``field_length`` bytes and 1409 returns them. If the blockette does not exist, it returns None 1410 1411 Please note that this function does not decommute the binary value into an 1412 exploitable data (int, float, string, ...) 1413 :type mseed_file_desc: File object 1414 :param mseed_file_desc: a File descriptor to the current miniseed file. 1415 The value of mseed_file_desc.tell() is set back by this funcion before 1416 returning, use in multithread applications at your own risk. 1417 :type first_blockette_offset: int 1418 :param first_blockette_offset: tells the function where the first blockette 1419 of the record is compared to the mseed_file_desc current position in the 1420 file. A positive value means the blockette is after the current position. 1421 :type blockette_number: int 1422 :param blockette_number: the blockette number to search for 1423 :type field_offset: int 1424 :param field_offset: how many bytes we have to skip before attaining the 1425 wanted field. Please note that it also counts blockette number and next 1426 blockette index's field. 1427 :type field_length: int 1428 :param field_length: length of the wanted field, in bytes 1429 :return: bytes containing the field's value in this record 1430 1431 """ 1432 previous_position = mseed_file_desc.tell() 1433 1434 try: 1435 # Go to first blockette 1436 mseed_file_desc.seek(first_blockette_offset, os.SEEK_CUR) 1437 mseed_record_start = mseed_file_desc.tell() - 48 1438 read_data = mseed_file_desc.read(4) 1439 # Read info in the first blockette 1440 [cur_blkt_number, next_blkt_offset] = unpack(native_str(">HH"), 1441 read_data) 1442 1443 while cur_blkt_number != blockette_number \ 1444 and next_blkt_offset != 0: 1445 # Nothing here, read next blockette 1446 mseed_file_desc.seek(mseed_record_start + next_blkt_offset, 1447 os.SEEK_SET) 1448 read_data = mseed_file_desc.read(4) 1449 [cur_blkt_number, next_blkt_offset] = unpack(native_str(">HH"), 1450 read_data) 1451 1452 if cur_blkt_number == blockette_number: 1453 # Blockette found: we want to skip ``field_offset`` bytes but we 1454 # have already read 4 of the offset to get informations about the 1455 # current blockette, so we remove them from skipped data 1456 mseed_file_desc.seek(field_offset - 4, os.SEEK_CUR) 1457 returned_bytes = mseed_file_desc.read(field_length) 1458 else: 1459 returned_bytes = None 1460 1461 finally: 1462 mseed_file_desc.seek(previous_position, os.SEEK_SET) 1463 1464 return returned_bytes 1465 1466 1467def _convert_flags_to_raw_byte(expected_flags, user_flags, recstart, recend): 1468 """ 1469 Converts a flag dictionary to a byte, ready to be encoded in a MiniSEED 1470 header. 1471 1472 This is a utility function for set_flags_in_fixed_headers and is not 1473 designed to be called by someone else. 1474 1475 expected_signals describes all the possible bit names for the user flags 1476 and their place in the result byte. Expected: dict { exponent: bit_name }. 1477 The fixed header flags are available in obspy.io.mseed.headers as 1478 FIXED_HEADER_ACTIVITY_FLAGS, FIXED_HEADER_DATA_QUAL_FLAGS and 1479 FIXED_HEADER_IO_CLOCK_FLAGS. 1480 1481 This expects a user_flags as a dictionary { bit_name : value }. bit_name is 1482 compared to the expected_signals, and its value is converted to bool. 1483 Missing values are considered false. 1484 1485 :type expected_flags: dict {int: str} 1486 :param expected_flags: every possible flag in this field, with its offset 1487 :type user_flags: dict {str: bool} 1488 :param user_flags: user defined flags and its value 1489 :type recstart: UTCDateTime 1490 :param recstart: date of the first sample of the current record 1491 :type recstart: UTCDateTime 1492 :param recend: date of the last sample of the current record 1493 :return: raw int value for the flag group 1494 """ 1495 1496 flag_byte = 0 1497 1498 for (bit, key) in expected_flags.items(): 1499 use_in_this_record = False 1500 if key in user_flags: 1501 if isinstance(user_flags[key], bool) and user_flags[key]: 1502 # Boolean value, we accept it for all records 1503 use_in_this_record = True 1504 elif isinstance(user_flags[key], collections_abc.Sequence): 1505 # List of tuples (start, end) 1506 use_in_this_record = False 1507 for tuple_value in user_flags[key]: 1508 # Check wether this record is concerned 1509 event_start = tuple_value[0] 1510 event_end = tuple_value[1] 1511 1512 if(event_start < recend) and (recstart <= event_end): 1513 use_in_this_record = True 1514 break 1515 1516 if use_in_this_record: 1517 flag_byte += 2**bit 1518 1519 return flag_byte 1520 1521 1522def shift_time_of_file(input_file, output_file, timeshift): 1523 """ 1524 Takes a MiniSEED file and shifts the time of every record by the given 1525 amount. 1526 1527 The same could be achieved by reading the MiniSEED file with ObsPy, 1528 modifying the starttime and writing it again. The problem with this 1529 approach is that some record specific flags and special blockettes might 1530 not be conserved. This function directly operates on the file and simply 1531 changes some header fields, not touching the rest, thus preserving it. 1532 1533 Will only work correctly if all records have the same record length which 1534 usually should be the case. 1535 1536 All times are in 0.0001 seconds, that is in 1/10000 seconds. NOT ms but one 1537 order of magnitude smaller! This is due to the way time corrections are 1538 stored in the MiniSEED format. 1539 1540 :type input_file: str 1541 :param input_file: The input filename. 1542 :type output_file: str 1543 :param output_file: The output filename. 1544 :type timeshift: int 1545 :param timeshift: The time-shift to be applied in 0.0001, e.g. 1E-4 1546 seconds. Use an integer number. 1547 1548 Please do NOT use identical input and output files because if something 1549 goes wrong, your data WILL be corrupted/destroyed. Also always check the 1550 resulting output file. 1551 1552 .. rubric:: Technical details 1553 1554 The function will loop over every record and change the "Time correction" 1555 field in the fixed section of the MiniSEED data header by the specified 1556 amount. Unfortunately a further flag (bit 1 in the "Activity flags" field) 1557 determines whether or not the time correction has already been applied to 1558 the record start time. If it has not, all is fine and changing the "Time 1559 correction" field is enough. Otherwise the actual time also needs to be 1560 changed. 1561 1562 One further detail: If bit 1 in the "Activity flags" field is 1 (True) and 1563 the "Time correction" field is 0, then the bit will be set to 0 and only 1564 the time correction will be changed thus avoiding the need to change the 1565 record start time which is preferable. 1566 """ 1567 timeshift = int(timeshift) 1568 # A timeshift of zero makes no sense. 1569 if timeshift == 0: 1570 msg = "The timeshift must to be not equal to 0." 1571 raise ValueError(msg) 1572 1573 # Get the necessary information from the file. 1574 info = get_record_information(input_file) 1575 record_length = info["record_length"] 1576 1577 byteorder = info["byteorder"] 1578 sys_byteorder = "<" if (sys.byteorder == "little") else ">" 1579 do_swap = False if (byteorder == sys_byteorder) else True 1580 1581 # This is in this scenario somewhat easier to use than BytesIO because one 1582 # can directly modify the data array. 1583 data = np.fromfile(input_file, dtype=np.uint8) 1584 array_length = len(data) 1585 record_offset = 0 1586 # Loop over every record. 1587 while True: 1588 remaining_bytes = array_length - record_offset 1589 if remaining_bytes < 48: 1590 if remaining_bytes > 0: 1591 msg = "%i excessive byte(s) in the file. " % remaining_bytes 1592 msg += "They will be appended to the output file." 1593 warnings.warn(msg) 1594 break 1595 # Use a slice for the current record. 1596 current_record = data[record_offset: record_offset + record_length] 1597 record_offset += record_length 1598 1599 activity_flags = current_record[36] 1600 is_time_correction_applied = bool(activity_flags & 2) 1601 1602 current_time_shift = current_record[40:44] 1603 current_time_shift.dtype = np.int32 1604 if do_swap: 1605 current_time_shift = current_time_shift.byteswap(False) 1606 current_time_shift = current_time_shift[0] 1607 1608 # If the time correction has been applied, but there is no actual 1609 # time correction, then simply set the time correction applied 1610 # field to false and process normally. 1611 # This should rarely be the case. 1612 if current_time_shift == 0 and is_time_correction_applied: 1613 # This sets bit 2 of the activity flags to 0. 1614 current_record[36] = current_record[36] & (~2) 1615 is_time_correction_applied = False 1616 # This is the case if the time correction has been applied. This 1617 # requires some more work by changing both, the actual time and the 1618 # time correction field. 1619 elif is_time_correction_applied: 1620 msg = "The timeshift can only be applied by actually changing the " 1621 msg += "time. This is experimental. Please make sure the output " 1622 msg += "file is correct." 1623 warnings.warn(msg) 1624 # The whole process is not particularly fast or optimized but 1625 # instead intends to avoid errors. 1626 # Get the time variables. 1627 time = current_record[20:30] 1628 year = time[0:2] 1629 julday = time[2:4] 1630 hour = time[4:5] 1631 minute = time[5:6] 1632 second = time[6:7] 1633 msecs = time[8:10] 1634 # Change dtype of multibyte values. 1635 year.dtype = np.uint16 1636 julday.dtype = np.uint16 1637 msecs.dtype = np.uint16 1638 if do_swap: 1639 year = year.byteswap(False) 1640 julday = julday.byteswap(False) 1641 msecs = msecs.byteswap(False) 1642 dtime = UTCDateTime(year=year[0], julday=julday[0], hour=hour[0], 1643 minute=minute[0], second=second[0], 1644 microsecond=msecs[0] * 100) 1645 dtime += (float(timeshift) / 10000) 1646 year[0] = dtime.year 1647 julday[0] = dtime.julday 1648 hour[0] = dtime.hour 1649 minute[0] = dtime.minute 1650 second[0] = dtime.second 1651 msecs[0] = dtime.microsecond / 100 1652 # Swap again. 1653 if do_swap: 1654 year = year.byteswap(False) 1655 julday = julday.byteswap(False) 1656 msecs = msecs.byteswap(False) 1657 # Change dtypes back. 1658 year.dtype = np.uint8 1659 julday.dtype = np.uint8 1660 msecs.dtype = np.uint8 1661 # Write to current record. 1662 time[0:2] = year[:] 1663 time[2:4] = julday[:] 1664 time[4] = hour[:] 1665 time[5] = minute[:] 1666 time[6] = second[:] 1667 time[8:10] = msecs[:] 1668 current_record[20:30] = time[:] 1669 1670 # Now modify the time correction flag. 1671 current_time_shift += timeshift 1672 current_time_shift = np.array([current_time_shift], np.int32) 1673 if do_swap: 1674 current_time_shift = current_time_shift.byteswap(False) 1675 current_time_shift.dtype = np.uint8 1676 current_record[40:44] = current_time_shift[:] 1677 1678 # Write to the output file. 1679 data.tofile(output_file) 1680 1681 1682def _convert_and_check_encoding_for_writing(encoding): 1683 """ 1684 Helper function to handle and test encodings. 1685 1686 If encoding is a string, it will be converted to the appropriate 1687 integer. It will furthermore be checked if the specified encoding can be 1688 written using libmseed. Appropriate errors will be raised if necessary. 1689 """ 1690 # Check if encoding kwarg is set and catch invalid encodings. 1691 encoding_strings = {v[0]: k for k, v in ENCODINGS.items()} 1692 1693 try: 1694 encoding = int(encoding) 1695 except Exception: 1696 pass 1697 1698 if isinstance(encoding, int): 1699 if (encoding in ENCODINGS and ENCODINGS[encoding][3] is False) or \ 1700 encoding in UNSUPPORTED_ENCODINGS: 1701 msg = ("Encoding %i cannot be written with ObsPy. Please " 1702 "use another encoding.") % encoding 1703 raise ValueError(msg) 1704 elif encoding not in ENCODINGS: 1705 raise ValueError("Unknown encoding: %i." % encoding) 1706 else: 1707 if encoding not in encoding_strings: 1708 raise ValueError("Unknown encoding: '%s'." % str(encoding)) 1709 elif ENCODINGS[encoding_strings[encoding]][3] is False: 1710 msg = ("Encoding '%s' cannot be written with ObsPy. Please " 1711 "use another encoding.") % encoding 1712 raise ValueError(msg) 1713 encoding = encoding_strings[encoding] 1714 return encoding 1715 1716 1717def get_timing_and_data_quality(file_or_file_object): 1718 warnings.warn("The obspy.io.mseed.util.get_timing_and_data_quality() " 1719 "function is deprecated and will be removed with the next " 1720 "release. Please use the " 1721 "improved obspy.io.mseed.util.get_flags() method instead.", 1722 ObsPyDeprecationWarning) 1723 flags = get_flags(files=file_or_file_object, io_flags=False, 1724 activity_flags=False, data_quality_flags=True, 1725 timing_quality=True) 1726 1727 ret_val = {} 1728 ret_val["data_quality_flags"] = \ 1729 list(flags["data_quality_flags_counts"].values()) 1730 1731 if flags["timing_quality"]: 1732 tq = flags["timing_quality"] 1733 ret_val["timing_quality_average"] = tq["mean"] 1734 ret_val["timing_quality_lower_quantile"] = tq["lower_quartile"] 1735 ret_val["timing_quality_max"] = tq["max"] 1736 ret_val["timing_quality_median"] = tq["median"] 1737 ret_val["timing_quality_min"] = tq["min"] 1738 ret_val["timing_quality_upper_quantile"] = tq["lower_quartile"] 1739 1740 return ret_val 1741 1742 1743if __name__ == '__main__': 1744 import doctest 1745 doctest.testmod(exclude_empty=True) 1746