try:
    from collections.abc import Mapping # noqa
    from collections.abc import MutableMapping # noqa
except ImportError:
    # Python 2 keeps the ABCs directly in ``collections``
    from collections import Mapping # noqa
    from collections import MutableMapping # noqa

import segyio
from .binfield import BinField
from .tracefield import TraceField

class Field(MutableMapping):
    """
    The Field implements the dict interface, with a fixed set of keys. It's
    used for both binary- and trace headers. Any modifications to this
    dict_like object will be reflected on disk.

    The keys can be integers, int_likes, or enumerations such as BinField,
    TraceField, and su. If raw, numerical offsets are used they must align with
    the defined byte offsets by the SEGY specification.

    Notes
    -----
    .. versionadded:: 1.1

    .. versionchanged:: 1.3
        common dict operations (update, keys, values)

    .. versionchanged:: 1.6
        more common dict operations (MutableMapping)
    """
    # keys exposed when iterating a binary header; the unassigned regions are
    # filtered out
    _bin_keys = [x for x in BinField.enums()
                 if  x != BinField.Unassigned1
                 and x != BinField.Unassigned2]

    # keys exposed when iterating a trace header; the unassigned regions are
    # filtered out
    _tr_keys = [x for x in TraceField.enums()
                if  x != TraceField.UnassignedInt1
                and x != TraceField.UnassignedInt2]

    # short keyword names accepted by update(**kwargs), mapped to the
    # corresponding trace- or binary header field
    _kwargs = {
        'tracl' : TraceField.TRACE_SEQUENCE_LINE,
        'tracr' : TraceField.TRACE_SEQUENCE_FILE,
        'fldr'  : TraceField.FieldRecord,
        'tracf' : TraceField.TraceNumber,
        'ep'    : TraceField.EnergySourcePoint,
        'cdp'   : TraceField.CDP,
        'cdpt'  : TraceField.CDP_TRACE,
        'trid'  : TraceField.TraceIdentificationCode,
        'nvs'   : TraceField.NSummedTraces,
        'nhs'   : TraceField.NStackedTraces,
        'duse'  : TraceField.DataUse,
        'offset': TraceField.offset,
        'gelev' : TraceField.ReceiverGroupElevation,
        'selev' : TraceField.SourceSurfaceElevation,
        'sdepth': TraceField.SourceDepth,
        'gdel'  : TraceField.ReceiverDatumElevation,
        'sdel'  : TraceField.SourceDatumElevation,
        'swdep' : TraceField.SourceWaterDepth,
        'gwdep' : TraceField.GroupWaterDepth,
        'scalel': TraceField.ElevationScalar,
        'scalco': TraceField.SourceGroupScalar,
        'sx'    : TraceField.SourceX,
        'sy'    : TraceField.SourceY,
        'gx'    : TraceField.GroupX,
        'gy'    : TraceField.GroupY,
        'counit': TraceField.CoordinateUnits,
        'wevel' : TraceField.WeatheringVelocity,
        'swevel': TraceField.SubWeatheringVelocity,
        'sut'   : TraceField.SourceUpholeTime,
        'gut'   : TraceField.GroupUpholeTime,
        'sstat' : TraceField.SourceStaticCorrection,
        'gstat' : TraceField.GroupStaticCorrection,
        'tstat' : TraceField.TotalStaticApplied,
        'laga'  : TraceField.LagTimeA,
        'lagb'  : TraceField.LagTimeB,
        'delrt' : TraceField.DelayRecordingTime,
        'muts'  : TraceField.MuteTimeStart,
        'mute'  : TraceField.MuteTimeEND,
        'ns'    : TraceField.TRACE_SAMPLE_COUNT,
        'dt'    : TraceField.TRACE_SAMPLE_INTERVAL,
        'gain'  : TraceField.GainType,
        'igc'   : TraceField.InstrumentGainConstant,
        'igi'   : TraceField.InstrumentInitialGain,
        'corr'  : TraceField.Correlated,
        'sfs'   : TraceField.SweepFrequencyStart,
        'sfe'   : TraceField.SweepFrequencyEnd,
        'slen'  : TraceField.SweepLength,
        'styp'  : TraceField.SweepType,
        'stat'  : TraceField.SweepTraceTaperLengthStart,
        'stae'  : TraceField.SweepTraceTaperLengthEnd,
        'tatyp' : TraceField.TaperType,
        'afilf' : TraceField.AliasFilterFrequency,
        'afils' : TraceField.AliasFilterSlope,
        'nofilf': TraceField.NotchFilterFrequency,
        'nofils': TraceField.NotchFilterSlope,
        'lcf'   : TraceField.LowCutFrequency,
        'hcf'   : TraceField.HighCutFrequency,
        'lcs'   : TraceField.LowCutSlope,
        'hcs'   : TraceField.HighCutSlope,
        'year'  : TraceField.YearDataRecorded,
        'day'   : TraceField.DayOfYear,
        'hour'  : TraceField.HourOfDay,
        'minute': TraceField.MinuteOfHour,
        'sec'   : TraceField.SecondOfMinute,
        'timbas': TraceField.TimeBaseCode,
        'trwf'  : TraceField.TraceWeightingFactor,
        'grnors': TraceField.GeophoneGroupNumberRoll1,
        'grnofr': TraceField.GeophoneGroupNumberFirstTraceOrigField,
        'grnlof': TraceField.GeophoneGroupNumberLastTraceOrigField,
        'gaps'  : TraceField.GapSize,
        'otrav' : TraceField.OverTravel,
        'cdpx'  : TraceField.CDP_X,
        'cdpy'  : TraceField.CDP_Y,
        'iline' : TraceField.INLINE_3D,
        'xline' : TraceField.CROSSLINE_3D,
        'sp'    : TraceField.ShotPoint,
        'scalsp': TraceField.ShotPointScalar,
        'trunit': TraceField.TraceValueMeasurementUnit,
        'tdcm'  : TraceField.TransductionConstantMantissa,
        'tdcp'  : TraceField.TransductionConstantPower,
        'tdunit': TraceField.TransductionUnit,
        'triden': TraceField.TraceIdentifier,
        'sctrh' : TraceField.ScalarTraceHeader,
        'stype' : TraceField.SourceType,
        'sedm'  : TraceField.SourceEnergyDirectionMantissa,
        'sede'  : TraceField.SourceEnergyDirectionExponent,
        'smm'   : TraceField.SourceMeasurementMantissa,
        'sme'   : TraceField.SourceMeasurementExponent,
        'smunit': TraceField.SourceMeasurementUnit,
        'uint1' : TraceField.UnassignedInt1,
        'uint2' : TraceField.UnassignedInt2,

        'jobid' : BinField.JobID,
        'lino'  : BinField.LineNumber,
        'reno'  : BinField.ReelNumber,
        'ntrpr' : BinField.Traces,
        'nart'  : BinField.AuxTraces,
        'hdt'   : BinField.Interval,
        'dto'   : BinField.IntervalOriginal,
        'hns'   : BinField.Samples,
        'nso'   : BinField.SamplesOriginal,
        'format': BinField.Format,
        'fold'  : BinField.EnsembleFold,
        'tsort' : BinField.SortingCode,
        'vscode': BinField.VerticalSum,
        'hsfs'  : BinField.SweepFrequencyStart,
        'hsfe'  : BinField.SweepFrequencyEnd,
        'hslen' : BinField.SweepLength,
        'hstyp' : BinField.Sweep,
        'schn'  : BinField.SweepChannel,
        'hstas' : BinField.SweepTaperStart,
        'hstae' : BinField.SweepTaperEnd,
        'htatyp': BinField.Taper,
        'hcorr' : BinField.CorrelatedTraces,
        'bgrcv' : BinField.BinaryGainRecovery,
        'rcvm'  : BinField.AmplitudeRecovery,
        'mfeet' : BinField.MeasurementSystem,
        'polyt' : BinField.ImpulseSignalPolarity,
        'vpol'  : BinField.VibratoryPolarity,
        'extntrpr' : BinField.ExtTraces,
        'extnart'  : BinField.ExtAuxTraces,
        'exthns'   : BinField.ExtSamples,
        'extnso'   : BinField.ExtSamplesOriginal,
        'extfold'  : BinField.ExtEnsembleFold,
        'unas1' : BinField.Unassigned1,
        'rev'   : BinField.SEGYRevision,
        'revmin': BinField.SEGYRevisionMinor,
        'trflag': BinField.TraceFlag,
        'exth'  : BinField.ExtendedHeaders,
        'unas2' : BinField.Unassigned2,
    }

    def __init__(self, buf, kind, traceno = None, filehandle = None, readonly = True):
        """Wrap the header buffer `buf` of the given `kind`.

        Parameters
        ----------
        buf : bytearray
            backing storage for the header contents
        kind : str
            ``'binary'`` or ``'trace'``
        traceno : int, optional
            trace number this header belongs to (trace headers)
        filehandle : object, optional
            handle used for reading and writing the header
        readonly : bool, optional
            whether the underlying file is opened read-only

        Raises
        ------
        ValueError
            if `kind` is neither ``'binary'`` nor ``'trace'``
        """
        # do setup of kind/keys first, so that keys() work. if this method
        # throws, we want repr() to be well-defined for backtrace, and that
        # requires _keys
        if kind == 'binary':
            self._keys = self._bin_keys
            self.kind = BinField
        elif kind == 'trace':
            self._keys = self._tr_keys
            self.kind = TraceField
        else:
            raise ValueError('Unknown header type {}'.format(kind))

        self.buf = buf
        self.traceno = traceno
        self.filehandle = filehandle
        self.getfield = segyio._segyio.getfield
        self.putfield = segyio._segyio.putfield

        self.readonly = readonly

    def fetch(self, buf = None, traceno = None):
        """Fetch the header from disk

        This object will read header when it is constructed, which means it
        might be out-of-date if the file is updated through some other handle.
        This method is largely meant for internal use - if you need to reload
        disk contents, use ``reload``.

        Fetch does not update any internal state (unless `buf` is ``None`` on a
        trace header, and the read succeeds), but returns the fetched header
        contents.

        This method can be used to reposition the trace header, which is useful
        for constructing generators.

        If this is called on a writable, new file, and this header has not yet
        been written to, it will successfully return an empty buffer that, when
        written to, will be reflected on disk.

        Parameters
        ----------
        buf : bytearray
            buffer to read into instead of ``self.buf``. Only trace headers
            pass it on to the file handle; for binary headers the buffer
            returned by the file handle is used and `buf` is ignored.
        traceno : int
            trace to read instead of ``self.traceno``

        Returns
        -------
        buf : bytearray

        Raises
        ------
        IOError
            if the read fails and the header is read-only

        Notes
        -----
        .. versionadded:: 1.6

        This method is not intended as user-oriented functionality, but might
        be useful in high-performance code.
        """

        if buf is None:
            buf = self.buf

        if traceno is None:
            traceno = self.traceno

        try:
            if self.kind == TraceField:
                # without a trace number there is nothing to read from
                if traceno is None: return buf
                return self.filehandle.getth(traceno, buf)
            else:
                return self.filehandle.getbin()
        except IOError:
            if not self.readonly:
                # the file was probably newly created and the trace header
                # hasn't been written yet, and we set the buffer to zero. if
                # this is the case we want to try and write it later, and if
                # the file was broken, permissions were wrong etc writing will
                # fail too
                #
                # if the file is opened read-only and this happens, there's no
                # way to actually write and the error is an actual error
                return bytearray(len(self.buf))
            else: raise

    def reload(self):
        """
        This object will read header when it is constructed, which means it
        might be out-of-date if the file is updated through some other handle.

        It's rarely required to call this method, and it's a symptom of fragile
        code. However, if you have multiple handles to the same header, it
        might be necessary. Consider the following example::

            >>> x = f.header[10]
            >>> y = f.header[10]
            >>> x[1, 5]
            { 1: 5, 5: 10 }
            >>> y[1, 5]
            { 1: 5, 5: 10 }
            >>> x[1] = 6
            >>> x[1], y[1] # write to x[1] is invisible to y
            6, 5
            >>> y.reload()
            >>> x[1], y[1]
            6, 6
            >>> x[1] = 5
            >>> x[1], y[1]
            5, 6
            >>> y[5] = 1
            >>> x.reload()
            >>> x[1], y[1, 5] # the write to x[1] is lost
            6, { 1: 6, 5: 1 }

        In segyio, headers writes are atomic, and the write to disk writes the
        full cache. If this cache is out of date, some writes might get lost,
        even though the updates are compatible.

        The fix to this issue is either to use ``reload`` and maintain buffer
        consistency, or simply don't let header handles alias and overlap in
        lifetime.

        Returns
        -------
        self : Field
            this object, so calls can be chained

        Notes
        -----
        .. versionadded:: 1.6
        """

        self.buf = self.fetch(buf = self.buf)
        return self

    def flush(self):
        """Commit backing storage to disk

        This method is largely internal, and it is not necessary to call this
        from user code. It should not be explicitly invoked and may be removed
        in future versions.

        Raises
        ------
        RuntimeError
            if ``self.kind`` is neither ``TraceField`` nor ``BinField``
        """

        if self.kind == TraceField:
            self.filehandle.putth(self.traceno, self.buf)

        elif self.kind == BinField:
            self.filehandle.putbin(self.buf)

        else:
            msg = 'Object corrupted: kind {} not valid'
            raise RuntimeError(msg.format(self.kind))

    def __getitem__(self, key):
        """d[key]

        Read the associated value of `key`.

        `key` can be any iterable, to retrieve multiple keys at once. In this
        case, a mapping of key -> value is returned.

        Parameters
        ----------
        key : int, or iterable of int

        Returns
        -------
        value : int or dict_like

        Notes
        -----
        .. versionadded:: 1.1

        .. note::
            Since version 1.6, KeyError is appropriately raised on key misses,
            whereas ``IndexError`` was raised before. This is an old bug, since
            header types were documented to be dict-like. If you rely on
            catching key-miss errors in your code, you might want to handle
            both ``IndexError`` and ``KeyError`` for multi-version robustness.

        .. warning::
            segyio considers reads/writes full headers, not individual fields,
            and does the read from disk when this class is constructed. If the
            file is updated through some other handle, including a secondary
            access via `f.header`, this cache might be out-of-date.

        Examples
        --------
        Read a single value:

        >>> d[3213]
        15000

        Read multiple values at once:

        >>> d[37, 189]
        { 37: 5, 189: 2484 }
        >>> d[37, TraceField.INLINE_3D]
        { 37: 5, 189: 2484 }
        """

        # a TypeError here means key is not int-like, in which case it is
        # treated as an iterable of keys below
        try: return self.getfield(self.buf, int(key))
        except TypeError: pass

        return {self.kind(k): self.getfield(self.buf, int(k)) for k in key}

    def __setitem__(self, key, val):
        """d[key] = val

        Set d[key] to val. Setting keys commits changes to disk, although the
        changes may not be visible until the kernel schedules the write.

        Unlike d[key], this method does not support assigning multiple values
        at once. To set multiple values at once, use the `update` method.

        Parameters
        ----------
        key : int_like
        val : int_like

        Returns
        -------
        val : int
            The value set

        Notes
        -----
        .. versionadded:: 1.1

        .. note::
            Since version 1.6, KeyError is appropriately raised on key misses,
            whereas ``IndexError`` was raised before. This is an old bug, since
            header types were documented to be dict-like. If you rely on
            catching key-miss errors in your code, you might want to handle
            both ``IndexError`` and ``KeyError`` for multi-version robustness.

        .. warning::
            segyio considers reads/writes full headers, not individual fields,
            and does the read from disk when this class is constructed. If the
            file is updated through some other handle, including a secondary
            access via `f.header`, this cache might be out-of-date. That means
            writing an individual field will write the full header to disk,
            possibly overwriting previously set values.

        Examples
        --------
        Set a value and keep in a variable:

        >>> x = header[189] = 5
        >>> x
        5
        """

        self.putfield(self.buf, key, val)
        self.flush()

        return val

    def __delitem__(self, key):
        """del d[key]

        'Delete' the key by setting value to zero. Equivalent to ``d[key] =
        0``.

        Notes
        -----
        .. versionadded:: 1.6
        """

        self[key] = 0

    def keys(self):
        """D.keys() -> a new list of D's keys

        The list is a copy, so modifying it does not affect this object.
        """
        return list(self._keys)

    def __len__(self):
        """x.__len__() <==> len(x)"""
        return len(self._keys)

    def __iter__(self):
        """x.__iter__() <==> iter(x)"""
        return iter(self._keys)

    def __eq__(self, other):
        """x.__eq__(y) <==> x == y

        Keys are compared by their integer value, so a mapping keyed on plain
        ints compares equal to one keyed on the matching enumerations.
        """

        if not isinstance(other, Mapping):
            return NotImplemented

        if len(self) != len(other):
            return False

        def intkeys(d):
            return { int(k): v for k, v in d.items() }

        return intkeys(self) == intkeys(other)


    def update(self, *args, **kwargs):
        """d.update([E, ]**F) -> None.  Update D from mapping/iterable E and F.

        Overwrite the values in `d` with the keys from `E` and `F`. If any key
        in `value` is invalid in `d`, ``KeyError`` is raised.

        This method is atomic - either all values in `value` are set in `d`, or
        none are. ``update`` does not commit a partially-updated version to
        disk. If committing to disk fails, the in-memory header is restored to
        its contents from before the call, and the error is re-raised.

        For kwargs, Seismic Unix-style names are supported. `BinField` and
        `TraceField` are not, because there are name collisions between them,
        although this restriction may be lifted in the future.

        Raises
        ------
        TypeError
            if more than one positional argument is given

        Notes
        -----
        .. versionchanged:: 1.3
            Support for common dict operations (update, keys, values)

        .. versionchanged:: 1.6
            Atomicity guarantee

        .. versionchanged:: 1.6
            `**kwargs` support

        Examples
        --------
        >>> e = { 1: 10, 9: 5 }
        >>> d.update(e)
        >>> l = [ (105, 11), (169, 4) ]
        >>> d.update(l)
        >>> d.update(e, iline=189, xline=193, hour=5)
        >>> d.update(sx=7)

        """

        if len(args) > 1:
            msg = 'update expected at most 1 non-keyword argument, got {}'
            raise TypeError(msg.format(len(args)))

        # stage all writes in a copy, so that a failure half-way through
        # leaves self.buf untouched
        buf = bytearray(self.buf)

        # Implementation largely borrowed from Mapping
        # If E present and has a .keys() method: for k in E: D[k] = E[k]
        # If E present and lacks .keys() method: for (k, v) in E: D[k] = v
        # In either case, this is followed by: for k, v in F.items(): D[k] = v
        if len(args) == 1:
            other = args[0]
            if isinstance(other, Mapping):
                for key in other:
                    self.putfield(buf, int(key), other[key])
            elif hasattr(other, "keys"):
                for key in other.keys():
                    self.putfield(buf, int(key), other[key])
            else:
                for key, value in other:
                    self.putfield(buf, int(key), value)

        for key, value in kwargs.items():
            self.putfield(buf, int(self._kwargs[key]), value)

        # flush() writes self.buf, so the staged buffer must be installed
        # before committing. roll back if the commit fails, otherwise this
        # object would report values that never made it to disk
        previous = self.buf
        self.buf = buf
        try:
            self.flush()
        except Exception:
            self.buf = previous
            raise

    @classmethod
    def binary(cls, segy):
        """Create the binary header object for `segy`, and load it.

        Parameters
        ----------
        segy : object
            file object providing the ``xfd`` and ``readonly`` attributes

        Returns
        -------
        field : Field
        """
        buf = bytearray(segyio._segyio.binsize())
        # go through cls, so that subclasses get instances of their own type
        return cls(buf, kind='binary',
                        filehandle=segy.xfd,
                        readonly=segy.readonly,
                  ).reload()

    @classmethod
    def trace(cls, traceno, segy):
        """Create the trace header object for trace `traceno`, and load it.

        Parameters
        ----------
        traceno : int
        segy : object
            file object providing the ``xfd`` and ``readonly`` attributes

        Returns
        -------
        field : Field
        """
        buf = bytearray(segyio._segyio.thsize())
        # go through cls, so that subclasses get instances of their own type
        return cls(buf, kind='trace',
                        traceno=traceno,
                        filehandle=segy.xfd,
                        readonly=segy.readonly,
                  ).reload()

    def __repr__(self):
        """x.__repr__() <==> repr(x)"""
        return repr(self[self.keys()])