1# Copyright (c) 2021 MetPy Developers. 2# Distributed under the terms of the BSD 3-Clause License. 3# SPDX-License-Identifier: BSD-3-Clause 4"""Tools to process GEMPAK-formatted products.""" 5 6import bisect 7from collections import namedtuple 8from collections.abc import Iterable 9import contextlib 10from datetime import datetime, timedelta 11from enum import Enum 12from itertools import product 13import logging 14import math 15import struct 16import sys 17 18import numpy as np 19import pyproj 20import xarray as xr 21 22from ._tools import IOBuffer, NamedStruct, open_as_needed 23from .. import constants 24from ..calc import (mixing_ratio_from_specific_humidity, scale_height, 25 specific_humidity_from_dewpoint, thickness_hydrostatic, 26 virtual_temperature) 27from ..package_tools import Exporter 28from ..units import units 29 30exporter = Exporter(globals()) 31log = logging.getLogger(__name__) 32 33BYTES_PER_WORD = 4 34PARAM_ATTR = [('name', (4, 's')), ('scale', (1, 'i')), 35 ('offset', (1, 'i')), ('bits', (1, 'i'))] 36USED_FLAG = 9999 37UNUSED_FLAG = -9999 38GEMPAK_HEADER = 'GEMPAK DATA MANAGEMENT FILE ' 39GEMPROJ_TO_PROJ = { 40 'MER': ('merc', 'cyl'), 41 'NPS': ('stere', 'azm'), 42 'SPS': ('stere', 'azm'), 43 'LCC': ('lcc', 'con'), 44 'SCC': ('lcc', 'con'), 45 'CED': ('eqc', 'cyl'), 46 'MCD': ('eqc', 'cyl'), 47 'NOR': ('ortho', 'azm'), 48 'SOR': ('ortho', 'azm'), 49 'STR': ('stere', 'azm'), 50 'AED': ('aeqd', 'azm'), 51 'ORT': ('ortho', 'azm'), 52 'LEA': ('laea', 'azm'), 53 'GNO': ('gnom', 'azm'), 54 'TVM': ('tmerc', 'obq'), 55 'UTM': ('utm', 'obq'), 56} 57GVCORD_TO_VAR = { 58 'PRES': 'p', 59 'HGHT': 'z', 60 'THTA': 'theta', 61} 62 63 64class FileTypes(Enum): 65 """GEMPAK file type.""" 66 67 surface = 1 68 sounding = 2 69 grid = 3 70 71 72class DataTypes(Enum): 73 """Data management library data types.""" 74 75 real = 1 76 integer = 2 77 character = 3 78 realpack = 4 79 grid = 5 80 81 82class VerticalCoordinates(Enum): 83 """Vertical coordinates.""" 84 85 none = 
0 86 pres = 1 87 thta = 2 88 hght = 3 89 sgma = 4 90 dpth = 5 91 hybd = 6 92 pvab = 7 93 pvbl = 8 94 95 96class PackingType(Enum): 97 """GRIB packing type.""" 98 99 none = 0 100 grib = 1 101 nmc = 2 102 diff = 3 103 dec = 4 104 grib2 = 5 105 106 107class ForecastType(Enum): 108 """Forecast type.""" 109 110 analysis = 0 111 forecast = 1 112 guess = 2 113 initial = 3 114 115 116class DataSource(Enum): 117 """Data source.""" 118 119 model = 0 120 airway_surface = 1 121 metar = 2 122 ship = 3 123 raob_buoy = 4 124 synop_raob_vas = 5 125 grid = 6 126 watch_by_county = 7 127 unknown = 99 128 text = 100 129 metar2 = 102 130 ship2 = 103 131 raob_buoy2 = 104 132 synop_raob_vas2 = 105 133 134 135Grid = namedtuple('Grid', [ 136 'GRIDNO', 137 'TYPE', 138 'DATTIM1', 139 'DATTIM2', 140 'PARM', 141 'LEVEL1', 142 'LEVEL2', 143 'COORD', 144]) 145 146Sounding = namedtuple('Sounding', [ 147 'DTNO', 148 'SNDNO', 149 'DATTIM', 150 'ID', 151 'NUMBER', 152 'LAT', 153 'LON', 154 'ELEV', 155 'STATE', 156 'COUNTRY', 157]) 158 159Surface = namedtuple('Surface', [ 160 'ROW', 161 'COL', 162 'DATTIM', 163 'ID', 164 'NUMBER', 165 'LAT', 166 'LON', 167 'ELEV', 168 'STATE', 169 'COUNTRY', 170]) 171 172 173def _data_source(source): 174 """Get data source from stored integer.""" 175 try: 176 return DataSource(source) 177 except ValueError: 178 log.warning('Could not interpret data source `%s`. ' 179 'Setting to `Unknown`.', source) 180 return DataSource(99) 181 182 183def _word_to_position(word, bytes_per_word=BYTES_PER_WORD): 184 """Return beginning position of a word in bytes.""" 185 return (word * bytes_per_word) - bytes_per_word 186 187 188def _interp_logp_data(sounding, missing=-9999): 189 """Interpolate missing sounding data. 190 191 This function is similar to the MR_MISS subroutine in GEMPAK. 
192 """ 193 size = len(sounding['PRES']) 194 recipe = [('TEMP', 'DWPT'), ('DRCT', 'SPED'), ('DWPT', None)] 195 196 for var1, var2 in recipe: 197 iabove = 0 198 i = 1 199 more = True 200 while i < (size - 1) and more: 201 if sounding[var1][i] == missing: 202 if iabove <= i: 203 iabove = i + 1 204 found = False 205 while not found: 206 if sounding[var1][iabove] != missing: 207 found = True 208 else: 209 iabove += 1 210 if iabove >= size: 211 found = True 212 iabove = 0 213 more = False 214 215 if (var2 is None and iabove != 0 216 and sounding['PRES'][i - 1] > 100 217 and sounding['PRES'][iabove] < 100): 218 iabove = 0 219 220 if iabove != 0: 221 adata = {} 222 bdata = {} 223 for param, val in sounding.items(): 224 if (param in ['PRES', 'TEMP', 'DWPT', 225 'DRCT', 'SPED', 'HGHT']): 226 adata[param] = val[i - 1] 227 bdata[param] = val[iabove] 228 vlev = sounding['PRES'][i] 229 outdata = _interp_parameters(vlev, adata, bdata, missing) 230 sounding[var1][i] = outdata[var1] 231 if var2 is not None: 232 sounding[var2][i] = outdata[var2] 233 i += 1 234 235 236def _interp_logp_height(sounding, missing=-9999): 237 """Interpolate height linearly with respect to log p. 238 239 This function mimics the functionality of the MR_INTZ 240 subroutine in GEMPAK. 
241 """ 242 size = len(sounding['HGHT']) 243 244 idx = -1 245 maxlev = -1 246 while size + idx != 0: 247 if sounding['HGHT'][idx] != missing: 248 maxlev = size + idx 249 break 250 else: 251 idx -= 1 252 253 pbot = missing 254 for i in range(maxlev): 255 press = sounding['PRES'][i] 256 hght = sounding['HGHT'][i] 257 258 if press == missing: 259 continue 260 elif hght != missing: 261 pbot = press 262 zbot = hght 263 ptop = 2000 264 elif pbot == missing: 265 continue 266 else: 267 ilev = i + 1 268 while press <= ptop: 269 if sounding['HGHT'][ilev] != missing: 270 ptop = sounding['PRES'][ilev] 271 ztop = sounding['HGHT'][ilev] 272 else: 273 ilev += 1 274 sounding['HGHT'][i] = (zbot + (ztop - zbot) 275 * (np.log(press / pbot) / np.log(ptop / pbot))) 276 277 if maxlev < size - 1: 278 if maxlev > -1: 279 pb = units.Quantity(sounding['PRES'][maxlev], 'hPa') 280 zb = units.Quantity(sounding['HGHT'][maxlev], 'm') 281 tb = units.Quantity(sounding['TEMP'][maxlev], 'degC') 282 tdb = units.Quantity(sounding['DWPT'][maxlev], 'degC') 283 else: 284 pb = units.Quantity(missing, 'hPa') 285 zb = units.Quantity(missing, 'm') 286 tb = units.Quantity(missing, 'degC') 287 tdb = units.Quantity(missing, 'degC') 288 289 for i in range(maxlev + 1, size): 290 if sounding['HGHT'][i] == missing: 291 tt = units.Quantity(sounding['TEMP'][i], 'degC') 292 tdt = units.Quantity(sounding['DWPT'][i], 'degC') 293 pt = units.Quantity(sounding['PRES'][i], 'hPa') 294 295 pl = units.Quantity([pb.m, pt.m], 'hPa') 296 tl = units.Quantity([tb.m, tt.m], 'degC') 297 tdl = units.Quantity([tdb.m, tdt.m], 'degC') 298 299 if missing in tdl.m: 300 rl = None 301 else: 302 ql = specific_humidity_from_dewpoint(pl, tdl) 303 rl = mixing_ratio_from_specific_humidity(ql) 304 305 if missing not in [*tl.m, zb.m]: 306 sounding['HGHT'][i] = (zb + thickness_hydrostatic(pl, tl, rl)).m 307 else: 308 sounding['HGHT'][i] = missing 309 310 311def _interp_logp_pressure(sounding, missing=-9999): 312 """Interpolate pressure from heights. 
313 314 This function is similar to the MR_INTP subroutine from GEMPAK. 315 """ 316 i = 0 317 ilev = -1 318 klev = -1 319 size = len(sounding['PRES']) 320 pt = missing 321 zt = missing 322 pb = missing 323 zb = missing 324 325 while i < size: 326 p = sounding['PRES'][i] 327 z = sounding['HGHT'][i] 328 329 if p != missing and z != missing: 330 klev = i 331 pt = p 332 zt = z 333 334 if ilev != -1 and klev != -1: 335 for j in range(ilev + 1, klev): 336 z = sounding['HGHT'][j] 337 if missing not in [z, zb, pb]: 338 sounding['PRES'][j] = ( 339 pb * np.exp((z - zb) * np.log(pt / pb) / (zt - zb)) 340 ) 341 ilev = klev 342 pb = pt 343 zb = zt 344 i += 1 345 346 347def _interp_moist_height(sounding, missing=-9999): 348 """Interpolate moist hydrostatic height. 349 350 This function mimics the functionality of the MR_SCMZ 351 subroutine in GEMPAK. This the default behavior when 352 merging observed sounding data. 353 """ 354 hlist = units.Quantity(np.ones(len(sounding['PRES'])) * -9999, 'm') 355 356 ilev = -1 357 top = False 358 359 found = False 360 while not found and not top: 361 ilev += 1 362 if ilev >= len(sounding['PRES']): 363 top = True 364 elif missing not in [ 365 sounding['PRES'][ilev], 366 sounding['TEMP'][ilev], 367 sounding['HGHT'][ilev] 368 ]: 369 found = True 370 371 while not top: 372 plev = units.Quantity(sounding['PRES'][ilev], 'hPa') 373 pb = units.Quantity(sounding['PRES'][ilev], 'hPa') 374 tb = units.Quantity(sounding['TEMP'][ilev], 'degC') 375 tdb = units.Quantity(sounding['DWPT'][ilev], 'degC') 376 zb = units.Quantity(sounding['HGHT'][ilev], 'm') 377 zlev = units.Quantity(sounding['HGHT'][ilev], 'm') 378 jlev = ilev 379 klev = 0 380 mand = False 381 382 while not mand: 383 jlev += 1 384 if jlev >= len(sounding['PRES']): 385 mand = True 386 top = True 387 else: 388 pt = units.Quantity(sounding['PRES'][jlev], 'hPa') 389 tt = units.Quantity(sounding['TEMP'][jlev], 'degC') 390 tdt = units.Quantity(sounding['DWPT'][jlev], 'degC') 391 zt = 
units.Quantity(sounding['HGHT'][jlev], 'm') 392 if (zt.m != missing 393 and tt != missing): 394 mand = True 395 klev = jlev 396 397 pl = units.Quantity([pb.m, pt.m], 'hPa') 398 tl = units.Quantity([tb.m, tt.m], 'degC') 399 tdl = units.Quantity([tdb.m, tdt.m], 'degC') 400 401 if missing in tdl.m: 402 rl = None 403 tvl = tl 404 else: 405 ql = specific_humidity_from_dewpoint(pl, tdl) 406 rl = mixing_ratio_from_specific_humidity(ql) 407 tvl = virtual_temperature(tl, ql) 408 409 if missing not in [*tl.m, zb.m]: 410 scale_z = scale_height(*tvl) 411 znew = zb + thickness_hydrostatic(pl, tl, rl) 412 tb = tt 413 tdb = tdt 414 pb = pt 415 zb = znew 416 else: 417 scale_z = units.Quantity(missing, 'm') 418 znew = units.Quantity(missing, 'm') 419 hlist[jlev] = scale_z 420 421 if klev != 0: 422 s = (zt - zlev) / (znew - zlev) 423 for h in range(ilev + 1, klev + 1): 424 hlist[h] *= s 425 426 hbb = zlev 427 pbb = plev 428 for ii in range(ilev + 1, jlev): 429 p = units.Quantity(sounding['PRES'][ii], 'hPa') 430 scale_z = hlist[ii] 431 if missing not in [scale_z.m, hbb.m, 432 pbb.m, p.m]: 433 th = ((scale_z * constants.g) / constants.Rd).to('degC') 434 tbar = units.Quantity([th.m, th.m], 'degC') 435 pll = units.Quantity([pbb.m, p.m], 'hPa') 436 z = hbb + thickness_hydrostatic(pll, tbar) 437 else: 438 z = units.Quantity(missing, 'm') 439 sounding['HGHT'][ii] = z.m 440 hbb = z 441 pbb = p 442 443 ilev = klev 444 445 446def _interp_parameters(vlev, adata, bdata, missing=-9999): 447 """General interpolation with respect to log-p. 448 449 See the PC_INTP subroutine in GEMPAK. 
450 """ 451 pres1 = adata['PRES'] 452 pres2 = bdata['PRES'] 453 between = (((pres1 < pres2) and (pres1 < vlev) 454 and (vlev < pres2)) 455 or ((pres2 < pres1) and (pres2 < vlev) 456 and (vlev < pres1))) 457 458 if not between: 459 raise ValueError('Current pressure does not fall between levels.') 460 elif pres1 <= 0 or pres2 <= 0: 461 raise ValueError('Pressure cannot be negative.') 462 463 outdata = {} 464 rmult = np.log(vlev / pres1) / np.log(pres2 / pres1) 465 outdata['PRES'] = vlev 466 for param, aval in adata.items(): 467 bval = bdata[param] 468 if param == 'DRCT': 469 ang1 = aval % 360 470 ang2 = bval % 360 471 if abs(ang1 - ang2) > 180: 472 if ang1 < ang2: 473 ang1 += 360 474 else: 475 ang2 += 360 476 ang = ang1 + (ang2 - ang1) * rmult 477 outdata[param] = ang % 360 478 else: 479 outdata[param] = aval + (bval - aval) * rmult 480 481 if missing in [aval, bval]: 482 outdata[param] = missing 483 484 return outdata 485 486 487class GempakFile(): 488 """Base class for GEMPAK files. 489 490 Reads ubiquitous GEMPAK file headers (i.e., the data management portion of 491 each file). 
492 """ 493 494 prod_desc_fmt = [('version', 'i'), ('file_headers', 'i'), 495 ('file_keys_ptr', 'i'), ('rows', 'i'), 496 ('row_keys', 'i'), ('row_keys_ptr', 'i'), 497 ('row_headers_ptr', 'i'), ('columns', 'i'), 498 ('column_keys', 'i'), ('column_keys_ptr', 'i'), 499 ('column_headers_ptr', 'i'), ('parts', 'i'), 500 ('parts_ptr', 'i'), ('data_mgmt_ptr', 'i'), 501 ('data_mgmt_length', 'i'), ('data_block_ptr', 'i'), 502 ('file_type', 'i', FileTypes), 503 ('data_source', 'i', _data_source), 504 ('machine_type', 'i'), ('missing_int', 'i'), 505 (None, '12x'), ('missing_float', 'f')] 506 507 grid_nav_fmt = [('grid_definition_type', 'f'), 508 ('projection', '3sx', bytes.decode), 509 ('left_grid_number', 'f'), ('bottom_grid_number', 'f'), 510 ('right_grid_number', 'f'), ('top_grid_number', 'f'), 511 ('lower_left_lat', 'f'), ('lower_left_lon', 'f'), 512 ('upper_right_lat', 'f'), ('upper_right_lon', 'f'), 513 ('proj_angle1', 'f'), ('proj_angle2', 'f'), 514 ('proj_angle3', 'f'), (None, '972x')] 515 516 grid_anl_fmt1 = [('analysis_type', 'f'), ('delta_n', 'f'), 517 ('delta_x', 'f'), ('delta_y', 'f'), 518 (None, '4x'), ('garea_llcr_lat', 'f'), 519 ('garea_llcr_lon', 'f'), ('garea_urcr_lat', 'f'), 520 ('garea_urcr_lon', 'f'), ('extarea_llcr_lat', 'f'), 521 ('extarea_llcr_lon', 'f'), ('extarea_urcr_lat', 'f'), 522 ('extarea_urcr_lon', 'f'), ('datarea_llcr_lat', 'f'), 523 ('datarea_llcr_lon', 'f'), ('datarea_urcr_lat', 'f'), 524 ('datarea_urcrn_lon', 'f'), (None, '444x')] 525 526 grid_anl_fmt2 = [('analysis_type', 'f'), ('delta_n', 'f'), 527 ('grid_ext_left', 'f'), ('grid_ext_down', 'f'), 528 ('grid_ext_right', 'f'), ('grid_ext_up', 'f'), 529 ('garea_llcr_lat', 'f'), ('garea_llcr_lon', 'f'), 530 ('garea_urcr_lat', 'f'), ('garea_urcr_lon', 'f'), 531 ('extarea_llcr_lat', 'f'), ('extarea_llcr_lon', 'f'), 532 ('extarea_urcr_lat', 'f'), ('extarea_urcr_lon', 'f'), 533 ('datarea_llcr_lat', 'f'), ('datarea_llcr_lon', 'f'), 534 ('datarea_urcr_lat', 'f'), ('datarea_urcrn_lon', 'f'), 535 
(None, '440x')] 536 537 data_management_fmt = ([('next_free_word', 'i'), ('max_free_pairs', 'i'), 538 ('actual_free_pairs', 'i'), ('last_word', 'i')] 539 + [('free_word{:d}'.format(n), 'i') for n in range(1, 29)]) 540 541 def __init__(self, file): 542 """Instantiate GempakFile object from file.""" 543 fobj = open_as_needed(file) 544 545 with contextlib.closing(fobj): 546 self._buffer = IOBuffer.fromfile(fobj) 547 548 # Save file start position as pointers use this as reference 549 self._start = self._buffer.set_mark() 550 551 # Process the main GEMPAK header to verify file format 552 self._process_gempak_header() 553 meta = self._buffer.set_mark() 554 555 # # Check for byte swapping 556 self._swap_bytes(bytes(self._buffer.read_binary(4))) 557 self._buffer.jump_to(meta) 558 559 # Process main metadata header 560 self.prod_desc = self._buffer.read_struct(NamedStruct(self.prod_desc_fmt, 561 self.prefmt, 562 'ProductDescription')) 563 564 # File Keys 565 # Surface and upper-air files will not have the file headers, so we need to check. 566 if self.prod_desc.file_headers > 0: 567 # This would grab any file headers, but NAVB and ANLB are the only ones used. 
568 fkey_prod = product(['header_name', 'header_length', 'header_type'], 569 range(1, self.prod_desc.file_headers + 1)) 570 fkey_names = ['{}{}'.format(*x) for x in fkey_prod] 571 fkey_info = list(zip(fkey_names, np.repeat(('4s', 'i', 'i'), 572 self.prod_desc.file_headers))) 573 self.file_keys_format = NamedStruct(fkey_info, self.prefmt, 'FileKeys') 574 575 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.file_keys_ptr)) 576 self.file_keys = self._buffer.read_struct(self.file_keys_format) 577 578 # file_key_blocks = self._buffer.set_mark() 579 # Navigation Block 580 navb_size = self._buffer.read_int(4, self.endian, False) 581 582 nav_stuct = NamedStruct(self.grid_nav_fmt, 583 self.prefmt, 584 'NavigationBlock') 585 586 if navb_size != nav_stuct.size // BYTES_PER_WORD: 587 raise ValueError('Navigation block size does not match GEMPAK specification') 588 else: 589 self.navigation_block = ( 590 self._buffer.read_struct(nav_stuct) 591 ) 592 self.kx = int(self.navigation_block.right_grid_number) 593 self.ky = int(self.navigation_block.top_grid_number) 594 595 # Analysis Block 596 anlb_size = self._buffer.read_int(4, self.endian, False) 597 anlb_start = self._buffer.set_mark() 598 anlb1_struct = NamedStruct(self.grid_anl_fmt1, 599 self.prefmt, 600 'AnalysisBlock') 601 anlb2_struct = NamedStruct(self.grid_anl_fmt2, 602 self.prefmt, 603 'AnalysisBlock') 604 605 if anlb_size not in [anlb1_struct.size // BYTES_PER_WORD, 606 anlb2_struct.size // BYTES_PER_WORD]: 607 raise ValueError('Analysis block size does not match GEMPAK specification') 608 else: 609 anlb_type = self._buffer.read_struct(struct.Struct(self.prefmt + 'f'))[0] 610 self._buffer.jump_to(anlb_start) 611 if anlb_type == 1: 612 self.analysis_block = ( 613 self._buffer.read_struct(anlb1_struct) 614 ) 615 elif anlb_type == 2: 616 self.analysis_block = ( 617 self._buffer.read_struct(anlb2_struct) 618 ) 619 else: 620 self.analysis_block = None 621 else: 622 self.analysis_block = None 623 
self.navigation_block = None 624 625 # Data Management 626 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.data_mgmt_ptr)) 627 self.data_management = self._buffer.read_struct(NamedStruct(self.data_management_fmt, 628 self.prefmt, 629 'DataManagement')) 630 631 # Row Keys 632 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_keys_ptr)) 633 row_key_info = [('row_key{:d}'.format(n), '4s', self._decode_strip) 634 for n in range(1, self.prod_desc.row_keys + 1)] 635 row_key_info.extend([(None, None)]) 636 row_keys_fmt = NamedStruct(row_key_info, self.prefmt, 'RowKeys') 637 self.row_keys = self._buffer.read_struct(row_keys_fmt) 638 639 # Column Keys 640 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_keys_ptr)) 641 column_key_info = [('column_key{:d}'.format(n), '4s', self._decode_strip) 642 for n in range(1, self.prod_desc.column_keys + 1)] 643 column_key_info.extend([(None, None)]) 644 column_keys_fmt = NamedStruct(column_key_info, self.prefmt, 'ColumnKeys') 645 self.column_keys = self._buffer.read_struct(column_keys_fmt) 646 647 # Parts 648 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.parts_ptr)) 649 # parts = self._buffer.set_mark() 650 self.parts = [] 651 parts_info = [('name', '4s', self._decode_strip), 652 (None, '{:d}x'.format((self.prod_desc.parts - 1) * BYTES_PER_WORD)), 653 ('header_length', 'i'), 654 (None, '{:d}x'.format((self.prod_desc.parts - 1) * BYTES_PER_WORD)), 655 ('data_type', 'i', DataTypes), 656 (None, '{:d}x'.format((self.prod_desc.parts - 1) * BYTES_PER_WORD)), 657 ('parameter_count', 'i')] 658 parts_info.extend([(None, None)]) 659 parts_fmt = NamedStruct(parts_info, self.prefmt, 'Parts') 660 for n in range(1, self.prod_desc.parts + 1): 661 self.parts.append(self._buffer.read_struct(parts_fmt)) 662 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.parts_ptr + n)) 663 664 # Parameters 665 # No need to jump to any position as this follows parts 
information 666 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.parts_ptr 667 + self.prod_desc.parts * 4)) 668 self.parameters = [{key: [] for key, _ in PARAM_ATTR} 669 for n in range(self.prod_desc.parts)] 670 for attr, fmt in PARAM_ATTR: 671 fmt = (fmt[0], self.prefmt + fmt[1] if fmt[1] != 's' else fmt[1]) 672 for n, part in enumerate(self.parts): 673 for _ in range(part.parameter_count): 674 if 's' in fmt[1]: 675 self.parameters[n][attr] += [ 676 self._decode_strip(self._buffer.read_binary(*fmt)[0]) 677 ] 678 else: 679 self.parameters[n][attr] += self._buffer.read_binary(*fmt) 680 681 def _swap_bytes(self, binary): 682 """Swap between little and big endian.""" 683 self.swaped_bytes = (struct.pack('@i', 1) != binary) 684 685 if self.swaped_bytes: 686 if sys.byteorder == 'little': 687 self.prefmt = '>' 688 self.endian = 'big' 689 elif sys.byteorder == 'big': 690 self.prefmt = '<' 691 self.endian = 'little' 692 else: 693 self.prefmt = '' 694 self.endian = sys.byteorder 695 696 def _process_gempak_header(self): 697 """Read the GEMPAK header from the file.""" 698 fmt = [('text', '28s', bytes.decode), (None, None)] 699 700 header = self._buffer.read_struct(NamedStruct(fmt, '', 'GempakHeader')) 701 if header.text != GEMPAK_HEADER: 702 raise TypeError('Unknown file format or invalid GEMPAK file') 703 704 @staticmethod 705 def _convert_dattim(dattim): 706 """Convert GEMPAK DATTIM integer to datetime object.""" 707 if dattim: 708 if dattim < 100000000: 709 dt = datetime.strptime(str(dattim), '%y%m%d') 710 else: 711 dt = datetime.strptime('{:010d}'.format(dattim), '%m%d%y%H%M') 712 else: 713 dt = None 714 return dt 715 716 @staticmethod 717 def _convert_ftime(ftime): 718 """Convert GEMPAK forecast time and type integer.""" 719 if ftime >= 0: 720 iftype = ForecastType(ftime // 100000) 721 iftime = ftime - iftype.value * 100000 722 hours = iftime // 100 723 minutes = iftime - hours * 100 724 out = (iftype.name, timedelta(hours=hours, minutes=minutes)) 725 
else: 726 out = None 727 return out 728 729 @staticmethod 730 def _convert_level(level): 731 """Convert levels.""" 732 if isinstance(level, (int, float)) and level >= 0: 733 return level 734 else: 735 return None 736 737 @staticmethod 738 def _convert_vertical_coord(coord): 739 """Convert integer vertical coordinate to name.""" 740 if coord <= 8: 741 return VerticalCoordinates(coord).name.upper() 742 else: 743 return struct.pack('i', coord).decode() 744 745 @staticmethod 746 def _fortran_ishift(i, shift): 747 """Python-friendly bit shifting.""" 748 if shift >= 0: 749 # Shift left and only keep low 32 bits 750 ret = (i << shift) & 0xffffffff 751 752 # If high bit, convert back to negative of two's complement 753 if ret > 0x7fffffff: 754 ret = -(~ret & 0x7fffffff) - 1 755 return ret 756 else: 757 # Shift right the low 32 bits 758 return (i & 0xffffffff) >> -shift 759 760 @staticmethod 761 def _decode_strip(b): 762 """Decode bytes to string and strip whitespace.""" 763 return b.decode().strip() 764 765 @staticmethod 766 def _make_date(dattim): 767 """Make a date object from GEMPAK DATTIM integer.""" 768 return GempakFile._convert_dattim(dattim).date() 769 770 @staticmethod 771 def _make_time(t): 772 """Make a time object from GEMPAK FTIME integer.""" 773 string = '{:04d}'.format(t) 774 return datetime.strptime(string, '%H%M').time() 775 776 def _unpack_real(self, buffer, parameters, length): 777 """Unpack floating point data packed in integers. 778 779 Similar to DP_UNPK subroutine in GEMPAK. 
780 """ 781 nparms = len(parameters['name']) 782 mskpat = 0xffffffff 783 784 pwords = (sum(parameters['bits']) - 1) // 32 + 1 785 npack = (length - 1) // pwords + 1 786 unpacked = np.ones(npack * nparms, dtype=np.float32) * self.prod_desc.missing_float 787 if npack * pwords != length: 788 raise ValueError('Unpacking length mismatch.') 789 790 ir = 0 791 ii = 0 792 for _i in range(npack): 793 pdat = buffer[ii:(ii + pwords)] 794 rdat = unpacked[ir:(ir + nparms)] 795 itotal = 0 796 for idata in range(nparms): 797 scale = 10**parameters['scale'][idata] 798 offset = parameters['offset'][idata] 799 bits = parameters['bits'][idata] 800 isbitc = (itotal % 32) + 1 801 iswrdc = (itotal // 32) 802 imissc = self._fortran_ishift(mskpat, bits - 32) 803 804 jbit = bits 805 jsbit = isbitc 806 jshift = 1 - jsbit 807 jsword = iswrdc 808 jword = pdat[jsword] 809 mask = self._fortran_ishift(mskpat, jbit - 32) 810 ifield = self._fortran_ishift(jword, jshift) 811 ifield &= mask 812 813 if (jsbit + jbit - 1) > 32: 814 jword = pdat[jsword + 1] 815 jshift += 32 816 iword = self._fortran_ishift(jword, jshift) 817 iword &= mask 818 ifield |= iword 819 820 if ifield == imissc: 821 rdat[idata] = self.prod_desc.missing_float 822 else: 823 rdat[idata] = (ifield + offset) * scale 824 itotal += bits 825 unpacked[ir:(ir + nparms)] = rdat 826 ir += nparms 827 ii += pwords 828 829 return unpacked.tolist() 830 831 832@exporter.export 833class GempakGrid(GempakFile): 834 """Subclass of GempakFile specific to GEMPAK gridded data.""" 835 836 def __init__(self, file, *args, **kwargs): 837 super().__init__(file) 838 839 datetime_names = ['GDT1', 'GDT2'] 840 level_names = ['GLV1', 'GLV2'] 841 ftime_names = ['GTM1', 'GTM2'] 842 string_names = ['GPM1', 'GPM2', 'GPM3'] 843 844 # Row Headers 845 # Based on GEMPAK source, row/col headers have a 0th element in their Fortran arrays. 846 # This appears to be a flag value to say a header is used or not. 9999 847 # means its in use, otherwise -9999. 
GEMPAK allows empty grids, etc., but 848 # no real need to keep track of that in Python. 849 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_headers_ptr)) 850 self.row_headers = [] 851 row_headers_info = [(key, 'i') for key in self.row_keys] 852 row_headers_info.extend([(None, None)]) 853 row_headers_fmt = NamedStruct(row_headers_info, self.prefmt, 'RowHeaders') 854 for _ in range(1, self.prod_desc.rows + 1): 855 if self._buffer.read_int(4, self.endian, False) == USED_FLAG: 856 self.row_headers.append(self._buffer.read_struct(row_headers_fmt)) 857 858 # Column Headers 859 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_headers_ptr)) 860 self.column_headers = [] 861 column_headers_info = [(key, 'i', self._convert_level) if key in level_names 862 else (key, 'i', self._convert_vertical_coord) if key == 'GVCD' 863 else (key, 'i', self._convert_dattim) if key in datetime_names 864 else (key, 'i', self._convert_ftime) if key in ftime_names 865 else (key, '4s', self._decode_strip) if key in string_names 866 else (key, 'i') 867 for key in self.column_keys] 868 column_headers_info.extend([(None, None)]) 869 column_headers_fmt = NamedStruct(column_headers_info, self.prefmt, 'ColumnHeaders') 870 for _ in range(1, self.prod_desc.columns + 1): 871 if self._buffer.read_int(4, self.endian, False) == USED_FLAG: 872 self.column_headers.append(self._buffer.read_struct(column_headers_fmt)) 873 874 self._gdinfo = [] 875 for n, head in enumerate(self.column_headers): 876 self._gdinfo.append( 877 Grid( 878 n, 879 head.GTM1[0], 880 head.GDT1 + head.GTM1[1], 881 head.GDT2 + head.GTM2[1] if head.GDT2 and head.GDTM2 else None, 882 head.GPM1 + head.GPM2 + head.GPM3, 883 head.GLV1, 884 head.GLV2, 885 head.GVCD, 886 ) 887 ) 888 889 # Coordinates 890 if self.navigation_block is not None: 891 self._get_crs() 892 self._set_coordinates() 893 894 def gdinfo(self): 895 """Return grid information.""" 896 return self._gdinfo 897 898 def _get_crs(self): 
899 """Create CRS from GEMPAK navigation block.""" 900 gemproj = self.navigation_block.projection 901 proj, ptype = GEMPROJ_TO_PROJ[gemproj] 902 radius_sph = 6371200.0 903 904 if ptype == 'azm': 905 lat_0 = self.navigation_block.proj_angle1 906 lon_0 = self.navigation_block.proj_angle2 907 rot = self.navigation_block.proj_angle3 908 if rot != 0: 909 log.warning('Rotated projections currently ' 910 'not supported. Angle3 (%7.2f) ignored.', rot) 911 self.crs = pyproj.CRS.from_dict({'proj': proj, 912 'lat_0': lat_0, 913 'lon_0': lon_0, 914 'R': radius_sph}) 915 elif ptype == 'cyl': 916 if gemproj != 'mcd': 917 lat_0 = self.navigation_block.proj_angle1 918 lon_0 = self.navigation_block.proj_angle2 919 rot = self.navigation_block.proj_angle3 920 if rot != 0: 921 log.warning('Rotated projections currently ' 922 'not supported. Angle3 (%7.2f) ignored.', rot) 923 self.crs = pyproj.CRS.from_dict({'proj': proj, 924 'lat_0': lat_0, 925 'lon_0': lon_0, 926 'R': radius_sph}) 927 else: 928 avglat = (self.navigation_block.upper_right_lat 929 + self.navigation_block.lower_left_lat) * 0.5 930 k_0 = (1 / math.cos(avglat) 931 if self.navigation_block.proj_angle1 == 0 932 else self.navigation_block.proj_angle1 933 ) 934 lon_0 = self.navigation_block.proj_angle2 935 self.crs = pyproj.CRS.from_dict({'proj': proj, 936 'lat_0': avglat, 937 'lon_0': lon_0, 938 'k_0': k_0, 939 'R': radius_sph}) 940 elif ptype == 'con': 941 lat_1 = self.navigation_block.proj_angle1 942 lon_0 = self.navigation_block.proj_angle2 943 lat_2 = self.navigation_block.proj_angle3 944 self.crs = pyproj.CRS.from_dict({'proj': proj, 945 'lon_0': lon_0, 946 'lat_1': lat_1, 947 'lat_2': lat_2, 948 'R': radius_sph}) 949 950 def _set_coordinates(self): 951 """Use GEMPAK navigation block to define coordinates. 952 953 Defines geographic and projection coordinates for the object. 
954 """ 955 transform = pyproj.Proj(self.crs) 956 llx, lly = transform(self.navigation_block.lower_left_lon, 957 self.navigation_block.lower_left_lat) 958 urx, ury = transform(self.navigation_block.upper_right_lon, 959 self.navigation_block.upper_right_lat) 960 self.x = np.linspace(llx, urx, self.kx, dtype=np.float32) 961 self.y = np.linspace(lly, ury, self.ky, dtype=np.float32) 962 xx, yy = np.meshgrid(self.x, self.y, copy=False) 963 self.lon, self.lat = transform(xx, yy, inverse=True) 964 self.lon = self.lon.astype(np.float32) 965 self.lat = self.lat.astype(np.float32) 966 967 def _unpack_grid(self, packing_type, part): 968 """Read raw GEMPAK grid integers and unpack into floats.""" 969 if packing_type == PackingType.none: 970 lendat = self.data_header_length - part.header_length - 1 971 972 if lendat > 1: 973 buffer_fmt = '{}{}f'.format(self.prefmt, lendat) 974 buffer = self._buffer.read_struct(struct.Struct(buffer_fmt)) 975 grid = np.zeros(self.ky * self.kx, dtype=np.float32) 976 grid[...] 
= buffer 977 else: 978 grid = None 979 980 return grid 981 982 elif packing_type == PackingType.nmc: 983 raise NotImplementedError('NMC unpacking not supported.') 984 # integer_meta_fmt = [('bits', 'i'), ('missing_flag', 'i'), ('kxky', 'i')] 985 # real_meta_fmt = [('reference', 'f'), ('scale', 'f')] 986 # self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt, 987 # self.prefmt, 988 # 'GridMetaInt')) 989 # self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt, 990 # self.prefmt, 991 # 'GridMetaReal')) 992 # grid_start = self._buffer.set_mark() 993 elif packing_type == PackingType.diff: 994 integer_meta_fmt = [('bits', 'i'), ('missing_flag', 'i'), 995 ('kxky', 'i'), ('kx', 'i')] 996 real_meta_fmt = [('reference', 'f'), ('scale', 'f'), ('diffmin', 'f')] 997 self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt, 998 self.prefmt, 999 'GridMetaInt')) 1000 self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt, 1001 self.prefmt, 1002 'GridMetaReal')) 1003 # grid_start = self._buffer.set_mark() 1004 1005 imiss = 2**self.grid_meta_int.bits - 1 1006 lendat = self.data_header_length - part.header_length - 8 1007 packed_buffer_fmt = '{}{}i'.format(self.prefmt, lendat) 1008 packed_buffer = self._buffer.read_struct(struct.Struct(packed_buffer_fmt)) 1009 grid = np.zeros((self.ky, self.kx), dtype=np.float32) 1010 1011 if lendat > 1: 1012 iword = 0 1013 ibit = 1 1014 first = True 1015 for j in range(self.ky): 1016 line = False 1017 for i in range(self.kx): 1018 jshft = self.grid_meta_int.bits + ibit - 33 1019 idat = self._fortran_ishift(packed_buffer[iword], jshft) 1020 idat &= imiss 1021 1022 if jshft > 0: 1023 jshft -= 32 1024 idat2 = self._fortran_ishift(packed_buffer[iword + 1], jshft) 1025 idat |= idat2 1026 1027 ibit += self.grid_meta_int.bits 1028 if ibit > 32: 1029 ibit -= 32 1030 iword += 1 1031 1032 if (self.grid_meta_int.missing_flag and idat == imiss): 1033 grid[j, i] = 
self.prod_desc.missing_float 1034 else: 1035 if first: 1036 grid[j, i] = self.grid_meta_real.reference 1037 psav = self.grid_meta_real.reference 1038 plin = self.grid_meta_real.reference 1039 line = True 1040 first = False 1041 else: 1042 if not line: 1043 grid[j, i] = plin + (self.grid_meta_real.diffmin 1044 + idat * self.grid_meta_real.scale) 1045 line = True 1046 plin = grid[j, i] 1047 else: 1048 grid[j, i] = psav + (self.grid_meta_real.diffmin 1049 + idat * self.grid_meta_real.scale) 1050 psav = grid[j, i] 1051 else: 1052 grid = None 1053 1054 return grid 1055 1056 elif packing_type in [PackingType.grib, PackingType.dec]: 1057 integer_meta_fmt = [('bits', 'i'), ('missing_flag', 'i'), ('kxky', 'i')] 1058 real_meta_fmt = [('reference', 'f'), ('scale', 'f')] 1059 self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt, 1060 self.prefmt, 1061 'GridMetaInt')) 1062 self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt, 1063 self.prefmt, 1064 'GridMetaReal')) 1065 # grid_start = self._buffer.set_mark() 1066 1067 lendat = self.data_header_length - part.header_length - 6 1068 packed_buffer_fmt = '{}{}i'.format(self.prefmt, lendat) 1069 1070 grid = np.zeros(self.grid_meta_int.kxky, dtype=np.float32) 1071 packed_buffer = self._buffer.read_struct(struct.Struct(packed_buffer_fmt)) 1072 if lendat > 1: 1073 imax = 2**self.grid_meta_int.bits - 1 1074 ibit = 1 1075 iword = 0 1076 for cell in range(self.grid_meta_int.kxky): 1077 jshft = self.grid_meta_int.bits + ibit - 33 1078 idat = self._fortran_ishift(packed_buffer[iword], jshft) 1079 idat &= imax 1080 1081 if jshft > 0: 1082 jshft -= 32 1083 idat2 = self._fortran_ishift(packed_buffer[iword + 1], jshft) 1084 idat |= idat2 1085 1086 if (idat == imax) and self.grid_meta_int.missing_flag: 1087 grid[cell] = self.prod_desc.missing_float 1088 else: 1089 grid[cell] = (self.grid_meta_real.reference 1090 + (idat * self.grid_meta_real.scale)) 1091 1092 ibit += self.grid_meta_int.bits 1093 if ibit > 
def gdxarray(self, parameter=None, date_time=None, coordinate=None,
             level=None, date_time2=None, level2=None):
    """Select grids and output as list of xarray DataArrays.

    Subset the data by parameter values. The default is to not
    subset and return the entire dataset.

    Parameters
    ----------
    parameter : str or array-like of str
        Name of GEMPAK parameter.

    date_time : datetime or array-like of datetime
        Valid datetime of the grid. Alternatively can be
        a string with the format YYYYmmddHHMM.

    coordinate : str or array-like of str
        Vertical coordinate.

    level : float or array-like of float
        Vertical level.

    date_time2 : datetime or array-like of datetime
        Secondary valid datetime of the grid. Alternatively can be
        a string with the format YYYYmmddHHMM.

    level2: float or array_like of float
        Secondary vertical level. Typically used for layers.

    Returns
    -------
    list
        List of xarray.DataArray objects for each grid.
    """
    # Normalize every selector to a list; bare strings are Iterable, so they
    # are wrapped explicitly rather than iterated character by character.
    if parameter is not None:
        if (not isinstance(parameter, Iterable)
           or isinstance(parameter, str)):
            parameter = [parameter]
        parameter = [p.upper() for p in parameter]

    if date_time is not None:
        if (not isinstance(date_time, Iterable)
           or isinstance(date_time, str)):
            date_time = [date_time]
        # Accept YYYYmmddHHMM strings alongside datetime objects.
        for i, dt in enumerate(date_time):
            if isinstance(dt, str):
                date_time[i] = datetime.strptime(dt, '%Y%m%d%H%M')

    if coordinate is not None:
        if (not isinstance(coordinate, Iterable)
           or isinstance(coordinate, str)):
            coordinate = [coordinate]
        coordinate = [c.upper() for c in coordinate]

    if level is not None and not isinstance(level, Iterable):
        level = [level]

    if date_time2 is not None:
        if (not isinstance(date_time2, Iterable)
           or isinstance(date_time2, str)):
            date_time2 = [date_time2]
        for i, dt in enumerate(date_time2):
            if isinstance(dt, str):
                date_time2[i] = datetime.strptime(dt, '%Y%m%d%H%M')

    if level2 is not None and not isinstance(level2, Iterable):
        level2 = [level2]

    # Figure out which columns to extract from the file.
    # Each filter lazily narrows the candidate grid list; filter() keeps
    # entries whose lambda is truthy (the grid object itself when matched).
    matched = self._gdinfo.copy()

    if parameter is not None:
        matched = filter(
            lambda grid: grid if grid.PARM in parameter else False,
            matched
        )

    if date_time is not None:
        matched = filter(
            lambda grid: grid if grid.DATTIM1 in date_time else False,
            matched
        )

    if coordinate is not None:
        matched = filter(
            lambda grid: grid if grid.COORD in coordinate else False,
            matched
        )

    if level is not None:
        matched = filter(
            lambda grid: grid if grid.LEVEL1 in level else False,
            matched
        )

    if date_time2 is not None:
        matched = filter(
            lambda grid: grid if grid.DATTIM2 in date_time2 else False,
            matched
        )

    if level2 is not None:
        matched = filter(
            lambda grid: grid if grid.LEVEL2 in level2 else False,
            matched
        )

    matched = list(matched)

    if len(matched) < 1:
        raise KeyError('No grids were matched with given parameters.')

    gridno = [g.GRIDNO for g in matched]

    grids = []
    irow = 0  # Only one row for grids
    for icol, col_head in enumerate(self.column_headers):
        if icol not in gridno:
            continue
        for iprt, part in enumerate(self.parts):
            # Word offset of this (row, column, part) data pointer within
            # the file's data block; pointers are stored row-major.
            pointer = (self.prod_desc.data_block_ptr
                       + (irow * self.prod_desc.columns * self.prod_desc.parts)
                       + (icol * self.prod_desc.parts + iprt))
            self._buffer.jump_to(self._start, _word_to_position(pointer))
            self.data_ptr = self._buffer.read_int(4, self.endian, False)
            # Follow the pointer to the data record; first word is its length.
            self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
            self.data_header_length = self._buffer.read_int(4, self.endian, False)
            data_header = self._buffer.set_mark()
            self._buffer.jump_to(data_header,
                                 _word_to_position(part.header_length + 1))
            packing_type = PackingType(self._buffer.read_int(4, self.endian, False))

            full_name = col_head.GPM1 + col_head.GPM2 + col_head.GPM3
            # GTM1 decodes to a (grid/forecast type, time offset) pair;
            # the offset is added to the base date to get the valid time.
            ftype, ftime = col_head.GTM1
            valid = col_head.GDT1 + ftime
            gvcord = col_head.GVCD.lower() if col_head.GVCD is not None else 'none'
            # Grids whose parameter name matches a vertical-coordinate name
            # (PRES/HGHT/THTA) are renamed via GVCORD_TO_VAR (p/z/theta).
            var = (GVCORD_TO_VAR[full_name]
                   if full_name in GVCORD_TO_VAR
                   else full_name.lower()
                   )
            data = self._unpack_grid(packing_type, part)
            if data is not None:
                if data.ndim < 2:
                    # 1-D unpacked buffer; numpy reshapes the same-size 1-D
                    # mask to match the (ky, kx) data.
                    data = np.ma.array(data.reshape((self.ky, self.kx)),
                                       mask=data == self.prod_desc.missing_float,
                                       dtype=np.float32)
                else:
                    data = np.ma.array(data, mask=data == self.prod_desc.missing_float,
                                       dtype=np.float32)

                # Prepend singleton time/vertical dimensions so each grid is
                # (time, level, y, x) and can be concatenated by callers.
                xrda = xr.DataArray(
                    data=data[np.newaxis, np.newaxis, ...],
                    coords={
                        'time': [valid],
                        gvcord: [col_head.GLV1],
                        'x': self.x,
                        'y': self.y,
                    },
                    dims=['time', gvcord, 'y', 'x'],
                    name=var,
                    attrs={
                        **self.crs.to_cf(),
                        'grid_type': ftype,
                    }
                )
                grids.append(xrda)

            else:
                log.warning('Unable to read grid for %s', col_head.GPM1)
    return grids
def __init__(self, file, *args, **kwargs):
    """Instantiate the sounding reader from a GEMPAK file.

    Reads the row and column header tables up front and records a
    `Sounding` entry in ``self._sninfo`` for every (row, column) pair
    that has a non-zero data pointer.
    """
    super().__init__(file)

    # Row Headers
    self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_headers_ptr))
    self.row_headers = []
    # DATE/TIME keys get decoder callbacks; everything else is a raw int.
    row_headers_info = [(key, 'i', self._make_date) if key == 'DATE'
                        else (key, 'i', self._make_time) if key == 'TIME'
                        else (key, 'i')
                        for key in self.row_keys]
    # Trailing (None, None) entry terminates the NamedStruct format list.
    row_headers_info.extend([(None, None)])
    row_headers_fmt = NamedStruct(row_headers_info, self.prefmt, 'RowHeaders')
    for _ in range(1, self.prod_desc.rows + 1):
        # Each header slot is preceded by a used/unused flag word.
        if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
            self.row_headers.append(self._buffer.read_struct(row_headers_fmt))

    # Column Headers
    self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_headers_ptr))
    self.column_headers = []
    # Station metadata: 4-char strings are decoded/stripped; SLAT/SLON are
    # stored as integer hundredths of a degree, hence the / 100.
    column_headers_info = [(key, '4s', self._decode_strip) if key == 'STID'
                           else (key, 'i') if key == 'STNM'
                           else (key, 'i', lambda x: x / 100) if key == 'SLAT'
                           else (key, 'i', lambda x: x / 100) if key == 'SLON'
                           else (key, 'i') if key == 'SELV'
                           else (key, '4s', self._decode_strip) if key == 'STAT'
                           else (key, '4s', self._decode_strip) if key == 'COUN'
                           else (key, '4s', self._decode_strip) if key == 'STD2'
                           else (key, 'i')
                           for key in self.column_keys]
    column_headers_info.extend([(None, None)])
    column_headers_fmt = NamedStruct(column_headers_info, self.prefmt, 'ColumnHeaders')
    for _ in range(1, self.prod_desc.columns + 1):
        if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
            self.column_headers.append(self._buffer.read_struct(column_headers_fmt))

    # A file containing an SNDT part holds merged soundings; otherwise the
    # raw TTAA/TTBB/... parts must be merged by _merge_sounding.
    self.merged = 'SNDT' in (part.name for part in self.parts)

    self._sninfo = []
    for irow, row_head in enumerate(self.row_headers):
        for icol, col_head in enumerate(self.column_headers):
            # Pointer slot for part 0 of this (row, column); a zero pointer
            # means no data was stored for the station/time.
            pointer = (self.prod_desc.data_block_ptr
                       + (irow * self.prod_desc.columns * self.prod_desc.parts)
                       + (icol * self.prod_desc.parts))

            self._buffer.jump_to(self._start, _word_to_position(pointer))
            data_ptr = self._buffer.read_int(4, self.endian, False)

            if data_ptr:
                self._sninfo.append(
                    Sounding(
                        irow,
                        icol,
                        datetime.combine(row_head.DATE, row_head.TIME),
                        col_head.STID,
                        col_head.STNM,
                        col_head.SLAT,
                        col_head.SLON,
                        col_head.SELV,
                        col_head.STAT,
                        col_head.COUN,
                    )
                )

def sninfo(self):
    """Return sounding information."""
    return self._sninfo

def _unpack_merged(self, sndno):
    """Unpack merged sounding data.

    Parameters
    ----------
    sndno : list of tuple
        (row, column) indices of the soundings to unpack.

    Returns
    -------
    list of dict
        One flat dict per sounding: station metadata plus one array per
        parameter name.
    """
    soundings = []
    for irow, row_head in enumerate(self.row_headers):
        for icol, col_head in enumerate(self.column_headers):
            if (irow, icol) not in sndno:
                continue
            sounding = {'STID': col_head.STID,
                        'STNM': col_head.STNM,
                        'SLAT': col_head.SLAT,
                        'SLON': col_head.SLON,
                        'SELV': col_head.SELV,
                        'STAT': col_head.STAT,
                        'COUN': col_head.COUN,
                        'DATE': row_head.DATE,
                        'TIME': row_head.TIME,
                        }
            for iprt, part in enumerate(self.parts):
                # Locate this part's data record via its pointer slot.
                pointer = (self.prod_desc.data_block_ptr
                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
                           + (icol * self.prod_desc.parts + iprt))
                self._buffer.jump_to(self._start, _word_to_position(pointer))
                self.data_ptr = self._buffer.read_int(4, self.endian, False)
                if not self.data_ptr:
                    continue
                self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                self.data_header_length = self._buffer.read_int(4, self.endian, False)
                data_header = self._buffer.set_mark()
                self._buffer.jump_to(data_header,
                                     _word_to_position(part.header_length + 1))
                # Number of data words after the part header.
                lendat = self.data_header_length - part.header_length

                fmt_code = {
                    DataTypes.real: 'f',
                    DataTypes.realpack: 'i',
                    DataTypes.character: 's',
                }.get(part.data_type)

                if fmt_code is None:
                    raise NotImplementedError('No methods for data type {}'
                                              .format(part.data_type))
                packed_buffer = (
                    self._buffer.read_struct(
                        struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
                    )
                )

                parameters = self.parameters[iprt]
                nparms = len(parameters['name'])

                # Values are interleaved by parameter; stride by nparms to
                # pull out each parameter's profile.
                if part.data_type == DataTypes.realpack:
                    unpacked = self._unpack_real(packed_buffer, parameters, lendat)
                    for iprm, param in enumerate(parameters['name']):
                        sounding[param] = unpacked[iprm::nparms]
                else:
                    for iprm, param in enumerate(parameters['name']):
                        sounding[param] = np.array(
                            packed_buffer[iprm::nparms], dtype=np.float32
                        )

            soundings.append(sounding)
    return soundings
def _unpack_unmerged(self, sndno):
    """Unpack unmerged sounding data.

    Parameters
    ----------
    sndno : list of tuple
        (row, column) indices of the soundings to unpack.

    Returns
    -------
    list
        One merged sounding dict (via ``_merge_sounding``) per selection.
    """
    soundings = []
    for irow, row_head in enumerate(self.row_headers):
        for icol, col_head in enumerate(self.column_headers):
            if (irow, icol) not in sndno:
                continue
            sounding = {'STID': col_head.STID,
                        'STNM': col_head.STNM,
                        'SLAT': col_head.SLAT,
                        'SLON': col_head.SLON,
                        'SELV': col_head.SELV,
                        'STAT': col_head.STAT,
                        'COUN': col_head.COUN,
                        'DATE': row_head.DATE,
                        'TIME': row_head.TIME,
                        }
            for iprt, part in enumerate(self.parts):
                # Locate this part's data record via its pointer slot.
                pointer = (self.prod_desc.data_block_ptr
                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
                           + (icol * self.prod_desc.parts + iprt))
                self._buffer.jump_to(self._start, _word_to_position(pointer))
                self.data_ptr = self._buffer.read_int(4, self.endian, False)
                if not self.data_ptr:
                    continue
                self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                self.data_header_length = self._buffer.read_int(4, self.endian, False)
                data_header = self._buffer.set_mark()
                self._buffer.jump_to(data_header,
                                     _word_to_position(part.header_length + 1))
                lendat = self.data_header_length - part.header_length

                fmt_code = {
                    DataTypes.real: 'f',
                    DataTypes.realpack: 'i',
                    DataTypes.character: 's',
                }.get(part.data_type)

                if fmt_code is None:
                    raise NotImplementedError('No methods for data type {}'
                                              .format(part.data_type))
                packed_buffer = (
                    self._buffer.read_struct(
                        struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
                    )
                )

                parameters = self.parameters[iprt]
                nparms = len(parameters['name'])
                # Unlike the merged case, data is kept nested under the
                # part name (TTAA, TTBB, ...) for later merging.
                sounding[part.name] = {}

                if part.data_type == DataTypes.realpack:
                    unpacked = self._unpack_real(packed_buffer, parameters, lendat)
                    for iprm, param in enumerate(parameters['name']):
                        sounding[part.name][param] = unpacked[iprm::nparms]
                elif part.data_type == DataTypes.character:
                    for iprm, param in enumerate(parameters['name']):
                        sounding[part.name][param] = (
                            self._decode_strip(packed_buffer[iprm])
                        )
                else:
                    # Interleaved reals; stride by nparms per parameter.
                    for iprm, param in enumerate(parameters['name']):
                        sounding[part.name][param] = (
                            np.array(packed_buffer[iprm::nparms], dtype=np.float32)
                        )

            soundings.append(self._merge_sounding(sounding))
    return soundings

def _merge_significant_temps(self, merged, parts, section, pbot):
    """Process and merge significant temperature sections.

    Levels from `section` (TTBB/TTDD) are either used to fill missing
    TEMP/DWPT at an existing pressure or inserted at the correct spot in
    the descending-pressure profile. Returns the updated bottom pressure.
    """
    for isigt, press in enumerate(parts[section]['PRES']):
        press = abs(press)
        if self.prod_desc.missing_float not in [
            press,
            parts[section]['TEMP'][isigt]
        ] and press != 0:
            if press > pbot:
                # Below the current bottom of the profile; ignore.
                continue
            elif press in merged['PRES']:
                ploc = merged['PRES'].index(press)
                # Only fill in values that are still missing.
                if merged['TEMP'][ploc] == self.prod_desc.missing_float:
                    merged['TEMP'][ploc] = parts[section]['TEMP'][isigt]
                    merged['DWPT'][ploc] = parts[section]['DWPT'][isigt]
            else:
                # Pressures are stored descending, so bisect the reversed
                # list to find the insertion point.
                size = len(merged['PRES'])
                loc = size - bisect.bisect_left(merged['PRES'][::-1], press)
                merged['PRES'].insert(loc, press)
                merged['TEMP'].insert(loc, parts[section]['TEMP'][isigt])
                merged['DWPT'].insert(loc, parts[section]['DWPT'][isigt])
                merged['DRCT'].insert(loc, self.prod_desc.missing_float)
                merged['SPED'].insert(loc, self.prod_desc.missing_float)
                merged['HGHT'].insert(loc, self.prod_desc.missing_float)
            pbot = press

    return pbot
def _merge_tropopause_data(self, merged, parts, section, pbot):
    """Process and merge tropopause sections.

    Levels from `section` (TRPA/TRPC) fill missing TEMP/DWPT and wind at
    an existing pressure, or are inserted into the descending-pressure
    profile with missing height. Returns the updated bottom pressure.
    """
    for itrp, press in enumerate(parts[section]['PRES']):
        press = abs(press)
        if self.prod_desc.missing_float not in [
            press,
            parts[section]['TEMP'][itrp]
        ] and press != 0:
            if press > pbot:
                continue
            elif press in merged['PRES']:
                ploc = merged['PRES'].index(press)
                # Only fill values that are still missing at this level.
                if merged['TEMP'][ploc] == self.prod_desc.missing_float:
                    merged['TEMP'][ploc] = parts[section]['TEMP'][itrp]
                    merged['DWPT'][ploc] = parts[section]['DWPT'][itrp]
                if merged['DRCT'][ploc] == self.prod_desc.missing_float:
                    merged['DRCT'][ploc] = parts[section]['DRCT'][itrp]
                    merged['SPED'][ploc] = parts[section]['SPED'][itrp]
                # Height is reset and left for later interpolation.
                merged['HGHT'][ploc] = self.prod_desc.missing_float
            else:
                size = len(merged['PRES'])
                # Pressures are descending; bisect the reversed list.
                loc = size - bisect.bisect_left(merged['PRES'][::-1], press)
                merged['PRES'].insert(loc, press)
                merged['TEMP'].insert(loc, parts[section]['TEMP'][itrp])
                merged['DWPT'].insert(loc, parts[section]['DWPT'][itrp])
                merged['DRCT'].insert(loc, parts[section]['DRCT'][itrp])
                merged['SPED'].insert(loc, parts[section]['SPED'][itrp])
                merged['HGHT'].insert(loc, self.prod_desc.missing_float)
            pbot = press

    return pbot

def _merge_mandatory_temps(self, merged, parts, section, qcman, bgl, plast):
    """Process and merge mandatory temperature sections.

    Appends levels from `section` (TTAA/TTCC) that are above the previous
    level (pressure strictly decreasing) and fully valid. `qcman` collects
    rejected mandatory pressures so wind-only levels are not inserted
    there later; `bgl` counts below-ground TTAA levels.
    """
    num_levels = len(parts[section]['PRES'])
    # TTAA level 0 is the surface, already handled; TTCC starts at 0.
    start_level = {
        'TTAA': 1,
        'TTCC': 0,
    }
    for i in range(start_level[section], num_levels):
        if (parts[section]['PRES'][i] < plast
                and self.prod_desc.missing_float not in [
                    parts[section]['PRES'][i],
                    parts[section]['TEMP'][i],
                    parts[section]['HGHT'][i]
                ]):
            for pname, pval in parts[section].items():
                merged[pname].append(pval[i])
            plast = merged['PRES'][-1]
        else:
            if section == 'TTAA':
                if parts[section]['PRES'][i] > merged['PRES'][0]:
                    # Higher pressure than the surface: below ground.
                    bgl += 1
                else:
                    # GEMPAK ignores MAN data with missing TEMP/HGHT and does not
                    # interpolate for them.
                    if parts[section]['PRES'][i] != self.prod_desc.missing_float:
                        qcman.append(parts[section]['PRES'][i])

    return bgl, plast

def _merge_mandatory_winds(self, merged, parts, section, qcman):
    """Process and merge mandatory wind sections.

    Fills missing winds at matching pressures (surface excluded) or
    inserts wind-only levels, skipping pressures previously rejected
    during the mandatory temperature merge (`qcman`).
    """
    for iwind, press in enumerate(parts[section]['PRES']):
        if press in merged['PRES'][1:]:
            loc = merged['PRES'].index(press)
            if merged['DRCT'][loc] == self.prod_desc.missing_float:
                merged['DRCT'][loc] = parts[section]['DRCT'][iwind]
                merged['SPED'][loc] = parts[section]['SPED'][iwind]
        else:
            if press not in qcman:
                size = len(merged['PRES'])
                # Descending pressures; surface (index 0) excluded from the
                # bisect so nothing is inserted below it.
                loc = size - bisect.bisect_left(merged['PRES'][1:][::-1], press)
                # NOTE(review): loc is at most `size` here, so this guard
                # appears unreachable — confirm intent before removing.
                if loc >= size + 1:
                    loc = -1
                merged['PRES'].insert(loc, press)
                merged['TEMP'].insert(loc, self.prod_desc.missing_float)
                merged['DWPT'].insert(loc, self.prod_desc.missing_float)
                merged['DRCT'].insert(loc, parts[section]['DRCT'][iwind])
                merged['SPED'].insert(loc, parts[section]['SPED'][iwind])
                merged['HGHT'].insert(loc, self.prod_desc.missing_float)
def _merge_winds_pressure(self, merged, parts, section, pbot):
    """Process and merge wind sections on pressure surfaces.

    Levels from `section` (PPBB/PPDD/MXWA/MXWC) fill missing winds at an
    existing pressure or are inserted into the descending-pressure
    profile with missing TEMP/DWPT/HGHT. Returns the updated bottom
    pressure.
    """
    for ilevel, press in enumerate(parts[section]['PRES']):
        # Pressure-coordinate wind levels are stored negated.
        press = abs(press)
        if self.prod_desc.missing_float not in [
            press,
            parts[section]['DRCT'][ilevel],
            parts[section]['SPED'][ilevel]
        ] and press != 0:
            if press > pbot:
                continue
            elif press in merged['PRES']:
                ploc = merged['PRES'].index(press)
                if self.prod_desc.missing_float in [
                    merged['DRCT'][ploc],
                    merged['SPED'][ploc]
                ]:
                    merged['DRCT'][ploc] = parts[section]['DRCT'][ilevel]
                    merged['SPED'][ploc] = parts[section]['SPED'][ilevel]
            else:
                size = len(merged['PRES'])
                # Pressures are descending; bisect the reversed list.
                loc = size - bisect.bisect_left(merged['PRES'][::-1], press)
                merged['PRES'].insert(loc, press)
                merged['DRCT'].insert(loc, parts[section]['DRCT'][ilevel])
                merged['SPED'].insert(loc, parts[section]['SPED'][ilevel])
                merged['TEMP'].insert(loc, self.prod_desc.missing_float)
                merged['DWPT'].insert(loc, self.prod_desc.missing_float)
                merged['HGHT'].insert(loc, self.prod_desc.missing_float)
            pbot = press

    return pbot

def _merge_winds_height(self, merged, parts, nsgw, nasw, istart):
    """Merge wind sections on height surfaces.

    Walks the PPBB (below 100 mb) then PPDD (above) height-coordinate
    wind levels in parallel with the merged profile's heights, filling
    missing winds at (near-)matching heights and inserting new
    height-only levels otherwise. Returns the updated profile size.
    """
    size = len(merged['HGHT'])
    psfc = merged['PRES'][0]
    zsfc = merged['HGHT'][0]

    # Choose the starting bracket (zold, znxt) depending on whether the
    # surface level is usable.
    if self.prod_desc.missing_float not in [
        psfc,
        zsfc
    ] and size >= 2:
        more = True
        zold = merged['HGHT'][0]
        znxt = merged['HGHT'][1]
        ilev = 1
    elif size >= 3:
        more = True
        zold = merged['HGHT'][1]
        znxt = merged['HGHT'][2]
        ilev = 2
    else:
        zold = self.prod_desc.missing_float
        znxt = self.prod_desc.missing_float

    if self.prod_desc.missing_float in [
        zold,
        znxt
    ]:
        more = False

    # Start in the below-100-mb (PPBB) section unless istart is past it.
    if istart <= nsgw:
        above = False
        i = istart
        iend = nsgw
    else:
        above = True
        i = 0
        iend = nasw

    while more and i < iend:
        if not above:
            hght = parts['PPBB']['HGHT'][i]
            drct = parts['PPBB']['DRCT'][i]
            sped = parts['PPBB']['SPED'][i]
        else:
            hght = parts['PPDD']['HGHT'][i]
            drct = parts['PPDD']['DRCT'][i]
            sped = parts['PPDD']['SPED'][i]
        skip = False

        if self.prod_desc.missing_float in [
            hght,
            drct,
            sped
        ]:
            skip = True
        elif abs(zold - hght) < 1:
            # Within 1 m of the previous level: fill its winds if missing.
            skip = True
            if self.prod_desc.missing_float in [
                merged['DRCT'][ilev - 1],
                merged['SPED'][ilev - 1]
            ]:
                merged['DRCT'][ilev - 1] = drct
                merged['SPED'][ilev - 1] = sped
        elif hght <= zold:
            skip = True
        elif hght >= znxt:
            # Advance the bracket until it contains this height.
            while more and hght > znxt:
                zold = znxt
                ilev += 1
                if ilev >= size:
                    more = False
                else:
                    znxt = merged['HGHT'][ilev]
                    if znxt == self.prod_desc.missing_float:
                        more = False

        if more and not skip:
            if abs(znxt - hght) < 1:
                # NOTE(review): the missing check reads level ilev - 1 but
                # the assignment writes level ilev — confirm this index
                # asymmetry against GEMPAK's reference implementation.
                if self.prod_desc.missing_float in [
                    merged['DRCT'][ilev - 1],
                    merged['SPED'][ilev - 1]
                ]:
                    merged['DRCT'][ilev] = drct
                    merged['SPED'][ilev] = sped
            else:
                # Insert a wind-only level; heights are ascending, so a
                # plain bisect gives the insertion point.
                loc = bisect.bisect_left(merged['HGHT'], hght)
                merged['HGHT'].insert(loc, hght)
                merged['DRCT'].insert(loc, drct)
                merged['SPED'].insert(loc, sped)
                merged['PRES'].insert(loc, self.prod_desc.missing_float)
                merged['TEMP'].insert(loc, self.prod_desc.missing_float)
                merged['DWPT'].insert(loc, self.prod_desc.missing_float)
                size += 1
                ilev += 1
                zold = hght

        # After exhausting PPBB, continue seamlessly into PPDD.
        if not above and i == nsgw - 1:
            above = True
            i = 0
            iend = nasw
        else:
            i += 1

    return size
def _merge_sounding(self, parts):
    """Merge unmerged sounding data.

    Combines the individual WMO sections (TTAA/TTBB/TTCC/TTDD mandatory
    and significant temperatures, PPAA/PPBB/PPCC/PPDD winds, TRPA/TRPC
    tropopause, MXWA/MXWC max wind) into a single profile dict keyed by
    PRES/HGHT/TEMP/DWPT/DRCT/SPED, interpolating missing heights and
    pressures along the way. Returns None when no data is present.
    """
    merged = {'STID': parts['STID'],
              'STNM': parts['STNM'],
              'SLAT': parts['SLAT'],
              'SLON': parts['SLON'],
              'SELV': parts['SELV'],
              'STAT': parts['STAT'],
              'COUN': parts['COUN'],
              'DATE': parts['DATE'],
              'TIME': parts['TIME'],
              'PRES': [],
              'HGHT': [],
              'TEMP': [],
              'DWPT': [],
              'DRCT': [],
              'SPED': [],
              }

    # Number of parameter levels
    num_man_levels = len(parts['TTAA']['PRES']) if 'TTAA' in parts else 0
    num_man_wind_levels = len(parts['PPAA']['PRES']) if 'PPAA' in parts else 0
    num_trop_levels = len(parts['TRPA']['PRES']) if 'TRPA' in parts else 0
    num_max_wind_levels = len(parts['MXWA']['PRES']) if 'MXWA' in parts else 0
    num_sigt_levels = len(parts['TTBB']['PRES']) if 'TTBB' in parts else 0
    num_sigw_levels = len(parts['PPBB']['SPED']) if 'PPBB' in parts else 0
    num_above_man_levels = len(parts['TTCC']['PRES']) if 'TTCC' in parts else 0
    num_above_trop_levels = len(parts['TRPC']['PRES']) if 'TRPC' in parts else 0
    num_above_max_wind_levels = len(parts['MXWC']['SPED']) if 'MXWC' in parts else 0
    num_above_sigt_levels = len(parts['TTDD']['PRES']) if 'TTDD' in parts else 0
    num_above_sigw_levels = len(parts['PPDD']['SPED']) if 'PPDD' in parts else 0
    num_above_man_wind_levels = len(parts['PPCC']['SPED']) if 'PPCC' in parts else 0

    total_data = (num_man_levels
                  + num_man_wind_levels
                  + num_trop_levels
                  + num_max_wind_levels
                  + num_sigt_levels
                  + num_sigw_levels
                  + num_above_man_levels
                  + num_above_trop_levels
                  + num_above_max_wind_levels
                  + num_above_sigt_levels
                  + num_above_sigw_levels
                  + num_above_man_wind_levels
                  )
    if total_data == 0:
        return None

    # Check SIG wind vertical coordinate
    # For some reason, the pressure data can get put into the
    # height array. Perhaps this is just a artifact of Python,
    # as GEMPAK itself just uses array indices without any
    # names involved. Since the first valid pressure of the
    # array will be negative in the case of pressure coordinates,
    # we can check for it and place data in the appropriate array.
    ppbb_is_z = True
    if num_sigw_levels:
        if 'PRES' in parts['PPBB']:
            ppbb_is_z = False
        else:
            for z in parts['PPBB']['HGHT']:
                if z != self.prod_desc.missing_float and z < 0:
                    ppbb_is_z = False
                    parts['PPBB']['PRES'] = parts['PPBB']['HGHT']
                    break

    ppdd_is_z = True
    if num_above_sigw_levels:
        if 'PRES' in parts['PPDD']:
            ppdd_is_z = False
        else:
            for z in parts['PPDD']['HGHT']:
                if z != self.prod_desc.missing_float and z < 0:
                    ppdd_is_z = False
                    parts['PPDD']['PRES'] = parts['PPDD']['HGHT']
                    break

    # Process surface data
    if num_man_levels < 1:
        merged['PRES'].append(self.prod_desc.missing_float)
        merged['HGHT'].append(self.prod_desc.missing_float)
        merged['TEMP'].append(self.prod_desc.missing_float)
        merged['DWPT'].append(self.prod_desc.missing_float)
        merged['DRCT'].append(self.prod_desc.missing_float)
        merged['SPED'].append(self.prod_desc.missing_float)
    else:
        # TTAA level 0 is the surface observation.
        merged['PRES'].append(parts['TTAA']['PRES'][0])
        merged['HGHT'].append(parts['TTAA']['HGHT'][0])
        merged['TEMP'].append(parts['TTAA']['TEMP'][0])
        merged['DWPT'].append(parts['TTAA']['DWPT'][0])
        merged['DRCT'].append(parts['TTAA']['DRCT'][0])
        merged['SPED'].append(parts['TTAA']['SPED'][0])

    # Surface height comes from station elevation, not the report.
    merged['HGHT'][0] = merged['SELV']

    # First fully-valid mandatory pressure, used to sanity-check the
    # reported surface pressure below.
    first_man_p = self.prod_desc.missing_float
    if num_man_levels >= 1:
        for mp, mt, mz in zip(parts['TTAA']['PRES'],
                              parts['TTAA']['TEMP'],
                              parts['TTAA']['HGHT']):
            if self.prod_desc.missing_float not in [
                mp,
                mt,
                mz
            ]:
                first_man_p = mp
                break

    surface_p = merged['PRES'][0]
    if surface_p > 1060:
        surface_p = self.prod_desc.missing_float

    # Reject a surface pressure that is missing or above (less than) the
    # first valid mandatory level.
    if (surface_p == self.prod_desc.missing_float
            or (surface_p < first_man_p
                and surface_p != self.prod_desc.missing_float)):
        merged['PRES'][0] = self.prod_desc.missing_float
        merged['HGHT'][0] = self.prod_desc.missing_float
        merged['TEMP'][0] = self.prod_desc.missing_float
        merged['DWPT'][0] = self.prod_desc.missing_float
        merged['DRCT'][0] = self.prod_desc.missing_float
        merged['SPED'][0] = self.prod_desc.missing_float

    # Fall back to the first significant-temperature level for surface
    # pressure/temperature when the mandatory surface was rejected.
    if (num_sigt_levels >= 1
            and self.prod_desc.missing_float not in [
                parts['TTBB']['PRES'][0],
                parts['TTBB']['TEMP'][0]
            ]):
        first_man_p = merged['PRES'][0]
        first_sig_p = parts['TTBB']['PRES'][0]
        if (first_man_p == self.prod_desc.missing_float
                or np.isclose(first_man_p, first_sig_p)):
            merged['PRES'][0] = parts['TTBB']['PRES'][0]
            merged['DWPT'][0] = parts['TTBB']['DWPT'][0]
            merged['TEMP'][0] = parts['TTBB']['TEMP'][0]

    # Likewise take the surface wind from the first significant wind level.
    if num_sigw_levels >= 1:
        if ppbb_is_z:
            if (parts['PPBB']['HGHT'][0] == 0
                    and parts['PPBB']['DRCT'][0] != self.prod_desc.missing_float):
                merged['DRCT'][0] = parts['PPBB']['DRCT'][0]
                merged['SPED'][0] = parts['PPBB']['SPED'][0]
        else:
            if self.prod_desc.missing_float not in [
                parts['PPBB']['PRES'][0],
                parts['PPBB']['DRCT'][0]
            ]:
                first_man_p = merged['PRES'][0]
                first_sig_p = abs(parts['PPBB']['PRES'][0])
                if (first_man_p == self.prod_desc.missing_float
                        or np.isclose(first_man_p, first_sig_p)):
                    merged['PRES'][0] = abs(parts['PPBB']['PRES'][0])
                    merged['DRCT'][0] = parts['PPBB']['DRCT'][0]
                    merged['SPED'][0] = parts['PPBB']['SPED'][0]

    # Merge MAN temperature
    bgl = 0
    qcman = []
    if num_man_levels >= 2 or num_above_man_levels >= 1:
        if merged['PRES'][0] == self.prod_desc.missing_float:
            plast = 2000
        else:
            plast = merged['PRES'][0]

        if num_man_levels >= 2:
            bgl, plast = self._merge_mandatory_temps(merged, parts, 'TTAA',
                                                     qcman, bgl, plast)

        if num_above_man_levels >= 1:
            bgl, plast = self._merge_mandatory_temps(merged, parts, 'TTCC',
                                                     qcman, bgl, plast)

    # Merge MAN wind
    if num_man_wind_levels >= 1 and num_man_levels >= 1 and len(merged['PRES']) >= 2:
        self._merge_mandatory_winds(merged, parts, 'PPAA', qcman)

    if num_above_man_wind_levels >= 1 and num_man_levels >= 1 and len(merged['PRES']) >= 2:
        self._merge_mandatory_winds(merged, parts, 'PPCC', qcman)

    # Merge TROP
    if num_trop_levels >= 1 or num_above_trop_levels >= 1:
        if merged['PRES'][0] != self.prod_desc.missing_float:
            pbot = merged['PRES'][0]
        elif len(merged['PRES']) > 1:
            pbot = merged['PRES'][1]
            # NOTE(review): indexes TRPA['PRES'][1] but only one TRPA level
            # is guaranteed here (and TRPA may be absent when only TRPC
            # triggered this branch) — potential IndexError/KeyError;
            # confirm against GEMPAK behavior.
            if pbot < parts['TRPA']['PRES'][1]:
                pbot = 1050
        else:
            pbot = 1050

        if num_trop_levels >= 1:
            pbot = self._merge_tropopause_data(merged, parts, 'TRPA', pbot)

        if num_above_trop_levels >= 1:
            pbot = self._merge_tropopause_data(merged, parts, 'TRPC', pbot)

    # Merge SIG temperature
    if num_sigt_levels >= 1 or num_above_sigt_levels >= 1:
        if merged['PRES'][0] != self.prod_desc.missing_float:
            pbot = merged['PRES'][0]
        elif len(merged['PRES']) > 1:
            pbot = merged['PRES'][1]
            # NOTE(review): same single-level index concern as the TRPA
            # case above, for TTBB.
            if pbot < parts['TTBB']['PRES'][1]:
                pbot = 1050
        else:
            pbot = 1050

        if num_sigt_levels >= 1:
            pbot = self._merge_significant_temps(merged, parts, 'TTBB', pbot)

        if num_above_sigt_levels >= 1:
            pbot = self._merge_significant_temps(merged, parts, 'TTDD', pbot)

    # Interpolate heights
    _interp_moist_height(merged, self.prod_desc.missing_float)

    # Merge SIG winds on pressure surfaces
    if not ppbb_is_z or not ppdd_is_z:
        if num_sigw_levels >= 1 or num_above_sigw_levels >= 1:
            if merged['PRES'][0] != self.prod_desc.missing_float:
                pbot = merged['PRES'][0]
            elif len(merged['PRES']) > 1:
                pbot = merged['PRES'][1]
            else:
                pbot = 0

        if num_sigw_levels >= 1 and not ppbb_is_z:
            pbot = self._merge_winds_pressure(merged, parts, 'PPBB', pbot)

        if num_above_sigw_levels >= 1 and not ppdd_is_z:
            pbot = self._merge_winds_pressure(merged, parts, 'PPDD', pbot)

    # Merge max winds on pressure surfaces
    if num_max_wind_levels >= 1 or num_above_max_wind_levels >= 1:
        if merged['PRES'][0] != self.prod_desc.missing_float:
            pbot = merged['PRES'][0]
        elif len(merged['PRES']) > 1:
            pbot = merged['PRES'][1]
        else:
            pbot = 0

        if num_max_wind_levels >= 1:
            pbot = self._merge_winds_pressure(merged, parts, 'MXWA', pbot)

        if num_above_max_wind_levels >= 1:
            pbot = self._merge_winds_pressure(merged, parts, 'MXWC', pbot)

    # Interpolate height for SIG/MAX winds
    _interp_logp_height(merged, self.prod_desc.missing_float)

    # Merge SIG winds on height surfaces
    if ppbb_is_z or ppdd_is_z:
        nsgw = num_sigw_levels if ppbb_is_z else 0
        nasw = num_above_sigw_levels if ppdd_is_z else 0
        # Skip the first PPBB level when it duplicates the surface.
        if (nsgw >= 1 and (parts['PPBB']['HGHT'][0] == 0
                or parts['PPBB']['HGHT'][0] == merged['HGHT'][0])):
            istart = 1
        else:
            istart = 0

        size = self._merge_winds_height(merged, parts, nsgw, nasw, istart)

        # Interpolate missing pressure with height
        _interp_logp_pressure(merged, self.prod_desc.missing_float)

    # Interpolate missing data
    _interp_logp_data(merged, self.prod_desc.missing_float)

    # Add below ground MAN data
    if merged['PRES'][0] != self.prod_desc.missing_float and bgl > 0:
        for ibgl in range(1, num_man_levels):
            press = parts['TTAA']['PRES'][ibgl]
            if press > merged['PRES'][0]:
                # NOTE(review): `size` is only bound inside the height-wind
                # branch above; with pressure-coordinate PPBB and PPDD this
                # line would raise NameError — confirm and initialize
                # `size` unconditionally if so.
                loc = size - bisect.bisect_left(merged['PRES'][1:][::-1], press)
                merged['PRES'].insert(loc, press)
                merged['TEMP'].insert(loc, parts['TTAA']['TEMP'][ibgl])
                merged['DWPT'].insert(loc, parts['TTAA']['DWPT'][ibgl])
                merged['DRCT'].insert(loc, parts['TTAA']['DRCT'][ibgl])
                merged['SPED'].insert(loc, parts['TTAA']['SPED'][ibgl])
                merged['HGHT'].insert(loc, parts['TTAA']['HGHT'][ibgl])
                size += 1

    # Add text data, if it is included
    if 'TXTA' in parts:
        merged['TXTA'] = parts['TXTA']['TEXT']
    if 'TXTB' in parts:
        merged['TXTB'] = parts['TXTB']['TEXT']
    if 'TXTC' in parts:
        merged['TXTC'] = parts['TXTC']['TEXT']
    if 'TXPB' in parts:
        merged['TXPB'] = parts['TXPB']['TEXT']

    return merged
2061 """ 2062 if station_id is not None: 2063 if (not isinstance(station_id, Iterable) 2064 or isinstance(station_id, str)): 2065 station_id = [station_id] 2066 station_id = [c.upper() for c in station_id] 2067 2068 if station_number is not None: 2069 if not isinstance(station_number, Iterable): 2070 station_number = [station_number] 2071 station_number = [int(sn) for sn in station_number] 2072 2073 if date_time is not None: 2074 if (not isinstance(date_time, Iterable) 2075 or isinstance(date_time, str)): 2076 date_time = [date_time] 2077 for i, dt in enumerate(date_time): 2078 if isinstance(dt, str): 2079 date_time[i] = datetime.strptime(dt, '%Y%m%d%H%M') 2080 2081 if (state is not None 2082 and (not isinstance(state, Iterable) 2083 or isinstance(state, str))): 2084 state = [state] 2085 state = [s.upper() for s in state] 2086 2087 if (country is not None 2088 and (not isinstance(country, Iterable) 2089 or isinstance(country, str))): 2090 country = [country] 2091 country = [c.upper() for c in country] 2092 2093 # Figure out which columns to extract from the file 2094 matched = self._sninfo.copy() 2095 2096 if station_id is not None: 2097 matched = filter( 2098 lambda snd: snd if snd.ID in station_id else False, 2099 matched 2100 ) 2101 2102 if station_number is not None: 2103 matched = filter( 2104 lambda snd: snd if snd.NUMBER in station_number else False, 2105 matched 2106 ) 2107 2108 if date_time is not None: 2109 matched = filter( 2110 lambda snd: snd if snd.DATTIM in date_time else False, 2111 matched 2112 ) 2113 2114 if state is not None: 2115 matched = filter( 2116 lambda snd: snd if snd.STATE in state else False, 2117 matched 2118 ) 2119 2120 if country is not None: 2121 matched = filter( 2122 lambda snd: snd if snd.COUNTRY in country else False, 2123 matched 2124 ) 2125 2126 matched = list(matched) 2127 2128 if len(matched) < 1: 2129 raise KeyError('No stations were matched with given parameters.') 2130 2131 sndno = [(s.DTNO, s.SNDNO) for s in matched] 
2132 2133 if self.merged: 2134 data = self._unpack_merged(sndno) 2135 else: 2136 data = self._unpack_unmerged(sndno) 2137 2138 soundings = [] 2139 for snd in data: 2140 if snd is None or 'PRES' not in snd: 2141 continue 2142 station_pressure = snd['PRES'][0] 2143 radat_text = {} 2144 attrs = { 2145 'station_id': snd.pop('STID'), 2146 'station_number': snd.pop('STNM'), 2147 'lat': snd.pop('SLAT'), 2148 'lon': snd.pop('SLON'), 2149 'elevation': snd.pop('SELV'), 2150 'station_pressure': station_pressure, 2151 'state': snd.pop('STAT'), 2152 'country': snd.pop('COUN'), 2153 } 2154 2155 if 'TXTA' in snd: 2156 radat_text['txta'] = snd.pop('TXTA') 2157 if 'TXTB' in snd: 2158 radat_text['txtb'] = snd.pop('TXTB') 2159 if 'TXTC' in snd: 2160 radat_text['txtc'] = snd.pop('TXTC') 2161 if 'TXPB' in snd: 2162 radat_text['txpb'] = snd.pop('TXPB') 2163 if radat_text: 2164 attrs['RADAT'] = radat_text 2165 2166 dt = datetime.combine(snd.pop('DATE'), snd.pop('TIME')) 2167 press = np.array(snd.pop('PRES')) 2168 2169 var = {} 2170 for param, values in snd.items(): 2171 values = np.array(values)[np.newaxis, ...] 
2172 maskval = np.ma.array(values, mask=values == self.prod_desc.missing_float, 2173 dtype=np.float32) 2174 var[param.lower()] = (['time', 'pressure'], maskval) 2175 2176 xrds = xr.Dataset(var, 2177 coords={'time': np.atleast_1d(dt), 'pressure': press}, 2178 attrs=attrs) 2179 2180 # Sort to fix GEMPAK surface data at first level 2181 xrds = xrds.sortby('pressure', ascending=False) 2182 2183 soundings.append(xrds) 2184 return soundings 2185 2186 2187@exporter.export 2188class GempakSurface(GempakFile): 2189 """Subclass of GempakFile specific to GEMPAK surface data.""" 2190 2191 def __init__(self, file, *args, **kwargs): 2192 super().__init__(file) 2193 2194 # Row Headers 2195 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_headers_ptr)) 2196 self.row_headers = [] 2197 row_headers_info = self._key_types(self.row_keys) 2198 row_headers_info.extend([(None, None)]) 2199 row_headers_fmt = NamedStruct(row_headers_info, self.prefmt, 'RowHeaders') 2200 for _ in range(1, self.prod_desc.rows + 1): 2201 if self._buffer.read_int(4, self.endian, False) == USED_FLAG: 2202 self.row_headers.append(self._buffer.read_struct(row_headers_fmt)) 2203 2204 # Column Headers 2205 self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_headers_ptr)) 2206 self.column_headers = [] 2207 column_headers_info = self._key_types(self.column_keys) 2208 column_headers_info.extend([(None, None)]) 2209 column_headers_fmt = NamedStruct(column_headers_info, self.prefmt, 'ColumnHeaders') 2210 for _ in range(1, self.prod_desc.columns + 1): 2211 if self._buffer.read_int(4, self.endian, False) == USED_FLAG: 2212 self.column_headers.append(self._buffer.read_struct(column_headers_fmt)) 2213 2214 self._get_surface_type() 2215 2216 self._sfinfo = [] 2217 if self.surface_type == 'standard': 2218 for irow, row_head in enumerate(self.row_headers): 2219 for icol, col_head in enumerate(self.column_headers): 2220 pointer = (self.prod_desc.data_block_ptr 2221 + (irow * 
self.prod_desc.columns * self.prod_desc.parts) 2222 + (icol * self.prod_desc.parts)) 2223 2224 self._buffer.jump_to(self._start, _word_to_position(pointer)) 2225 data_ptr = self._buffer.read_int(4, self.endian, False) 2226 2227 if data_ptr: 2228 self._sfinfo.append( 2229 Surface( 2230 irow, 2231 icol, 2232 datetime.combine(row_head.DATE, row_head.TIME), 2233 col_head.STID + col_head.STD2, 2234 col_head.STNM, 2235 col_head.SLAT, 2236 col_head.SLON, 2237 col_head.SELV, 2238 col_head.STAT, 2239 col_head.COUN, 2240 ) 2241 ) 2242 elif self.surface_type == 'ship': 2243 irow = 0 2244 for icol, col_head in enumerate(self.column_headers): 2245 pointer = (self.prod_desc.data_block_ptr 2246 + (irow * self.prod_desc.columns * self.prod_desc.parts) 2247 + (icol * self.prod_desc.parts)) 2248 2249 self._buffer.jump_to(self._start, _word_to_position(pointer)) 2250 data_ptr = self._buffer.read_int(4, self.endian, False) 2251 2252 if data_ptr: 2253 self._sfinfo.append( 2254 Surface( 2255 irow, 2256 icol, 2257 datetime.combine(col_head.DATE, col_head.TIME), 2258 col_head.STID + col_head.STD2, 2259 col_head.STNM, 2260 col_head.SLAT, 2261 col_head.SLON, 2262 col_head.SELV, 2263 col_head.STAT, 2264 col_head.COUN, 2265 ) 2266 ) 2267 elif self.surface_type == 'climate': 2268 for icol, col_head in enumerate(self.column_headers): 2269 for irow, row_head in enumerate(self.row_headers): 2270 pointer = (self.prod_desc.data_block_ptr 2271 + (irow * self.prod_desc.columns * self.prod_desc.parts) 2272 + (icol * self.prod_desc.parts)) 2273 2274 self._buffer.jump_to(self._start, _word_to_position(pointer)) 2275 data_ptr = self._buffer.read_int(4, self.endian, False) 2276 2277 if data_ptr: 2278 self._sfinfo.append( 2279 Surface( 2280 irow, 2281 icol, 2282 datetime.combine(col_head.DATE, col_head.TIME), 2283 row_head.STID + row_head.STD2, 2284 row_head.STNM, 2285 row_head.SLAT, 2286 row_head.SLON, 2287 row_head.SELV, 2288 row_head.STAT, 2289 row_head.COUN, 2290 ) 2291 ) 2292 else: 2293 raise 
TypeError('Unknown surface type {}'.format(self.surface_type)) 2294 2295 def sfinfo(self): 2296 """Return station information.""" 2297 return self._sfinfo 2298 2299 def _get_surface_type(self): 2300 """Determine type of surface file.""" 2301 if len(self.row_headers) == 1: 2302 self.surface_type = 'ship' 2303 elif 'DATE' in self.row_keys: 2304 self.surface_type = 'standard' 2305 elif 'DATE' in self.column_keys: 2306 self.surface_type = 'climate' 2307 else: 2308 raise TypeError('Unknown surface data type') 2309 2310 def _key_types(self, keys): 2311 """Determine header information from a set of keys.""" 2312 header_info = [(key, '4s', self._decode_strip) if key == 'STID' 2313 else (key, 'i') if key == 'STNM' 2314 else (key, 'i', lambda x: x / 100) if key == 'SLAT' 2315 else (key, 'i', lambda x: x / 100) if key == 'SLON' 2316 else (key, 'i') if key == 'SELV' 2317 else (key, '4s', self._decode_strip) if key == 'STAT' 2318 else (key, '4s', self._decode_strip) if key == 'COUN' 2319 else (key, '4s', self._decode_strip) if key == 'STD2' 2320 else (key, 'i', self._make_date) if key == 'DATE' 2321 else (key, 'i', self._make_time) if key == 'TIME' 2322 else (key, 'i') 2323 for key in keys] 2324 2325 return header_info 2326 2327 def _unpack_climate(self, sfcno): 2328 """Unpack a climate surface data file.""" 2329 stations = [] 2330 for icol, col_head in enumerate(self.column_headers): 2331 for irow, row_head in enumerate(self.row_headers): 2332 if (irow, icol) not in sfcno: 2333 continue 2334 station = {'STID': row_head.STID, 2335 'STNM': row_head.STNM, 2336 'SLAT': row_head.SLAT, 2337 'SLON': row_head.SLON, 2338 'SELV': row_head.SELV, 2339 'STAT': row_head.STAT, 2340 'COUN': row_head.COUN, 2341 'STD2': row_head.STD2, 2342 'SPRI': row_head.SPRI, 2343 'DATE': col_head.DATE, 2344 'TIME': col_head.TIME, 2345 } 2346 for iprt, part in enumerate(self.parts): 2347 pointer = (self.prod_desc.data_block_ptr 2348 + (irow * self.prod_desc.columns * self.prod_desc.parts) 2349 + (icol * 
self.prod_desc.parts + iprt)) 2350 self._buffer.jump_to(self._start, _word_to_position(pointer)) 2351 self.data_ptr = self._buffer.read_int(4, self.endian, False) 2352 if not self.data_ptr: 2353 continue 2354 self._buffer.jump_to(self._start, _word_to_position(self.data_ptr)) 2355 self.data_header_length = self._buffer.read_int(4, self.endian, False) 2356 data_header = self._buffer.set_mark() 2357 self._buffer.jump_to(data_header, 2358 _word_to_position(part.header_length + 1)) 2359 lendat = self.data_header_length - part.header_length 2360 2361 fmt_code = { 2362 DataTypes.real: 'f', 2363 DataTypes.realpack: 'i', 2364 DataTypes.character: 's', 2365 }.get(part.data_type) 2366 2367 if fmt_code is None: 2368 raise NotImplementedError('No methods for data type {}' 2369 .format(part.data_type)) 2370 packed_buffer = ( 2371 self._buffer.read_struct( 2372 struct.Struct(f'{self.prefmt}{lendat}{fmt_code}') 2373 ) 2374 ) 2375 2376 parameters = self.parameters[iprt] 2377 2378 if part.data_type == DataTypes.realpack: 2379 unpacked = self._unpack_real(packed_buffer, parameters, lendat) 2380 for iprm, param in enumerate(parameters['name']): 2381 station[param] = unpacked[iprm] 2382 elif part.data_type == DataTypes.character: 2383 for iprm, param in enumerate(parameters['name']): 2384 station[param] = self._decode_strip(packed_buffer[iprm]) 2385 else: 2386 for iprm, param in enumerate(parameters['name']): 2387 station[param] = np.array( 2388 packed_buffer[iprm], dtype=np.float32 2389 ) 2390 2391 stations.append(station) 2392 return stations 2393 2394 def _unpack_ship(self, sfcno): 2395 """Unpack ship (moving observation) surface data file.""" 2396 stations = [] 2397 irow = 0 2398 for icol, col_head in enumerate(self.column_headers): 2399 if (irow, icol) not in sfcno: 2400 continue 2401 station = {'STID': col_head.STID, 2402 'STNM': col_head.STNM, 2403 'SLAT': col_head.SLAT, 2404 'SLON': col_head.SLON, 2405 'SELV': col_head.SELV, 2406 'STAT': col_head.STAT, 2407 'COUN': 
col_head.COUN, 2408 'STD2': col_head.STD2, 2409 'SPRI': col_head.SPRI, 2410 'DATE': col_head.DATE, 2411 'TIME': col_head.TIME, 2412 } 2413 for iprt, part in enumerate(self.parts): 2414 pointer = (self.prod_desc.data_block_ptr 2415 + (irow * self.prod_desc.columns * self.prod_desc.parts) 2416 + (icol * self.prod_desc.parts + iprt)) 2417 self._buffer.jump_to(self._start, _word_to_position(pointer)) 2418 self.data_ptr = self._buffer.read_int(4, self.endian, False) 2419 if not self.data_ptr: 2420 continue 2421 self._buffer.jump_to(self._start, _word_to_position(self.data_ptr)) 2422 self.data_header_length = self._buffer.read_int(4, self.endian, False) 2423 data_header = self._buffer.set_mark() 2424 self._buffer.jump_to(data_header, 2425 _word_to_position(part.header_length + 1)) 2426 lendat = self.data_header_length - part.header_length 2427 2428 fmt_code = { 2429 DataTypes.real: 'f', 2430 DataTypes.realpack: 'i', 2431 DataTypes.character: 's', 2432 }.get(part.data_type) 2433 2434 if fmt_code is None: 2435 raise NotImplementedError('No methods for data type {}' 2436 .format(part.data_type)) 2437 packed_buffer = ( 2438 self._buffer.read_struct( 2439 struct.Struct(f'{self.prefmt}{lendat}{fmt_code}') 2440 ) 2441 ) 2442 2443 parameters = self.parameters[iprt] 2444 2445 if part.data_type == DataTypes.realpack: 2446 unpacked = self._unpack_real(packed_buffer, parameters, lendat) 2447 for iprm, param in enumerate(parameters['name']): 2448 station[param] = unpacked[iprm] 2449 elif part.data_type == DataTypes.character: 2450 for iprm, param in enumerate(parameters['name']): 2451 station[param] = self._decode_strip(packed_buffer[iprm]) 2452 else: 2453 for iprm, param in enumerate(parameters['name']): 2454 station[param] = np.array( 2455 packed_buffer[iprm], dtype=np.float32 2456 ) 2457 2458 stations.append(station) 2459 return stations 2460 2461 def _unpack_standard(self, sfcno): 2462 """Unpack a standard surface data file.""" 2463 stations = [] 2464 for irow, row_head in 
enumerate(self.row_headers): 2465 for icol, col_head in enumerate(self.column_headers): 2466 if (irow, icol) not in sfcno: 2467 continue 2468 station = {'STID': col_head.STID, 2469 'STNM': col_head.STNM, 2470 'SLAT': col_head.SLAT, 2471 'SLON': col_head.SLON, 2472 'SELV': col_head.SELV, 2473 'STAT': col_head.STAT, 2474 'COUN': col_head.COUN, 2475 'STD2': col_head.STD2, 2476 'SPRI': col_head.SPRI, 2477 'DATE': row_head.DATE, 2478 'TIME': row_head.TIME, 2479 } 2480 for iprt, part in enumerate(self.parts): 2481 pointer = (self.prod_desc.data_block_ptr 2482 + (irow * self.prod_desc.columns * self.prod_desc.parts) 2483 + (icol * self.prod_desc.parts + iprt)) 2484 self._buffer.jump_to(self._start, _word_to_position(pointer)) 2485 self.data_ptr = self._buffer.read_int(4, self.endian, False) 2486 if not self.data_ptr: 2487 continue 2488 self._buffer.jump_to(self._start, _word_to_position(self.data_ptr)) 2489 self.data_header_length = self._buffer.read_int(4, self.endian, False) 2490 data_header = self._buffer.set_mark() 2491 self._buffer.jump_to(data_header, 2492 _word_to_position(part.header_length + 1)) 2493 lendat = self.data_header_length - part.header_length 2494 2495 fmt_code = { 2496 DataTypes.real: 'f', 2497 DataTypes.realpack: 'i', 2498 DataTypes.character: 's', 2499 }.get(part.data_type) 2500 2501 if fmt_code is None: 2502 raise NotImplementedError('No methods for data type {}' 2503 .format(part.data_type)) 2504 packed_buffer = ( 2505 self._buffer.read_struct( 2506 struct.Struct(f'{self.prefmt}{lendat}{fmt_code}') 2507 ) 2508 ) 2509 2510 parameters = self.parameters[iprt] 2511 2512 if part.data_type == DataTypes.realpack: 2513 unpacked = self._unpack_real(packed_buffer, parameters, lendat) 2514 for iprm, param in enumerate(parameters['name']): 2515 station[param] = unpacked[iprm] 2516 elif part.data_type == DataTypes.character: 2517 for iprm, param in enumerate(parameters['name']): 2518 station[param] = self._decode_strip(packed_buffer[iprm]) 2519 else: 2520 for 
iprm, param in enumerate(parameters['name']): 2521 station[param] = packed_buffer[iprm] 2522 2523 stations.append(station) 2524 return stations 2525 2526 def nearest_time(self, date_time, station_id=None, station_number=None): 2527 """Get nearest observation to given time for selected stations. 2528 2529 Parameters 2530 ---------- 2531 date_time : datetime or array-like of datetime 2532 Valid/observed datetime of the surface station. Alternatively 2533 object or a string with the format YYYYmmddHHMM. 2534 2535 station_id : str or array-like of str 2536 Station ID of the surface station. 2537 2538 station_number : int or array-like of int 2539 Station number of the surface station. 2540 2541 Returns 2542 ------- 2543 list 2544 List of dicts/JSONs for each surface station. 2545 2546 Notes 2547 ----- 2548 One of either station_id or station_number must be used. If both 2549 are present, station_id will take precedence. 2550 """ 2551 if isinstance(date_time, str): 2552 date_time = datetime.strptime(date_time, '%Y%m%d%H%M') 2553 2554 if station_id is None and station_number is None: 2555 raise ValueError('Must have either station_id or station_number') 2556 2557 if station_id is not None and station_number is not None: 2558 station_number = None 2559 2560 if (station_id is not None 2561 and (not isinstance(station_id, Iterable) 2562 or isinstance(station_id, str))): 2563 station_id = [station_id] 2564 station_id = [c.upper() for c in station_id] 2565 2566 if station_number is not None and not isinstance(station_number, Iterable): 2567 station_number = [station_number] 2568 station_number = [int(sn) for sn in station_number] 2569 2570 time_matched = [] 2571 if station_id: 2572 for stn in station_id: 2573 matched = self.sfjson(station_id=stn) 2574 2575 nearest = min( 2576 matched, 2577 key=lambda d: abs(d['properties']['date_time'] - date_time) 2578 ) 2579 2580 time_matched.append(nearest) 2581 2582 if station_number: 2583 for stn in station_id: 2584 matched = 
self.sfjson(station_number=stn) 2585 2586 nearest = min( 2587 matched, 2588 key=lambda d: abs(d['properties']['date_time'] - date_time) 2589 ) 2590 2591 time_matched.append(nearest) 2592 2593 return time_matched 2594 2595 def sfjson(self, station_id=None, station_number=None, 2596 date_time=None, state=None, country=None): 2597 """Select surface stations and output as list of JSON objects. 2598 2599 Subset the data by parameter values. The default is to not 2600 subset and return the entire dataset. 2601 2602 Parameters 2603 ---------- 2604 station_id : str or array-like of str 2605 Station ID of the surface station. 2606 2607 station_number : int or array-like of int 2608 Station number of the surface station. 2609 2610 date_time : datetime or array-like of datetime 2611 Valid datetime of the grid. Alternatively can be 2612 a string with the format YYYYmmddHHMM. 2613 2614 state : str or array-like of str 2615 State where surface station is located. 2616 2617 country : str or array-like of str 2618 Country where surface station is located. 2619 2620 Returns 2621 ------- 2622 list 2623 List of dicts/JSONs for each surface station. 
2624 """ 2625 if (station_id is not None 2626 and (not isinstance(station_id, Iterable) 2627 or isinstance(station_id, str))): 2628 station_id = [station_id] 2629 station_id = [c.upper() for c in station_id] 2630 2631 if station_number is not None and not isinstance(station_number, Iterable): 2632 station_number = [station_number] 2633 station_number = [int(sn) for sn in station_number] 2634 2635 if date_time is not None: 2636 if (not isinstance(date_time, Iterable) 2637 or isinstance(date_time, str)): 2638 date_time = [date_time] 2639 for i, dt in enumerate(date_time): 2640 if isinstance(dt, str): 2641 date_time[i] = datetime.strptime(dt, '%Y%m%d%H%M') 2642 2643 if (state is not None 2644 and (not isinstance(state, Iterable) 2645 or isinstance(state, str))): 2646 state = [state] 2647 state = [s.upper() for s in state] 2648 2649 if (country is not None 2650 and (not isinstance(country, Iterable) 2651 or isinstance(country, str))): 2652 country = [country] 2653 country = [c.upper() for c in country] 2654 2655 # Figure out which columns to extract from the file 2656 matched = self._sfinfo.copy() 2657 2658 if station_id is not None: 2659 matched = filter( 2660 lambda sfc: sfc if sfc.ID in station_id else False, 2661 matched 2662 ) 2663 2664 if station_number is not None: 2665 matched = filter( 2666 lambda sfc: sfc if sfc.NUMBER in station_number else False, 2667 matched 2668 ) 2669 2670 if date_time is not None: 2671 matched = filter( 2672 lambda sfc: sfc if sfc.DATTIM in date_time else False, 2673 matched 2674 ) 2675 2676 if state is not None: 2677 matched = filter( 2678 lambda sfc: sfc if sfc.STATE in state else False, 2679 matched 2680 ) 2681 2682 if country is not None: 2683 matched = filter( 2684 lambda sfc: sfc if sfc.COUNTRY in country else False, 2685 matched 2686 ) 2687 2688 matched = list(matched) 2689 2690 if len(matched) < 1: 2691 raise KeyError('No stations were matched with given parameters.') 2692 2693 sfcno = [(s.ROW, s.COL) for s in matched] 2694 2695 
if self.surface_type == 'standard': 2696 data = self._unpack_standard(sfcno) 2697 elif self.surface_type == 'ship': 2698 data = self._unpack_ship(sfcno) 2699 elif self.surface_type == 'climate': 2700 data = self._unpack_climate(sfcno) 2701 else: 2702 raise ValueError(f'Unknown surface data type: {self.surface_type}') 2703 2704 stnarr = [] 2705 for stn in data: 2706 stnobj = {} 2707 stnobj['properties'] = {} 2708 stnobj['values'] = {} 2709 stnobj['properties']['date_time'] = datetime.combine(stn.pop('DATE'), 2710 stn.pop('TIME')) 2711 stnobj['properties']['station_id'] = stn.pop('STID') + stn.pop('STD2') 2712 stnobj['properties']['station_number'] = stn.pop('STNM') 2713 stnobj['properties']['longitude'] = stn.pop('SLON') 2714 stnobj['properties']['latitude'] = stn.pop('SLAT') 2715 stnobj['properties']['elevation'] = stn.pop('SELV') 2716 stnobj['properties']['state'] = stn.pop('STAT') 2717 stnobj['properties']['country'] = stn.pop('COUN') 2718 stnobj['properties']['priority'] = stn.pop('SPRI') 2719 if stn: 2720 for name, ob in stn.items(): 2721 stnobj['values'][name.lower()] = ob 2722 stnarr.append(stnobj) 2723 2724 return stnarr 2725