1# Copyright (c) 2021 MetPy Developers.
2# Distributed under the terms of the BSD 3-Clause License.
3# SPDX-License-Identifier: BSD-3-Clause
4"""Tools to process GEMPAK-formatted products."""
5
6import bisect
7from collections import namedtuple
8from collections.abc import Iterable
9import contextlib
10from datetime import datetime, timedelta
11from enum import Enum
12from itertools import product
13import logging
14import math
15import struct
16import sys
17
18import numpy as np
19import pyproj
20import xarray as xr
21
22from ._tools import IOBuffer, NamedStruct, open_as_needed
23from .. import constants
24from ..calc import (mixing_ratio_from_specific_humidity, scale_height,
25                    specific_humidity_from_dewpoint, thickness_hydrostatic,
26                    virtual_temperature)
27from ..package_tools import Exporter
28from ..units import units
29
# Module-level singletons: public-name exporter and package logger.
exporter = Exporter(globals())
log = logging.getLogger(__name__)

# GEMPAK stores data in 4-byte "words"; all file pointers are word offsets.
BYTES_PER_WORD = 4
# Packed-parameter metadata attributes: (attribute, (count, struct code)).
PARAM_ATTR = [('name', (4, 's')), ('scale', (1, 'i')),
              ('offset', (1, 'i')), ('bits', (1, 'i'))]
# Row/column header sentinel: 9999 marks a header slot as in use, -9999 unused.
USED_FLAG = 9999
UNUSED_FLAG = -9999
# Magic 28-character string at the start of every GEMPAK DM file.
GEMPAK_HEADER = 'GEMPAK DATA MANAGEMENT FILE '
# Map GEMPAK projection mnemonics to (pyproj projection name, class), where
# the class is cyl(indrical), azm(uthal), con(ic), or obq (oblique).
GEMPROJ_TO_PROJ = {
    'MER': ('merc', 'cyl'),
    'NPS': ('stere', 'azm'),
    'SPS': ('stere', 'azm'),
    'LCC': ('lcc', 'con'),
    'SCC': ('lcc', 'con'),
    'CED': ('eqc', 'cyl'),
    'MCD': ('eqc', 'cyl'),
    'NOR': ('ortho', 'azm'),
    'SOR': ('ortho', 'azm'),
    'STR': ('stere', 'azm'),
    'AED': ('aeqd', 'azm'),
    'ORT': ('ortho', 'azm'),
    'LEA': ('laea', 'azm'),
    'GNO': ('gnom', 'azm'),
    'TVM': ('tmerc', 'obq'),
    'UTM': ('utm', 'obq'),
}
# Map GEMPAK vertical coordinate names to short variable names.
GVCORD_TO_VAR = {
    'PRES': 'p',
    'HGHT': 'z',
    'THTA': 'theta',
}
62
63
class FileTypes(Enum):
    """GEMPAK file type.

    Stored as the ``file_type`` word of the product description block.
    """

    surface = 1
    sounding = 2
    grid = 3
70
71
class DataTypes(Enum):
    """Data management library data types.

    Stored per part as the ``data_type`` word of the parts block.
    """

    real = 1
    integer = 2
    character = 3
    realpack = 4
    grid = 5
80
81
class VerticalCoordinates(Enum):
    """Vertical coordinates.

    Member names are upper-cased to GEMPAK coordinate mnemonics by
    ``GempakFile._convert_vertical_coord``.
    """

    none = 0
    pres = 1
    thta = 2
    hght = 3
    sgma = 4
    dpth = 5
    hybd = 6
    pvab = 7
    pvbl = 8
94
95
class PackingType(Enum):
    """Grid data packing scheme (GRIB and GEMPAK-native variants)."""

    none = 0
    grib = 1
    nmc = 2
    diff = 3
    dec = 4
    grib2 = 5
105
106
class ForecastType(Enum):
    """Forecast type.

    Encoded in the hundred-thousands digit of the GEMPAK forecast time
    integer (see ``GempakFile._convert_ftime``).
    """

    analysis = 0
    forecast = 1
    guess = 2
    initial = 3
114
115
class DataSource(Enum):
    """Data source.

    Stored as the ``data_source`` word of the product description block.
    """

    model = 0
    airway_surface = 1
    metar = 2
    ship = 3
    raob_buoy = 4
    synop_raob_vas = 5
    grid = 6
    watch_by_county = 7
    unknown = 99  # fallback used by _data_source for unrecognized values
    text = 100
    metar2 = 102
    ship2 = 103
    raob_buoy2 = 104
    synop_raob_vas2 = 105
133
134
# Lightweight record describing one grid in a grid file (built from the
# column headers; see GempakGrid).
Grid = namedtuple('Grid', [
    'GRIDNO',
    'TYPE',
    'DATTIM1',
    'DATTIM2',
    'PARM',
    'LEVEL1',
    'LEVEL2',
    'COORD',
])

# Record describing one sounding; presumably populated by a sounding
# subclass not visible in this chunk.
Sounding = namedtuple('Sounding', [
    'DTNO',
    'SNDNO',
    'DATTIM',
    'ID',
    'NUMBER',
    'LAT',
    'LON',
    'ELEV',
    'STATE',
    'COUNTRY',
])

# Record describing one surface observation site/time; presumably populated
# by a surface subclass not visible in this chunk.
Surface = namedtuple('Surface', [
    'ROW',
    'COL',
    'DATTIM',
    'ID',
    'NUMBER',
    'LAT',
    'LON',
    'ELEV',
    'STATE',
    'COUNTRY',
])
171
172
def _data_source(source):
    """Decode a stored integer into a `DataSource`, defaulting to unknown."""
    try:
        decoded = DataSource(source)
    except ValueError:
        # Unrecognized codes are logged and mapped to DataSource.unknown (99).
        log.warning('Could not interpret data source `%s`. '
                    'Setting to `Unknown`.', source)
        decoded = DataSource(99)
    return decoded
181
182
def _word_to_position(word, bytes_per_word=BYTES_PER_WORD):
    """Return the byte offset at which a 1-based word number begins."""
    return bytes_per_word * (word - 1)
186
187
def _interp_logp_data(sounding, missing=-9999):
    """Interpolate missing sounding data.

    This function is similar to the MR_MISS subroutine in GEMPAK.

    Parameters
    ----------
    sounding : dict
        Mapping of parameter name (e.g. 'PRES', 'TEMP', 'DWPT', 'DRCT',
        'SPED', 'HGHT') to a list of level values. Modified in place.
    missing : int or float
        Missing data flag value.
    """
    size = len(sounding['PRES'])
    # Parameters are filled in pairs: temperature with dewpoint, wind
    # direction with speed, then dewpoint alone for any remaining gaps.
    recipe = [('TEMP', 'DWPT'), ('DRCT', 'SPED'), ('DWPT', None)]

    for var1, var2 in recipe:
        iabove = 0
        i = 1  # the first and last levels are never filled
        more = True
        while i < (size - 1) and more:
            if sounding[var1][i] == missing:
                # Search upward for the next level with good data, unless a
                # previous search already found one above the current level.
                if iabove <= i:
                    iabove = i + 1
                    found = False
                    while not found:
                        if sounding[var1][iabove] != missing:
                            found = True
                        else:
                            iabove += 1
                            if iabove >= size:
                                # No good data above; stop scanning this parameter.
                                found = True
                                iabove = 0
                                more = False

                # NOTE(review): for the dewpoint-only pass, interpolation is
                # suppressed when the gap straddles the 100-hPa level —
                # presumably to avoid stratospheric dewpoint extrapolation;
                # confirm against GEMPAK MR_MISS.
                if (var2 is None and iabove != 0
                   and sounding['PRES'][i - 1] > 100
                   and sounding['PRES'][iabove] < 100):
                    iabove = 0

                if iabove != 0:
                    # Interpolate in log-p between the level just below (i - 1)
                    # and the good level found above (iabove).
                    adata = {}
                    bdata = {}
                    for param, val in sounding.items():
                        if (param in ['PRES', 'TEMP', 'DWPT',
                                      'DRCT', 'SPED', 'HGHT']):
                            adata[param] = val[i - 1]
                            bdata[param] = val[iabove]
                    vlev = sounding['PRES'][i]
                    outdata = _interp_parameters(vlev, adata, bdata, missing)
                    sounding[var1][i] = outdata[var1]
                    if var2 is not None:
                        sounding[var2][i] = outdata[var2]
            i += 1
234
235
def _interp_logp_height(sounding, missing=-9999):
    """Interpolate height linearly with respect to log p.

    This function mimics the functionality of the MR_INTZ
    subroutine in GEMPAK.

    Parameters
    ----------
    sounding : dict
        Mapping of parameter name to a list of level values.
        Modified in place ('HGHT' entries are filled).
    missing : int or float
        Missing data flag value.
    """
    size = len(sounding['HGHT'])

    # Find maxlev, the index of the topmost level with a non-missing height,
    # by scanning backward from the end.
    idx = -1
    maxlev = -1
    while size + idx != 0:
        if sounding['HGHT'][idx] != missing:
            maxlev = size + idx
            break
        else:
            idx -= 1

    # Fill interior height gaps below maxlev by interpolating linearly in
    # log pressure between bounding levels with known heights.
    pbot = missing
    for i in range(maxlev):
        press = sounding['PRES'][i]
        hght = sounding['HGHT'][i]

        if press == missing:
            continue
        elif hght != missing:
            # Known height: becomes the lower bound for any following gap.
            pbot = press
            zbot = hght
            ptop = 2000  # sentinel above any real pressure (hPa) to force the search
        elif pbot == missing:
            continue
        else:
            # Search upward for the next level with a known height.
            # NOTE(review): this assumes pressure decreases with index; a
            # missing/garbled bounding pressure would corrupt the log ratio
            # below — confirm against GEMPAK MR_INTZ.
            ilev = i + 1
            while press <= ptop:
                if sounding['HGHT'][ilev] != missing:
                    ptop = sounding['PRES'][ilev]
                    ztop = sounding['HGHT'][ilev]
                else:
                    ilev += 1
            sounding['HGHT'][i] = (zbot + (ztop - zbot)
                                   * (np.log(press / pbot) / np.log(ptop / pbot)))

    # Above the last known height, build heights hydrostatically from the
    # layer thickness relative to the maxlev base level.
    if maxlev < size - 1:
        if maxlev > -1:
            pb = units.Quantity(sounding['PRES'][maxlev], 'hPa')
            zb = units.Quantity(sounding['HGHT'][maxlev], 'm')
            tb = units.Quantity(sounding['TEMP'][maxlev], 'degC')
            tdb = units.Quantity(sounding['DWPT'][maxlev], 'degC')
        else:
            # No valid base at all; propagate missing values.
            pb = units.Quantity(missing, 'hPa')
            zb = units.Quantity(missing, 'm')
            tb = units.Quantity(missing, 'degC')
            tdb = units.Quantity(missing, 'degC')

        for i in range(maxlev + 1, size):
            if sounding['HGHT'][i] == missing:
                tt = units.Quantity(sounding['TEMP'][i], 'degC')
                tdt = units.Quantity(sounding['DWPT'][i], 'degC')
                pt = units.Quantity(sounding['PRES'][i], 'hPa')

                pl = units.Quantity([pb.m, pt.m], 'hPa')
                tl = units.Quantity([tb.m, tt.m], 'degC')
                tdl = units.Quantity([tdb.m, tdt.m], 'degC')

                # Without dewpoints, fall back to dry thickness (no mixing ratio).
                if missing in tdl.m:
                    rl = None
                else:
                    ql = specific_humidity_from_dewpoint(pl, tdl)
                    rl = mixing_ratio_from_specific_humidity(ql)

                if missing not in [*tl.m, zb.m]:
                    sounding['HGHT'][i] = (zb + thickness_hydrostatic(pl, tl, rl)).m
                else:
                    sounding['HGHT'][i] = missing
309
310
311def _interp_logp_pressure(sounding, missing=-9999):
312    """Interpolate pressure from heights.
313
314    This function is similar to the MR_INTP subroutine from GEMPAK.
315    """
316    i = 0
317    ilev = -1
318    klev = -1
319    size = len(sounding['PRES'])
320    pt = missing
321    zt = missing
322    pb = missing
323    zb = missing
324
325    while i < size:
326        p = sounding['PRES'][i]
327        z = sounding['HGHT'][i]
328
329        if p != missing and z != missing:
330            klev = i
331            pt = p
332            zt = z
333
334        if ilev != -1 and klev != -1:
335            for j in range(ilev + 1, klev):
336                z = sounding['HGHT'][j]
337                if missing not in [z, zb, pb]:
338                    sounding['PRES'][j] = (
339                        pb * np.exp((z - zb) * np.log(pt / pb) / (zt - zb))
340                    )
341        ilev = klev
342        pb = pt
343        zb = zt
344        i += 1
345
346
def _interp_moist_height(sounding, missing=-9999):
    """Interpolate moist hydrostatic height.

    This function mimics the functionality of the MR_SCMZ
    subroutine in GEMPAK. This is the default behavior when
    merging observed sounding data.

    Parameters
    ----------
    sounding : dict
        Mapping of parameter name (e.g. 'PRES', 'TEMP', 'DWPT', 'HGHT')
        to a list of level values. Modified in place ('HGHT' is filled).
    missing : int or float
        Missing data flag value.
    """
    # Per-level scale heights. Initialize with the caller-supplied missing
    # flag; previously this was a hard-coded -9999, which broke the
    # placeholder checks below whenever a non-default ``missing`` was used.
    hlist = units.Quantity(np.ones(len(sounding['PRES'])) * missing, 'm')

    ilev = -1
    top = False

    # Find the first level where pressure, temperature, and height are all
    # present; this is the base for the upward integration.
    found = False
    while not found and not top:
        ilev += 1
        if ilev >= len(sounding['PRES']):
            top = True
        elif missing not in [
            sounding['PRES'][ilev],
            sounding['TEMP'][ilev],
            sounding['HGHT'][ilev]
        ]:
            found = True

    # Walk upward between successive complete ("mandatory") levels,
    # accumulating moist hydrostatic thickness for the levels in between.
    while not top:
        plev = units.Quantity(sounding['PRES'][ilev], 'hPa')
        pb = units.Quantity(sounding['PRES'][ilev], 'hPa')
        tb = units.Quantity(sounding['TEMP'][ilev], 'degC')
        tdb = units.Quantity(sounding['DWPT'][ilev], 'degC')
        zb = units.Quantity(sounding['HGHT'][ilev], 'm')
        zlev = units.Quantity(sounding['HGHT'][ilev], 'm')
        jlev = ilev
        klev = 0
        mand = False

        while not mand:
            jlev += 1
            if jlev >= len(sounding['PRES']):
                mand = True
                top = True
            else:
                pt = units.Quantity(sounding['PRES'][jlev], 'hPa')
                tt = units.Quantity(sounding['TEMP'][jlev], 'degC')
                tdt = units.Quantity(sounding['DWPT'][jlev], 'degC')
                zt = units.Quantity(sounding['HGHT'][jlev], 'm')
                # NOTE(review): ``tt != missing`` compares a pint Quantity
                # to a bare number, which pint treats as always unequal, so
                # only the height check is effective here. Likely intended
                # to be ``tt.m != missing`` -- confirm before changing.
                if (zt.m != missing
                   and tt != missing):
                    mand = True
                    klev = jlev

                pl = units.Quantity([pb.m, pt.m], 'hPa')
                tl = units.Quantity([tb.m, tt.m], 'degC')
                tdl = units.Quantity([tdb.m, tdt.m], 'degC')

                # Without dewpoint, use dry thickness (no mixing ratio) and
                # plain temperature in place of virtual temperature.
                if missing in tdl.m:
                    rl = None
                    tvl = tl
                else:
                    ql = specific_humidity_from_dewpoint(pl, tdl)
                    rl = mixing_ratio_from_specific_humidity(ql)
                    tvl = virtual_temperature(tl, ql)

                if missing not in [*tl.m, zb.m]:
                    scale_z = scale_height(*tvl)
                    znew = zb + thickness_hydrostatic(pl, tl, rl)
                    # The top of this layer becomes the base of the next.
                    tb = tt
                    tdb = tdt
                    pb = pt
                    zb = znew
                else:
                    scale_z = units.Quantity(missing, 'm')
                    znew = units.Quantity(missing, 'm')
                hlist[jlev] = scale_z

        if klev != 0:
            # Scale the layer scale heights so the integrated height matches
            # the reported height at the bounding mandatory level.
            s = (zt - zlev) / (znew - zlev)
            for h in range(ilev + 1, klev + 1):
                hlist[h] *= s

        # Recompute heights for the in-between levels from the (scaled)
        # scale heights, integrating upward from the base level.
        hbb = zlev
        pbb = plev
        for ii in range(ilev + 1, jlev):
            p = units.Quantity(sounding['PRES'][ii], 'hPa')
            scale_z = hlist[ii]
            if missing not in [scale_z.m, hbb.m,
                               pbb.m, p.m]:
                # Convert scale height to an equivalent mean layer
                # temperature for the hydrostatic thickness computation.
                th = ((scale_z * constants.g) / constants.Rd).to('degC')
                tbar = units.Quantity([th.m, th.m], 'degC')
                pll = units.Quantity([pbb.m, p.m], 'hPa')
                z = hbb + thickness_hydrostatic(pll, tbar)
            else:
                z = units.Quantity(missing, 'm')
            sounding['HGHT'][ii] = z.m
            hbb = z
            pbb = p

        ilev = klev
444
445
446def _interp_parameters(vlev, adata, bdata, missing=-9999):
447    """General interpolation with respect to log-p.
448
449    See the PC_INTP subroutine in GEMPAK.
450    """
451    pres1 = adata['PRES']
452    pres2 = bdata['PRES']
453    between = (((pres1 < pres2) and (pres1 < vlev)
454               and (vlev < pres2))
455               or ((pres2 < pres1) and (pres2 < vlev)
456               and (vlev < pres1)))
457
458    if not between:
459        raise ValueError('Current pressure does not fall between levels.')
460    elif pres1 <= 0 or pres2 <= 0:
461        raise ValueError('Pressure cannot be negative.')
462
463    outdata = {}
464    rmult = np.log(vlev / pres1) / np.log(pres2 / pres1)
465    outdata['PRES'] = vlev
466    for param, aval in adata.items():
467        bval = bdata[param]
468        if param == 'DRCT':
469            ang1 = aval % 360
470            ang2 = bval % 360
471            if abs(ang1 - ang2) > 180:
472                if ang1 < ang2:
473                    ang1 += 360
474                else:
475                    ang2 += 360
476            ang = ang1 + (ang2 - ang1) * rmult
477            outdata[param] = ang % 360
478        else:
479            outdata[param] = aval + (bval - aval) * rmult
480
481        if missing in [aval, bval]:
482            outdata[param] = missing
483
484    return outdata
485
486
class GempakFile():
    """Base class for GEMPAK files.

    Reads ubiquitous GEMPAK file headers (i.e., the data management portion of
    each file).
    """

    # Layout of the product description block. Each entry is
    # (attribute name, struct format[, converter]); each 'i'/'f' field
    # occupies one 4-byte word and None entries are padding.
    prod_desc_fmt = [('version', 'i'), ('file_headers', 'i'),
                     ('file_keys_ptr', 'i'), ('rows', 'i'),
                     ('row_keys', 'i'), ('row_keys_ptr', 'i'),
                     ('row_headers_ptr', 'i'), ('columns', 'i'),
                     ('column_keys', 'i'), ('column_keys_ptr', 'i'),
                     ('column_headers_ptr', 'i'), ('parts', 'i'),
                     ('parts_ptr', 'i'), ('data_mgmt_ptr', 'i'),
                     ('data_mgmt_length', 'i'), ('data_block_ptr', 'i'),
                     ('file_type', 'i', FileTypes),
                     ('data_source', 'i', _data_source),
                     ('machine_type', 'i'), ('missing_int', 'i'),
                     (None, '12x'), ('missing_float', 'f')]

    # Layout of the grid navigation (NAVB) block: projection and grid corners.
    grid_nav_fmt = [('grid_definition_type', 'f'),
                    ('projection', '3sx', bytes.decode),
                    ('left_grid_number', 'f'), ('bottom_grid_number', 'f'),
                    ('right_grid_number', 'f'), ('top_grid_number', 'f'),
                    ('lower_left_lat', 'f'), ('lower_left_lon', 'f'),
                    ('upper_right_lat', 'f'), ('upper_right_lon', 'f'),
                    ('proj_angle1', 'f'), ('proj_angle2', 'f'),
                    ('proj_angle3', 'f'), (None, '972x')]

    # Layout of the grid analysis (ANLB) block, type-1 variant.
    grid_anl_fmt1 = [('analysis_type', 'f'), ('delta_n', 'f'),
                     ('delta_x', 'f'), ('delta_y', 'f'),
                     (None, '4x'), ('garea_llcr_lat', 'f'),
                     ('garea_llcr_lon', 'f'), ('garea_urcr_lat', 'f'),
                     ('garea_urcr_lon', 'f'), ('extarea_llcr_lat', 'f'),
                     ('extarea_llcr_lon', 'f'), ('extarea_urcr_lat', 'f'),
                     ('extarea_urcr_lon', 'f'), ('datarea_llcr_lat', 'f'),
                     ('datarea_llcr_lon', 'f'), ('datarea_urcr_lat', 'f'),
                     ('datarea_urcrn_lon', 'f'), (None, '444x')]

    # Layout of the grid analysis (ANLB) block, type-2 variant.
    grid_anl_fmt2 = [('analysis_type', 'f'), ('delta_n', 'f'),
                     ('grid_ext_left', 'f'), ('grid_ext_down', 'f'),
                     ('grid_ext_right', 'f'), ('grid_ext_up', 'f'),
                     ('garea_llcr_lat', 'f'), ('garea_llcr_lon', 'f'),
                     ('garea_urcr_lat', 'f'), ('garea_urcr_lon', 'f'),
                     ('extarea_llcr_lat', 'f'), ('extarea_llcr_lon', 'f'),
                     ('extarea_urcr_lat', 'f'), ('extarea_urcr_lon', 'f'),
                     ('datarea_llcr_lat', 'f'), ('datarea_llcr_lon', 'f'),
                     ('datarea_urcr_lat', 'f'), ('datarea_urcrn_lon', 'f'),
                     (None, '440x')]

    # Layout of the data management block: free-space bookkeeping followed
    # by 28 free-word slots.
    data_management_fmt = ([('next_free_word', 'i'), ('max_free_pairs', 'i'),
                           ('actual_free_pairs', 'i'), ('last_word', 'i')]
                           + [('free_word{:d}'.format(n), 'i') for n in range(1, 29)])

    def __init__(self, file):
        """Instantiate GempakFile object from file."""
        fobj = open_as_needed(file)

        # Slurp the whole file into an in-memory buffer.
        with contextlib.closing(fobj):
            self._buffer = IOBuffer.fromfile(fobj)

        # Save file start position as pointers use this as reference
        self._start = self._buffer.set_mark()

        # Process the main GEMPAK header to verify file format
        self._process_gempak_header()
        meta = self._buffer.set_mark()

        # Check for byte swapping by inspecting the first metadata word,
        # then rewind so the word is re-read as part of the struct below.
        self._swap_bytes(bytes(self._buffer.read_binary(4)))
        self._buffer.jump_to(meta)

        # Process main metadata header
        self.prod_desc = self._buffer.read_struct(NamedStruct(self.prod_desc_fmt,
                                                              self.prefmt,
                                                              'ProductDescription'))

        # File Keys
        # Surface and upper-air files will not have the file headers, so we need to check.
        if self.prod_desc.file_headers > 0:
            # This would grab any file headers, but NAVB and ANLB are the only ones used.
            # Keys are stored as parallel arrays: all names, then all
            # lengths, then all types (hence product + np.repeat).
            fkey_prod = product(['header_name', 'header_length', 'header_type'],
                                range(1, self.prod_desc.file_headers + 1))
            fkey_names = ['{}{}'.format(*x) for x in fkey_prod]
            fkey_info = list(zip(fkey_names, np.repeat(('4s', 'i', 'i'),
                                                       self.prod_desc.file_headers)))
            self.file_keys_format = NamedStruct(fkey_info, self.prefmt, 'FileKeys')

            self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.file_keys_ptr))
            self.file_keys = self._buffer.read_struct(self.file_keys_format)

            # file_key_blocks = self._buffer.set_mark()
            # Navigation Block
            navb_size = self._buffer.read_int(4, self.endian, False)

            nav_stuct = NamedStruct(self.grid_nav_fmt,
                                    self.prefmt,
                                    'NavigationBlock')

            # Stored size is in words; the struct size is in bytes.
            if navb_size != nav_stuct.size // BYTES_PER_WORD:
                raise ValueError('Navigation block size does not match GEMPAK specification')
            else:
                self.navigation_block = (
                    self._buffer.read_struct(nav_stuct)
                )
            # Grid dimensions (number of points in x and y).
            self.kx = int(self.navigation_block.right_grid_number)
            self.ky = int(self.navigation_block.top_grid_number)

            # Analysis Block
            anlb_size = self._buffer.read_int(4, self.endian, False)
            anlb_start = self._buffer.set_mark()
            anlb1_struct = NamedStruct(self.grid_anl_fmt1,
                                       self.prefmt,
                                       'AnalysisBlock')
            anlb2_struct = NamedStruct(self.grid_anl_fmt2,
                                       self.prefmt,
                                       'AnalysisBlock')

            if anlb_size not in [anlb1_struct.size // BYTES_PER_WORD,
                                 anlb2_struct.size // BYTES_PER_WORD]:
                raise ValueError('Analysis block size does not match GEMPAK specification')
            else:
                # Peek at the first float (analysis type) to pick the
                # layout variant, then rewind and read the full block.
                anlb_type = self._buffer.read_struct(struct.Struct(self.prefmt + 'f'))[0]
                self._buffer.jump_to(anlb_start)
                if anlb_type == 1:
                    self.analysis_block = (
                        self._buffer.read_struct(anlb1_struct)
                    )
                elif anlb_type == 2:
                    self.analysis_block = (
                        self._buffer.read_struct(anlb2_struct)
                    )
                else:
                    self.analysis_block = None
        else:
            self.analysis_block = None
            self.navigation_block = None

        # Data Management
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.data_mgmt_ptr))
        self.data_management = self._buffer.read_struct(NamedStruct(self.data_management_fmt,
                                                                    self.prefmt,
                                                                    'DataManagement'))

        # Row Keys
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_keys_ptr))
        row_key_info = [('row_key{:d}'.format(n), '4s', self._decode_strip)
                        for n in range(1, self.prod_desc.row_keys + 1)]
        row_key_info.extend([(None, None)])
        row_keys_fmt = NamedStruct(row_key_info, self.prefmt, 'RowKeys')
        self.row_keys = self._buffer.read_struct(row_keys_fmt)

        # Column Keys
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_keys_ptr))
        column_key_info = [('column_key{:d}'.format(n), '4s', self._decode_strip)
                           for n in range(1, self.prod_desc.column_keys + 1)]
        column_key_info.extend([(None, None)])
        column_keys_fmt = NamedStruct(column_key_info, self.prefmt, 'ColumnKeys')
        self.column_keys = self._buffer.read_struct(column_keys_fmt)

        # Parts
        # Parts are stored as parallel arrays (all names, then all header
        # lengths, then data types, then parameter counts), so each part's
        # struct skips over the other parts' entries with pad bytes.
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.parts_ptr))
        # parts = self._buffer.set_mark()
        self.parts = []
        parts_info = [('name', '4s', self._decode_strip),
                      (None, '{:d}x'.format((self.prod_desc.parts - 1) * BYTES_PER_WORD)),
                      ('header_length', 'i'),
                      (None, '{:d}x'.format((self.prod_desc.parts - 1) * BYTES_PER_WORD)),
                      ('data_type', 'i', DataTypes),
                      (None, '{:d}x'.format((self.prod_desc.parts - 1) * BYTES_PER_WORD)),
                      ('parameter_count', 'i')]
        parts_info.extend([(None, None)])
        parts_fmt = NamedStruct(parts_info, self.prefmt, 'Parts')
        for n in range(1, self.prod_desc.parts + 1):
            self.parts.append(self._buffer.read_struct(parts_fmt))
            # Advance one word so the next read starts at the next part's name.
            self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.parts_ptr + n))

        # Parameters
        # No need to jump to any position as this follows parts information
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.parts_ptr
                                                            + self.prod_desc.parts * 4))
        # One dict of PARAM_ATTR lists per part.
        self.parameters = [{key: [] for key, _ in PARAM_ATTR}
                           for n in range(self.prod_desc.parts)]
        for attr, fmt in PARAM_ATTR:
            # Only numeric fields get the endianness prefix; strings do not.
            fmt = (fmt[0], self.prefmt + fmt[1] if fmt[1] != 's' else fmt[1])
            for n, part in enumerate(self.parts):
                for _ in range(part.parameter_count):
                    if 's' in fmt[1]:
                        self.parameters[n][attr] += [
                            self._decode_strip(self._buffer.read_binary(*fmt)[0])
                        ]
                    else:
                        self.parameters[n][attr] += self._buffer.read_binary(*fmt)

    def _swap_bytes(self, binary):
        """Swap between little and big endian.

        Compares the file's machine-type word against a natively packed 1;
        a mismatch means the file was written with the opposite byte order.
        Sets ``self.prefmt`` (struct prefix) and ``self.endian``.
        """
        self.swaped_bytes = (struct.pack('@i', 1) != binary)

        if self.swaped_bytes:
            if sys.byteorder == 'little':
                self.prefmt = '>'
                self.endian = 'big'
            elif sys.byteorder == 'big':
                self.prefmt = '<'
                self.endian = 'little'
        else:
            # Native order: no struct prefix needed.
            self.prefmt = ''
            self.endian = sys.byteorder

    def _process_gempak_header(self):
        """Read the GEMPAK header from the file.

        Raises TypeError if the 28-character magic string is absent.
        """
        fmt = [('text', '28s', bytes.decode), (None, None)]

        header = self._buffer.read_struct(NamedStruct(fmt, '', 'GempakHeader'))
        if header.text != GEMPAK_HEADER:
            raise TypeError('Unknown file format or invalid GEMPAK file')

    @staticmethod
    def _convert_dattim(dattim):
        """Convert GEMPAK DATTIM integer to datetime object.

        Values below 100000000 are treated as YYMMDD; larger values as the
        ten-digit MMDDYYHHMM encoding. Zero/None yields None.
        """
        if dattim:
            if dattim < 100000000:
                dt = datetime.strptime(str(dattim), '%y%m%d')
            else:
                dt = datetime.strptime('{:010d}'.format(dattim), '%m%d%y%H%M')
        else:
            dt = None
        return dt

    @staticmethod
    def _convert_ftime(ftime):
        """Convert GEMPAK forecast time and type integer.

        The hundred-thousands digit encodes the `ForecastType`; the
        remainder is an HHMM offset. Returns (type name, timedelta),
        or None for negative input.
        """
        if ftime >= 0:
            iftype = ForecastType(ftime // 100000)
            iftime = ftime - iftype.value * 100000
            hours = iftime // 100
            minutes = iftime - hours * 100
            out = (iftype.name, timedelta(hours=hours, minutes=minutes))
        else:
            out = None
        return out

    @staticmethod
    def _convert_level(level):
        """Convert levels, mapping negative/non-numeric values to None."""
        if isinstance(level, (int, float)) and level >= 0:
            return level
        else:
            return None

    @staticmethod
    def _convert_vertical_coord(coord):
        """Convert integer vertical coordinate to name.

        Values 0-8 map to `VerticalCoordinates` names (upper-cased);
        larger values are reinterpreted as a packed 4-character string.
        """
        if coord <= 8:
            return VerticalCoordinates(coord).name.upper()
        else:
            return struct.pack('i', coord).decode()

    @staticmethod
    def _fortran_ishift(i, shift):
        """Python-friendly bit shifting.

        Emulates Fortran's ISHFT on 32-bit integers: positive ``shift``
        shifts left, negative shifts right (logical).
        """
        if shift >= 0:
            # Shift left and only keep low 32 bits
            ret = (i << shift) & 0xffffffff

            # If high bit, convert back to negative of two's complement
            if ret > 0x7fffffff:
                ret = -(~ret & 0x7fffffff) - 1
            return ret
        else:
            # Shift right the low 32 bits
            return (i & 0xffffffff) >> -shift

    @staticmethod
    def _decode_strip(b):
        """Decode bytes to string and strip whitespace."""
        return b.decode().strip()

    @staticmethod
    def _make_date(dattim):
        """Make a date object from GEMPAK DATTIM integer."""
        return GempakFile._convert_dattim(dattim).date()

    @staticmethod
    def _make_time(t):
        """Make a time object from GEMPAK FTIME integer (HHMM)."""
        string = '{:04d}'.format(t)
        return datetime.strptime(string, '%H%M').time()

    def _unpack_real(self, buffer, parameters, length):
        """Unpack floating point data packed in integers.

        Similar to DP_UNPK subroutine in GEMPAK.

        Parameters
        ----------
        buffer : sequence of int
            Packed 32-bit words.
        parameters : dict
            Parameter metadata with 'name', 'scale', 'offset', 'bits' lists.
        length : int
            Number of words in ``buffer``; must be a whole number of
            packed records.
        """
        nparms = len(parameters['name'])
        mskpat = 0xffffffff  # 32-bit all-ones mask

        # Words per packed record and number of records in the buffer.
        pwords = (sum(parameters['bits']) - 1) // 32 + 1
        npack = (length - 1) // pwords + 1
        unpacked = np.ones(npack * nparms, dtype=np.float32) * self.prod_desc.missing_float
        if npack * pwords != length:
            raise ValueError('Unpacking length mismatch.')

        ir = 0  # output index
        ii = 0  # input word index
        for _i in range(npack):
            pdat = buffer[ii:(ii + pwords)]
            rdat = unpacked[ir:(ir + nparms)]  # view into `unpacked`
            itotal = 0  # running bit offset into the packed record
            for idata in range(nparms):
                scale = 10**parameters['scale'][idata]
                offset = parameters['offset'][idata]
                bits = parameters['bits'][idata]
                isbitc = (itotal % 32) + 1  # 1-based start bit within word
                iswrdc = (itotal // 32)  # word holding the start bit
                # All-ones field of `bits` width marks a missing value.
                imissc = self._fortran_ishift(mskpat, bits - 32)

                jbit = bits
                jsbit = isbitc
                jshift = 1 - jsbit
                jsword = iswrdc
                jword = pdat[jsword]
                mask = self._fortran_ishift(mskpat, jbit - 32)
                ifield = self._fortran_ishift(jword, jshift)
                ifield &= mask

                # Field straddles a word boundary: pull the remaining bits
                # from the next word and merge them in.
                if (jsbit + jbit - 1) > 32:
                    jword = pdat[jsword + 1]
                    jshift += 32
                    iword = self._fortran_ishift(jword, jshift)
                    iword &= mask
                    ifield |= iword

                if ifield == imissc:
                    rdat[idata] = self.prod_desc.missing_float
                else:
                    # Reverse the packing transform: value = (field + offset) * 10**scale.
                    rdat[idata] = (ifield + offset) * scale
                itotal += bits
            unpacked[ir:(ir + nparms)] = rdat
            ir += nparms
            ii += pwords

        return unpacked.tolist()
830
831
@exporter.export
class GempakGrid(GempakFile):
    """Subclass of GempakFile specific to GEMPAK gridded data."""

    def __init__(self, file, *args, **kwargs):
        """Instantiate grid object from file.

        Parameters
        ----------
        file : str or `pathlib.Path` or file-like
            GEMPAK grid file to read.
        """
        super().__init__(file)

        # Column-header keys whose packed integers require special decoding.
        datetime_names = ['GDT1', 'GDT2']
        level_names = ['GLV1', 'GLV2']
        ftime_names = ['GTM1', 'GTM2']
        string_names = ['GPM1', 'GPM2', 'GPM3']

        # Row Headers
        # Based on GEMPAK source, row/col headers have a 0th element in their Fortran arrays.
        # This appears to be a flag value to say a header is used or not. 9999
        # means it's in use, otherwise -9999. GEMPAK allows empty grids, etc., but
        # no real need to keep track of that in Python.
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_headers_ptr))
        self.row_headers = []
        row_headers_info = [(key, 'i') for key in self.row_keys]
        row_headers_info.extend([(None, None)])
        row_headers_fmt = NamedStruct(row_headers_info, self.prefmt, 'RowHeaders')
        for _ in range(1, self.prod_desc.rows + 1):
            if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
                self.row_headers.append(self._buffer.read_struct(row_headers_fmt))

        # Column Headers
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_headers_ptr))
        self.column_headers = []
        # Each header word is decoded according to its key: levels, vertical
        # coordinate, date/times, and forecast times get converter callables;
        # parameter-name words are 4-char strings; everything else is raw int.
        column_headers_info = [(key, 'i', self._convert_level) if key in level_names
                               else (key, 'i', self._convert_vertical_coord) if key == 'GVCD'
                               else (key, 'i', self._convert_dattim) if key in datetime_names
                               else (key, 'i', self._convert_ftime) if key in ftime_names
                               else (key, '4s', self._decode_strip) if key in string_names
                               else (key, 'i')
                               for key in self.column_keys]
        column_headers_info.extend([(None, None)])
        column_headers_fmt = NamedStruct(column_headers_info, self.prefmt, 'ColumnHeaders')
        for _ in range(1, self.prod_desc.columns + 1):
            if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
                self.column_headers.append(self._buffer.read_struct(column_headers_fmt))

        # Catalog every grid in the file from the decoded column headers.
        self._gdinfo = []
        for n, head in enumerate(self.column_headers):
            self._gdinfo.append(
                Grid(
                    n,
                    head.GTM1[0],
                    head.GDT1 + head.GTM1[1],
                    # Secondary valid time exists only when both its date and
                    # forecast-time components are present. (Bug fix: this
                    # previously tested the nonexistent attribute
                    # ``head.GDTM2``, raising AttributeError whenever GDT2
                    # was set.)
                    head.GDT2 + head.GTM2[1] if head.GDT2 and head.GTM2 else None,
                    head.GPM1 + head.GPM2 + head.GPM3,
                    head.GLV1,
                    head.GLV2,
                    head.GVCD,
                )
            )

        # Coordinates
        if self.navigation_block is not None:
            self._get_crs()
            self._set_coordinates()

    def gdinfo(self):
        """Return grid information as a list of Grid namedtuples."""
        return self._gdinfo

    def _get_crs(self):
        """Create CRS from GEMPAK navigation block."""
        gemproj = self.navigation_block.projection
        proj, ptype = GEMPROJ_TO_PROJ[gemproj]
        # GEMPAK assumes a spherical earth with this radius (meters).
        radius_sph = 6371200.0

        if ptype == 'azm':
            lat_0 = self.navigation_block.proj_angle1
            lon_0 = self.navigation_block.proj_angle2
            rot = self.navigation_block.proj_angle3
            if rot != 0:
                log.warning('Rotated projections currently '
                            'not supported. Angle3 (%7.2f) ignored.', rot)
            self.crs = pyproj.CRS.from_dict({'proj': proj,
                                             'lat_0': lat_0,
                                             'lon_0': lon_0,
                                             'R': radius_sph})
        elif ptype == 'cyl':
            # NOTE(review): ``gemproj`` is a key of GEMPROJ_TO_PROJ, which are
            # all uppercase, so this lowercase comparison appears to always be
            # True and the MCD-specific branch below unreachable. Left as-is
            # pending confirmation against GEMPAK navigation-block semantics.
            if gemproj != 'mcd':
                lat_0 = self.navigation_block.proj_angle1
                lon_0 = self.navigation_block.proj_angle2
                rot = self.navigation_block.proj_angle3
                if rot != 0:
                    log.warning('Rotated projections currently '
                                'not supported. Angle3 (%7.2f) ignored.', rot)
                self.crs = pyproj.CRS.from_dict({'proj': proj,
                                                 'lat_0': lat_0,
                                                 'lon_0': lon_0,
                                                 'R': radius_sph})
            else:
                # Modified cylindrical equidistant: scale factor derived from
                # the mean latitude unless an explicit one is given.
                avglat = (self.navigation_block.upper_right_lat
                          + self.navigation_block.lower_left_lat) * 0.5
                k_0 = (1 / math.cos(avglat)
                       if self.navigation_block.proj_angle1 == 0
                       else self.navigation_block.proj_angle1
                       )
                lon_0 = self.navigation_block.proj_angle2
                self.crs = pyproj.CRS.from_dict({'proj': proj,
                                                 'lat_0': avglat,
                                                 'lon_0': lon_0,
                                                 'k_0': k_0,
                                                 'R': radius_sph})
        elif ptype == 'con':
            lat_1 = self.navigation_block.proj_angle1
            lon_0 = self.navigation_block.proj_angle2
            lat_2 = self.navigation_block.proj_angle3
            self.crs = pyproj.CRS.from_dict({'proj': proj,
                                             'lon_0': lon_0,
                                             'lat_1': lat_1,
                                             'lat_2': lat_2,
                                             'R': radius_sph})

    def _set_coordinates(self):
        """Use GEMPAK navigation block to define coordinates.

        Defines geographic and projection coordinates for the object.
        """
        transform = pyproj.Proj(self.crs)
        # Project the lower-left and upper-right corners, then span the grid
        # with evenly spaced projection coordinates.
        llx, lly = transform(self.navigation_block.lower_left_lon,
                             self.navigation_block.lower_left_lat)
        urx, ury = transform(self.navigation_block.upper_right_lon,
                             self.navigation_block.upper_right_lat)
        self.x = np.linspace(llx, urx, self.kx, dtype=np.float32)
        self.y = np.linspace(lly, ury, self.ky, dtype=np.float32)
        xx, yy = np.meshgrid(self.x, self.y, copy=False)
        # Inverse transform back to geographic lon/lat for every grid point.
        self.lon, self.lat = transform(xx, yy, inverse=True)
        self.lon = self.lon.astype(np.float32)
        self.lat = self.lat.astype(np.float32)

    def _unpack_grid(self, packing_type, part):
        """Read raw GEMPAK grid integers and unpack into floats.

        Parameters
        ----------
        packing_type : PackingType
            Packing scheme used for this grid's data block.

        part : namedtuple
            Part record describing header length, etc.

        Returns
        -------
        numpy.ndarray or None
            Unpacked grid values, or None if no data are present.
        """
        if packing_type == PackingType.none:
            # Unpacked: data are stored directly as floats.
            lendat = self.data_header_length - part.header_length - 1

            if lendat > 1:
                buffer_fmt = '{}{}f'.format(self.prefmt, lendat)
                buffer = self._buffer.read_struct(struct.Struct(buffer_fmt))
                grid = np.zeros(self.ky * self.kx, dtype=np.float32)
                grid[...] = buffer
            else:
                grid = None

            return grid

        elif packing_type == PackingType.nmc:
            raise NotImplementedError('NMC unpacking not supported.')
            # integer_meta_fmt = [('bits', 'i'), ('missing_flag', 'i'), ('kxky', 'i')]
            # real_meta_fmt = [('reference', 'f'), ('scale', 'f')]
            # self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt,
            #                                                           self.prefmt,
            #                                                           'GridMetaInt'))
            # self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt,
            #                                                            self.prefmt,
            #                                                            'GridMetaReal'))
            # grid_start = self._buffer.set_mark()
        elif packing_type == PackingType.diff:
            # Difference packing: values are bit-packed differences from the
            # previous point (row-wise), offset by diffmin and scaled.
            integer_meta_fmt = [('bits', 'i'), ('missing_flag', 'i'),
                                ('kxky', 'i'), ('kx', 'i')]
            real_meta_fmt = [('reference', 'f'), ('scale', 'f'), ('diffmin', 'f')]
            self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt,
                                                                      self.prefmt,
                                                                      'GridMetaInt'))
            self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt,
                                                                       self.prefmt,
                                                                       'GridMetaReal'))
            # grid_start = self._buffer.set_mark()

            # All bits set marks a missing value.
            imiss = 2**self.grid_meta_int.bits - 1
            lendat = self.data_header_length - part.header_length - 8
            packed_buffer_fmt = '{}{}i'.format(self.prefmt, lendat)
            packed_buffer = self._buffer.read_struct(struct.Struct(packed_buffer_fmt))
            grid = np.zeros((self.ky, self.kx), dtype=np.float32)

            if lendat > 1:
                iword = 0
                ibit = 1
                first = True
                for j in range(self.ky):
                    line = False
                    for i in range(self.kx):
                        # Extract the next `bits`-wide field, possibly
                        # straddling a 32-bit word boundary.
                        jshft = self.grid_meta_int.bits + ibit - 33
                        idat = self._fortran_ishift(packed_buffer[iword], jshft)
                        idat &= imiss

                        if jshft > 0:
                            jshft -= 32
                            idat2 = self._fortran_ishift(packed_buffer[iword + 1], jshft)
                            idat |= idat2

                        ibit += self.grid_meta_int.bits
                        if ibit > 32:
                            ibit -= 32
                            iword += 1

                        if (self.grid_meta_int.missing_flag and idat == imiss):
                            grid[j, i] = self.prod_desc.missing_float
                        else:
                            if first:
                                # First point is the reference value itself.
                                grid[j, i] = self.grid_meta_real.reference
                                psav = self.grid_meta_real.reference
                                plin = self.grid_meta_real.reference
                                line = True
                                first = False
                            else:
                                if not line:
                                    # First point of a new row differences
                                    # against the start of the previous row.
                                    grid[j, i] = plin + (self.grid_meta_real.diffmin
                                                         + idat * self.grid_meta_real.scale)
                                    line = True
                                    plin = grid[j, i]
                                else:
                                    grid[j, i] = psav + (self.grid_meta_real.diffmin
                                                         + idat * self.grid_meta_real.scale)
                                psav = grid[j, i]
            else:
                grid = None

            return grid

        elif packing_type in [PackingType.grib, PackingType.dec]:
            # GRIB-style packing: reference + scaled bit-packed integers.
            integer_meta_fmt = [('bits', 'i'), ('missing_flag', 'i'), ('kxky', 'i')]
            real_meta_fmt = [('reference', 'f'), ('scale', 'f')]
            self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt,
                                                                      self.prefmt,
                                                                      'GridMetaInt'))
            self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt,
                                                                       self.prefmt,
                                                                       'GridMetaReal'))
            # grid_start = self._buffer.set_mark()

            lendat = self.data_header_length - part.header_length - 6
            packed_buffer_fmt = '{}{}i'.format(self.prefmt, lendat)

            grid = np.zeros(self.grid_meta_int.kxky, dtype=np.float32)
            packed_buffer = self._buffer.read_struct(struct.Struct(packed_buffer_fmt))
            if lendat > 1:
                # All bits set marks a missing value.
                imax = 2**self.grid_meta_int.bits - 1
                ibit = 1
                iword = 0
                for cell in range(self.grid_meta_int.kxky):
                    # Extract the next `bits`-wide field, possibly straddling
                    # a 32-bit word boundary.
                    jshft = self.grid_meta_int.bits + ibit - 33
                    idat = self._fortran_ishift(packed_buffer[iword], jshft)
                    idat &= imax

                    if jshft > 0:
                        jshft -= 32
                        idat2 = self._fortran_ishift(packed_buffer[iword + 1], jshft)
                        idat |= idat2

                    if (idat == imax) and self.grid_meta_int.missing_flag:
                        grid[cell] = self.prod_desc.missing_float
                    else:
                        grid[cell] = (self.grid_meta_real.reference
                                      + (idat * self.grid_meta_real.scale))

                    ibit += self.grid_meta_int.bits
                    if ibit > 32:
                        ibit -= 32
                        iword += 1
            else:
                grid = None

            return grid
        elif packing_type == PackingType.grib2:
            raise NotImplementedError('GRIB2 unpacking not supported.')
            # integer_meta_fmt = [('iuscal', 'i'), ('kx', 'i'),
            #                     ('ky', 'i'), ('iscan_mode', 'i')]
            # real_meta_fmt = [('rmsval', 'f')]
            # self.grid_meta_int = self._buffer.read_struct(NamedStruct(integer_meta_fmt,
            #                                                           self.prefmt,
            #                                                           'GridMetaInt'))
            # self.grid_meta_real = self._buffer.read_struct(NamedStruct(real_meta_fmt,
            #                                                            self.prefmt,
            #                                                            'GridMetaReal'))
            # grid_start = self._buffer.set_mark()
        else:
            raise NotImplementedError('No method for unknown grid packing {}'
                                      .format(packing_type.name))

    def gdxarray(self, parameter=None, date_time=None, coordinate=None,
                 level=None, date_time2=None, level2=None):
        """Select grids and output as list of xarray DataArrays.

        Subset the data by parameter values. The default is to not
        subset and return the entire dataset.

        Parameters
        ----------
        parameter : str or array-like of str
            Name of GEMPAK parameter.

        date_time : datetime or array-like of datetime
            Valid datetime of the grid. Alternatively can be
            a string with the format YYYYmmddHHMM.

        coordinate : str or array-like of str
            Vertical coordinate.

        level : float or array-like of float
            Vertical level.

        date_time2 : datetime or array-like of datetime
            Secondary valid datetime of the grid. Alternatively can be
            a string with the format YYYYmmddHHMM.

        level2: float or array_like of float
            Secondary vertical level. Typically used for layers.

        Returns
        -------
        list
            List of xarray.DataArray objects for each grid.

        Raises
        ------
        KeyError
            If no grids match the given criteria.
        """
        # Normalize each criterion to a list (strings are iterable but are
        # treated as single values), parsing any datetime strings.
        if parameter is not None:
            if (not isinstance(parameter, Iterable)
               or isinstance(parameter, str)):
                parameter = [parameter]
            parameter = [p.upper() for p in parameter]

        if date_time is not None:
            if (not isinstance(date_time, Iterable)
               or isinstance(date_time, str)):
                date_time = [date_time]
            for i, dt in enumerate(date_time):
                if isinstance(dt, str):
                    date_time[i] = datetime.strptime(dt, '%Y%m%d%H%M')

        if coordinate is not None:
            if (not isinstance(coordinate, Iterable)
               or isinstance(coordinate, str)):
                coordinate = [coordinate]
            coordinate = [c.upper() for c in coordinate]

        if level is not None and not isinstance(level, Iterable):
            level = [level]

        if date_time2 is not None:
            if (not isinstance(date_time2, Iterable)
               or isinstance(date_time2, str)):
                date_time2 = [date_time2]
            for i, dt in enumerate(date_time2):
                if isinstance(dt, str):
                    date_time2[i] = datetime.strptime(dt, '%Y%m%d%H%M')

        if level2 is not None and not isinstance(level2, Iterable):
            level2 = [level2]

        # Figure out which columns to extract from the file
        matched = self._gdinfo.copy()

        if parameter is not None:
            matched = filter(
                lambda grid: grid if grid.PARM in parameter else False,
                matched
            )

        if date_time is not None:
            matched = filter(
                lambda grid: grid if grid.DATTIM1 in date_time else False,
                matched
            )

        if coordinate is not None:
            matched = filter(
                lambda grid: grid if grid.COORD in coordinate else False,
                matched
            )

        if level is not None:
            matched = filter(
                lambda grid: grid if grid.LEVEL1 in level else False,
                matched
            )

        if date_time2 is not None:
            matched = filter(
                lambda grid: grid if grid.DATTIM2 in date_time2 else False,
                matched
            )

        if level2 is not None:
            matched = filter(
                lambda grid: grid if grid.LEVEL2 in level2 else False,
                matched
            )

        matched = list(matched)

        if len(matched) < 1:
            raise KeyError('No grids were matched with given parameters.')

        gridno = [g.GRIDNO for g in matched]

        grids = []
        irow = 0  # Only one row for grids
        for icol, col_head in enumerate(self.column_headers):
            if icol not in gridno:
                continue
            for iprt, part in enumerate(self.parts):
                # Data block is laid out rows x columns x parts; compute the
                # word pointer for this column/part and follow it to the data.
                pointer = (self.prod_desc.data_block_ptr
                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
                           + (icol * self.prod_desc.parts + iprt))
                self._buffer.jump_to(self._start, _word_to_position(pointer))
                self.data_ptr = self._buffer.read_int(4, self.endian, False)
                self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                self.data_header_length = self._buffer.read_int(4, self.endian, False)
                data_header = self._buffer.set_mark()
                self._buffer.jump_to(data_header,
                                     _word_to_position(part.header_length + 1))
                packing_type = PackingType(self._buffer.read_int(4, self.endian, False))

                full_name = col_head.GPM1 + col_head.GPM2 + col_head.GPM3
                ftype, ftime = col_head.GTM1
                valid = col_head.GDT1 + ftime
                gvcord = col_head.GVCD.lower() if col_head.GVCD is not None else 'none'
                var = (GVCORD_TO_VAR[full_name]
                       if full_name in GVCORD_TO_VAR
                       else full_name.lower()
                       )
                data = self._unpack_grid(packing_type, part)
                if data is not None:
                    # Mask missing values and ensure a (ky, kx) 2D array.
                    if data.ndim < 2:
                        data = np.ma.array(data.reshape((self.ky, self.kx)),
                                           mask=data == self.prod_desc.missing_float,
                                           dtype=np.float32)
                    else:
                        data = np.ma.array(data, mask=data == self.prod_desc.missing_float,
                                           dtype=np.float32)

                    xrda = xr.DataArray(
                        data=data[np.newaxis, np.newaxis, ...],
                        coords={
                            'time': [valid],
                            gvcord: [col_head.GLV1],
                            'x': self.x,
                            'y': self.y,
                        },
                        dims=['time', gvcord, 'y', 'x'],
                        name=var,
                        attrs={
                            **self.crs.to_cf(),
                            'grid_type': ftype,
                        }
                    )
                    grids.append(xrda)

                else:
                    log.warning('Unable to read grid for %s', col_head.GPM1)
        return grids
1286
1287
1288@exporter.export
1289class GempakSounding(GempakFile):
1290    """Subclass of GempakFile specific to GEMPAK sounding data."""
1291
    def __init__(self, file, *args, **kwargs):
        """Instantiate sounding object from file.

        Parameters
        ----------
        file : str or `pathlib.Path` or file-like
            GEMPAK sounding file to read.
        """
        super().__init__(file)

        # Row Headers
        # Rows carry the date/time of each sounding; decode the packed DATE
        # and TIME integers into date/time objects. A leading flag word of
        # 9999 marks a header slot as in use.
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_headers_ptr))
        self.row_headers = []
        row_headers_info = [(key, 'i', self._make_date) if key == 'DATE'
                            else (key, 'i', self._make_time) if key == 'TIME'
                            else (key, 'i')
                            for key in self.row_keys]
        row_headers_info.extend([(None, None)])
        row_headers_fmt = NamedStruct(row_headers_info, self.prefmt, 'RowHeaders')
        for _ in range(1, self.prod_desc.rows + 1):
            if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
                self.row_headers.append(self._buffer.read_struct(row_headers_fmt))

        # Column Headers
        # Columns carry station metadata. String fields (STID, STAT, COUN,
        # STD2) are stored as 4-char words; SLAT/SLON are divided by 100
        # here (presumably stored as hundredths of a degree — TODO confirm
        # against GEMPAK docs).
        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_headers_ptr))
        self.column_headers = []
        column_headers_info = [(key, '4s', self._decode_strip) if key == 'STID'
                               else (key, 'i') if key == 'STNM'
                               else (key, 'i', lambda x: x / 100) if key == 'SLAT'
                               else (key, 'i', lambda x: x / 100) if key == 'SLON'
                               else (key, 'i') if key == 'SELV'
                               else (key, '4s', self._decode_strip) if key == 'STAT'
                               else (key, '4s', self._decode_strip) if key == 'COUN'
                               else (key, '4s', self._decode_strip) if key == 'STD2'
                               else (key, 'i')
                               for key in self.column_keys]
        column_headers_info.extend([(None, None)])
        column_headers_fmt = NamedStruct(column_headers_info, self.prefmt, 'ColumnHeaders')
        for _ in range(1, self.prod_desc.columns + 1):
            if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
                self.column_headers.append(self._buffer.read_struct(column_headers_fmt))

        # A file containing a SNDT data part holds merged soundings.
        self.merged = 'SNDT' in (part.name for part in self.parts)

        # Catalog every sounding present: a row/column cell contains data
        # only when the data pointer of its first part is nonzero.
        self._sninfo = []
        for irow, row_head in enumerate(self.row_headers):
            for icol, col_head in enumerate(self.column_headers):
                # Data block is laid out rows x columns x parts; this points
                # at part 0 of the (irow, icol) cell.
                pointer = (self.prod_desc.data_block_ptr
                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
                           + (icol * self.prod_desc.parts))

                self._buffer.jump_to(self._start, _word_to_position(pointer))
                data_ptr = self._buffer.read_int(4, self.endian, False)

                if data_ptr:
                    self._sninfo.append(
                        Sounding(
                            irow,
                            icol,
                            datetime.combine(row_head.DATE, row_head.TIME),
                            col_head.STID,
                            col_head.STNM,
                            col_head.SLAT,
                            col_head.SLON,
                            col_head.SELV,
                            col_head.STAT,
                            col_head.COUN,
                        )
                    )
1354
1355    def sninfo(self):
1356        """Return sounding information."""
1357        return self._sninfo
1358
    def _unpack_merged(self, sndno):
        """Unpack merged sounding data.

        Parameters
        ----------
        sndno : iterable of tuple
            (row, column) index pairs of the soundings to unpack.

        Returns
        -------
        list of dict
            One dict per requested sounding, containing station metadata
            plus one array per parameter.
        """
        soundings = []
        for irow, row_head in enumerate(self.row_headers):
            for icol, col_head in enumerate(self.column_headers):
                if (irow, icol) not in sndno:
                    continue
                # Seed the output with station/time metadata from the headers.
                sounding = {'STID': col_head.STID,
                            'STNM': col_head.STNM,
                            'SLAT': col_head.SLAT,
                            'SLON': col_head.SLON,
                            'SELV': col_head.SELV,
                            'STAT': col_head.STAT,
                            'COUN': col_head.COUN,
                            'DATE': row_head.DATE,
                            'TIME': row_head.TIME,
                            }
                for iprt, part in enumerate(self.parts):
                    # Data block is laid out rows x columns x parts; follow
                    # the pointer for this cell/part to its data record.
                    pointer = (self.prod_desc.data_block_ptr
                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
                               + (icol * self.prod_desc.parts + iprt))
                    self._buffer.jump_to(self._start, _word_to_position(pointer))
                    self.data_ptr = self._buffer.read_int(4, self.endian, False)
                    if not self.data_ptr:
                        # No data stored for this part; skip it.
                        continue
                    self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                    self.data_header_length = self._buffer.read_int(4, self.endian, False)
                    data_header = self._buffer.set_mark()
                    self._buffer.jump_to(data_header,
                                         _word_to_position(part.header_length + 1))
                    # Length of the data following the part header
                    # (in 4-byte words — TODO confirm for character parts).
                    lendat = self.data_header_length - part.header_length

                    # Map the part's data type to a struct format character.
                    fmt_code = {
                        DataTypes.real: 'f',
                        DataTypes.realpack: 'i',
                        DataTypes.character: 's',
                    }.get(part.data_type)

                    if fmt_code is None:
                        raise NotImplementedError('No methods for data type {}'
                                                  .format(part.data_type))
                    packed_buffer = (
                        self._buffer.read_struct(
                            struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
                        )
                    )

                    parameters = self.parameters[iprt]
                    nparms = len(parameters['name'])

                    if part.data_type == DataTypes.realpack:
                        # Bit-packed reals need decoding first.
                        unpacked = self._unpack_real(packed_buffer, parameters, lendat)
                        for iprm, param in enumerate(parameters['name']):
                            # Values are interleaved by parameter; stride by
                            # the parameter count to de-interleave.
                            sounding[param] = unpacked[iprm::nparms]
                    else:
                        for iprm, param in enumerate(parameters['name']):
                            sounding[param] = np.array(
                                packed_buffer[iprm::nparms], dtype=np.float32
                            )

                soundings.append(sounding)
        return soundings
1421
    def _unpack_unmerged(self, sndno):
        """Unpack unmerged sounding data.

        Parameters
        ----------
        sndno : iterable of tuple
            (row, column) index pairs of the soundings to unpack.

        Returns
        -------
        list
            One merged sounding (via ``self._merge_sounding``) per
            requested (row, column) pair.
        """
        soundings = []
        for irow, row_head in enumerate(self.row_headers):
            for icol, col_head in enumerate(self.column_headers):
                if (irow, icol) not in sndno:
                    continue
                # Seed the output with station/time metadata from the headers.
                sounding = {'STID': col_head.STID,
                            'STNM': col_head.STNM,
                            'SLAT': col_head.SLAT,
                            'SLON': col_head.SLON,
                            'SELV': col_head.SELV,
                            'STAT': col_head.STAT,
                            'COUN': col_head.COUN,
                            'DATE': row_head.DATE,
                            'TIME': row_head.TIME,
                            }
                for iprt, part in enumerate(self.parts):
                    # Data block is laid out rows x columns x parts; follow
                    # the pointer for this cell/part to its data record.
                    pointer = (self.prod_desc.data_block_ptr
                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
                               + (icol * self.prod_desc.parts + iprt))
                    self._buffer.jump_to(self._start, _word_to_position(pointer))
                    self.data_ptr = self._buffer.read_int(4, self.endian, False)
                    if not self.data_ptr:
                        # No data stored for this part; skip it.
                        continue
                    self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                    self.data_header_length = self._buffer.read_int(4, self.endian, False)
                    data_header = self._buffer.set_mark()
                    self._buffer.jump_to(data_header,
                                         _word_to_position(part.header_length + 1))
                    # Length of the data following the part header
                    # (in 4-byte words — TODO confirm for character parts).
                    lendat = self.data_header_length - part.header_length

                    # Map the part's data type to a struct format character.
                    fmt_code = {
                        DataTypes.real: 'f',
                        DataTypes.realpack: 'i',
                        DataTypes.character: 's',
                    }.get(part.data_type)

                    if fmt_code is None:
                        raise NotImplementedError('No methods for data type {}'
                                                  .format(part.data_type))
                    packed_buffer = (
                        self._buffer.read_struct(
                            struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
                        )
                    )

                    parameters = self.parameters[iprt]
                    nparms = len(parameters['name'])
                    # Unmerged data are grouped per part (e.g. mandatory,
                    # significant levels) rather than flattened.
                    sounding[part.name] = {}

                    if part.data_type == DataTypes.realpack:
                        # Bit-packed reals need decoding first.
                        unpacked = self._unpack_real(packed_buffer, parameters, lendat)
                        for iprm, param in enumerate(parameters['name']):
                            # Values are interleaved by parameter; stride by
                            # the parameter count to de-interleave.
                            sounding[part.name][param] = unpacked[iprm::nparms]
                    elif part.data_type == DataTypes.character:
                        for iprm, param in enumerate(parameters['name']):
                            sounding[part.name][param] = (
                                self._decode_strip(packed_buffer[iprm])
                            )
                    else:
                        for iprm, param in enumerate(parameters['name']):
                            sounding[part.name][param] = (
                                np.array(packed_buffer[iprm::nparms], dtype=np.float32)
                            )

                # Combine the per-part sections into a single profile.
                soundings.append(self._merge_sounding(sounding))
        return soundings
1490
1491    def _merge_significant_temps(self, merged, parts, section, pbot):
1492        """Process and merge a significant temperature sections."""
1493        for isigt, press in enumerate(parts[section]['PRES']):
1494            press = abs(press)
1495            if self.prod_desc.missing_float not in [
1496                press,
1497                parts[section]['TEMP'][isigt]
1498            ] and press != 0:
1499                if press > pbot:
1500                    continue
1501                elif press in merged['PRES']:
1502                    ploc = merged['PRES'].index(press)
1503                    if merged['TEMP'][ploc] == self.prod_desc.missing_float:
1504                        merged['TEMP'][ploc] = parts[section]['TEMP'][isigt]
1505                        merged['DWPT'][ploc] = parts[section]['DWPT'][isigt]
1506                else:
1507                    size = len(merged['PRES'])
1508                    loc = size - bisect.bisect_left(merged['PRES'][::-1], press)
1509                    merged['PRES'].insert(loc, press)
1510                    merged['TEMP'].insert(loc, parts[section]['TEMP'][isigt])
1511                    merged['DWPT'].insert(loc, parts[section]['DWPT'][isigt])
1512                    merged['DRCT'].insert(loc, self.prod_desc.missing_float)
1513                    merged['SPED'].insert(loc, self.prod_desc.missing_float)
1514                    merged['HGHT'].insert(loc, self.prod_desc.missing_float)
1515            pbot = press
1516
1517        return pbot
1518
1519    def _merge_tropopause_data(self, merged, parts, section, pbot):
1520        """Process and merge tropopause sections."""
1521        for itrp, press in enumerate(parts[section]['PRES']):
1522            press = abs(press)
1523            if self.prod_desc.missing_float not in [
1524                press,
1525                parts[section]['TEMP'][itrp]
1526            ] and press != 0:
1527                if press > pbot:
1528                    continue
1529                elif press in merged['PRES']:
1530                    ploc = merged['PRES'].index(press)
1531                    if merged['TEMP'][ploc] == self.prod_desc.missing_float:
1532                        merged['TEMP'][ploc] = parts[section]['TEMP'][itrp]
1533                        merged['DWPT'][ploc] = parts[section]['DWPT'][itrp]
1534                    if merged['DRCT'][ploc] == self.prod_desc.missing_float:
1535                        merged['DRCT'][ploc] = parts[section]['DRCT'][itrp]
1536                        merged['SPED'][ploc] = parts[section]['SPED'][itrp]
1537                    merged['HGHT'][ploc] = self.prod_desc.missing_float
1538                else:
1539                    size = len(merged['PRES'])
1540                    loc = size - bisect.bisect_left(merged['PRES'][::-1], press)
1541                    merged['PRES'].insert(loc, press)
1542                    merged['TEMP'].insert(loc, parts[section]['TEMP'][itrp])
1543                    merged['DWPT'].insert(loc, parts[section]['DWPT'][itrp])
1544                    merged['DRCT'].insert(loc, parts[section]['DRCT'][itrp])
1545                    merged['SPED'].insert(loc, parts[section]['SPED'][itrp])
1546                    merged['HGHT'].insert(loc, self.prod_desc.missing_float)
1547            pbot = press
1548
1549        return pbot
1550
1551    def _merge_mandatory_temps(self, merged, parts, section, qcman, bgl, plast):
1552        """Process and merge mandatory temperature sections."""
1553        num_levels = len(parts[section]['PRES'])
1554        start_level = {
1555            'TTAA': 1,
1556            'TTCC': 0,
1557        }
1558        for i in range(start_level[section], num_levels):
1559            if (parts[section]['PRES'][i] < plast
1560                and self.prod_desc.missing_float not in [
1561                    parts[section]['PRES'][i],
1562                    parts[section]['TEMP'][i],
1563                    parts[section]['HGHT'][i]
1564            ]):
1565                for pname, pval in parts[section].items():
1566                    merged[pname].append(pval[i])
1567                plast = merged['PRES'][-1]
1568            else:
1569                if section == 'TTAA':
1570                    if parts[section]['PRES'][i] > merged['PRES'][0]:
1571                        bgl += 1
1572                    else:
1573                        # GEMPAK ignores MAN data with missing TEMP/HGHT and does not
1574                        # interpolate for them.
1575                        if parts[section]['PRES'][i] != self.prod_desc.missing_float:
1576                            qcman.append(parts[section]['PRES'][i])
1577
1578        return bgl, plast
1579
1580    def _merge_mandatory_winds(self, merged, parts, section, qcman):
1581        """Process and merge manadatory wind sections."""
1582        for iwind, press in enumerate(parts[section]['PRES']):
1583            if press in merged['PRES'][1:]:
1584                loc = merged['PRES'].index(press)
1585                if merged['DRCT'][loc] == self.prod_desc.missing_float:
1586                    merged['DRCT'][loc] = parts[section]['DRCT'][iwind]
1587                    merged['SPED'][loc] = parts[section]['SPED'][iwind]
1588            else:
1589                if press not in qcman:
1590                    size = len(merged['PRES'])
1591                    loc = size - bisect.bisect_left(merged['PRES'][1:][::-1], press)
1592                    if loc >= size + 1:
1593                        loc = -1
1594                    merged['PRES'].insert(loc, press)
1595                    merged['TEMP'].insert(loc, self.prod_desc.missing_float)
1596                    merged['DWPT'].insert(loc, self.prod_desc.missing_float)
1597                    merged['DRCT'].insert(loc, parts[section]['DRCT'][iwind])
1598                    merged['SPED'].insert(loc, parts[section]['SPED'][iwind])
1599                    merged['HGHT'].insert(loc, self.prod_desc.missing_float)
1600
1601    def _merge_winds_pressure(self, merged, parts, section, pbot):
1602        """Process and merge wind sections on pressure surfaces."""
1603        for ilevel, press in enumerate(parts[section]['PRES']):
1604            press = abs(press)
1605            if self.prod_desc.missing_float not in [
1606                press,
1607                parts[section]['DRCT'][ilevel],
1608                parts[section]['SPED'][ilevel]
1609            ] and press != 0:
1610                if press > pbot:
1611                    continue
1612                elif press in merged['PRES']:
1613                    ploc = merged['PRES'].index(press)
1614                    if self.prod_desc.missing_float in [
1615                        merged['DRCT'][ploc],
1616                        merged['SPED'][ploc]
1617                    ]:
1618                        merged['DRCT'][ploc] = parts[section]['DRCT'][ilevel]
1619                        merged['SPED'][ploc] = parts[section]['SPED'][ilevel]
1620                else:
1621                    size = len(merged['PRES'])
1622                    loc = size - bisect.bisect_left(merged['PRES'][::-1], press)
1623                    merged['PRES'].insert(loc, press)
1624                    merged['DRCT'].insert(loc, parts[section]['DRCT'][ilevel])
1625                    merged['SPED'].insert(loc, parts[section]['SPED'][ilevel])
1626                    merged['TEMP'].insert(loc, self.prod_desc.missing_float)
1627                    merged['DWPT'].insert(loc, self.prod_desc.missing_float)
1628                    merged['HGHT'].insert(loc, self.prod_desc.missing_float)
1629            pbot = press
1630
1631        return pbot
1632
    def _merge_winds_height(self, merged, parts, nsgw, nasw, istart):
        """Merge wind sections on height surfaces.

        Walks the height-coordinate significant wind levels from the PPBB
        part (then PPDD) and either fills winds at existing merged levels
        or inserts new wind-only levels in ascending height order.

        Parameters
        ----------
        merged : dict
            Partially merged profile of parallel parameter lists.
        parts : dict
            Unmerged sounding parts; PPBB and PPDD entries are read.
        nsgw : int
            Number of PPBB significant wind levels.
        nasw : int
            Number of PPDD significant wind levels.
        istart : int
            Index of the first PPBB level to process.

        Returns
        -------
        int
            Number of levels in the merged profile after insertions.
        """
        size = len(merged['HGHT'])
        psfc = merged['PRES'][0]
        zsfc = merged['HGHT'][0]

        # Choose the initial bounding heights (zold, znxt) and level index:
        # start at the surface when it is valid, otherwise skip past it.
        if self.prod_desc.missing_float not in [
            psfc,
            zsfc
        ] and size >= 2:
            more = True
            zold = merged['HGHT'][0]
            znxt = merged['HGHT'][1]
            ilev = 1
        elif size >= 3:
            more = True
            zold = merged['HGHT'][1]
            znxt = merged['HGHT'][2]
            ilev = 2
        else:
            zold = self.prod_desc.missing_float
            znxt = self.prod_desc.missing_float

        # Without two valid bounding heights there is nothing to merge into.
        if self.prod_desc.missing_float in [
            zold,
            znxt
        ]:
            more = False

        # Begin with PPBB data unless istart says it is exhausted, in which
        # case start directly on the PPDD (above 100 hPa) levels.
        if istart <= nsgw:
            above = False
            i = istart
            iend = nsgw
        else:
            above = True
            i = 0
            iend = nasw

        while more and i < iend:
            if not above:
                hght = parts['PPBB']['HGHT'][i]
                drct = parts['PPBB']['DRCT'][i]
                sped = parts['PPBB']['SPED'][i]
            else:
                hght = parts['PPDD']['HGHT'][i]
                drct = parts['PPDD']['DRCT'][i]
                sped = parts['PPDD']['SPED'][i]
            skip = False

            if self.prod_desc.missing_float in [
                hght,
                drct,
                sped
            ]:
                skip = True
            elif abs(zold - hght) < 1:
                # Same level (within 1 m) as the lower bound: only fill in
                # missing winds rather than inserting a duplicate level.
                skip = True
                if self.prod_desc.missing_float in [
                    merged['DRCT'][ilev - 1],
                    merged['SPED'][ilev - 1]
                ]:
                    merged['DRCT'][ilev - 1] = drct
                    merged['SPED'][ilev - 1] = sped
            elif hght <= zold:
                skip = True
            elif hght >= znxt:
                # Advance the bounding interval upward until it contains hght
                # or we run out of valid heights.
                while more and hght > znxt:
                    zold = znxt
                    ilev += 1
                    if ilev >= size:
                        more = False
                    else:
                        znxt = merged['HGHT'][ilev]
                        if znxt == self.prod_desc.missing_float:
                            more = False

            if more and not skip:
                if abs(znxt - hght) < 1:
                    # NOTE(review): the missing-value check reads ilev - 1 but
                    # the assignment writes index ilev -- confirm this mirrors
                    # the intended GEMPAK behavior rather than an index slip.
                    if self.prod_desc.missing_float in [
                        merged['DRCT'][ilev - 1],
                        merged['SPED'][ilev - 1]
                    ]:
                        merged['DRCT'][ilev] = drct
                        merged['SPED'][ilev] = sped
                else:
                    # Insert a new wind-only level in ascending height order.
                    loc = bisect.bisect_left(merged['HGHT'], hght)
                    merged['HGHT'].insert(loc, hght)
                    merged['DRCT'].insert(loc, drct)
                    merged['SPED'].insert(loc, sped)
                    merged['PRES'].insert(loc, self.prod_desc.missing_float)
                    merged['TEMP'].insert(loc, self.prod_desc.missing_float)
                    merged['DWPT'].insert(loc, self.prod_desc.missing_float)
                    size += 1
                    ilev += 1
                    zold = hght

            # After the last PPBB level, continue with the PPDD levels.
            if not above and i == nsgw - 1:
                above = True
                i = 0
                iend = nasw
            else:
                i += 1

        return size
1737
1738    def _merge_sounding(self, parts):
1739        """Merge unmerged sounding data."""
1740        merged = {'STID': parts['STID'],
1741                  'STNM': parts['STNM'],
1742                  'SLAT': parts['SLAT'],
1743                  'SLON': parts['SLON'],
1744                  'SELV': parts['SELV'],
1745                  'STAT': parts['STAT'],
1746                  'COUN': parts['COUN'],
1747                  'DATE': parts['DATE'],
1748                  'TIME': parts['TIME'],
1749                  'PRES': [],
1750                  'HGHT': [],
1751                  'TEMP': [],
1752                  'DWPT': [],
1753                  'DRCT': [],
1754                  'SPED': [],
1755                  }
1756
1757        # Number of parameter levels
1758        num_man_levels = len(parts['TTAA']['PRES']) if 'TTAA' in parts else 0
1759        num_man_wind_levels = len(parts['PPAA']['PRES']) if 'PPAA' in parts else 0
1760        num_trop_levels = len(parts['TRPA']['PRES']) if 'TRPA' in parts else 0
1761        num_max_wind_levels = len(parts['MXWA']['PRES']) if 'MXWA' in parts else 0
1762        num_sigt_levels = len(parts['TTBB']['PRES']) if 'TTBB' in parts else 0
1763        num_sigw_levels = len(parts['PPBB']['SPED']) if 'PPBB' in parts else 0
1764        num_above_man_levels = len(parts['TTCC']['PRES']) if 'TTCC' in parts else 0
1765        num_above_trop_levels = len(parts['TRPC']['PRES']) if 'TRPC' in parts else 0
1766        num_above_max_wind_levels = len(parts['MXWC']['SPED']) if 'MXWC' in parts else 0
1767        num_above_sigt_levels = len(parts['TTDD']['PRES']) if 'TTDD' in parts else 0
1768        num_above_sigw_levels = len(parts['PPDD']['SPED']) if 'PPDD' in parts else 0
1769        num_above_man_wind_levels = len(parts['PPCC']['SPED']) if 'PPCC' in parts else 0
1770
1771        total_data = (num_man_levels
1772                      + num_man_wind_levels
1773                      + num_trop_levels
1774                      + num_max_wind_levels
1775                      + num_sigt_levels
1776                      + num_sigw_levels
1777                      + num_above_man_levels
1778                      + num_above_trop_levels
1779                      + num_above_max_wind_levels
1780                      + num_above_sigt_levels
1781                      + num_above_sigw_levels
1782                      + num_above_man_wind_levels
1783                      )
1784        if total_data == 0:
1785            return None
1786
1787        # Check SIG wind vertical coordinate
1788        # For some reason, the pressure data can get put into the
1789        # height array. Perhaps this is just a artifact of Python,
1790        # as GEMPAK itself just uses array indices without any
1791        # names involved. Since the first valid pressure of the
1792        # array will be negative in the case of pressure coordinates,
1793        # we can check for it and place data in the appropriate array.
1794        ppbb_is_z = True
1795        if num_sigw_levels:
1796            if 'PRES' in parts['PPBB']:
1797                ppbb_is_z = False
1798            else:
1799                for z in parts['PPBB']['HGHT']:
1800                    if z != self.prod_desc.missing_float and z < 0:
1801                        ppbb_is_z = False
1802                        parts['PPBB']['PRES'] = parts['PPBB']['HGHT']
1803                        break
1804
1805        ppdd_is_z = True
1806        if num_above_sigw_levels:
1807            if 'PRES' in parts['PPDD']:
1808                ppdd_is_z = False
1809            else:
1810                for z in parts['PPDD']['HGHT']:
1811                    if z != self.prod_desc.missing_float and z < 0:
1812                        ppdd_is_z = False
1813                        parts['PPDD']['PRES'] = parts['PPDD']['HGHT']
1814                        break
1815
1816        # Process surface data
1817        if num_man_levels < 1:
1818            merged['PRES'].append(self.prod_desc.missing_float)
1819            merged['HGHT'].append(self.prod_desc.missing_float)
1820            merged['TEMP'].append(self.prod_desc.missing_float)
1821            merged['DWPT'].append(self.prod_desc.missing_float)
1822            merged['DRCT'].append(self.prod_desc.missing_float)
1823            merged['SPED'].append(self.prod_desc.missing_float)
1824        else:
1825            merged['PRES'].append(parts['TTAA']['PRES'][0])
1826            merged['HGHT'].append(parts['TTAA']['HGHT'][0])
1827            merged['TEMP'].append(parts['TTAA']['TEMP'][0])
1828            merged['DWPT'].append(parts['TTAA']['DWPT'][0])
1829            merged['DRCT'].append(parts['TTAA']['DRCT'][0])
1830            merged['SPED'].append(parts['TTAA']['SPED'][0])
1831
1832        merged['HGHT'][0] = merged['SELV']
1833
1834        first_man_p = self.prod_desc.missing_float
1835        if num_man_levels >= 1:
1836            for mp, mt, mz in zip(parts['TTAA']['PRES'],
1837                                  parts['TTAA']['TEMP'],
1838                                  parts['TTAA']['HGHT']):
1839                if self.prod_desc.missing_float not in [
1840                    mp,
1841                    mt,
1842                    mz
1843                ]:
1844                    first_man_p = mp
1845                    break
1846
1847        surface_p = merged['PRES'][0]
1848        if surface_p > 1060:
1849            surface_p = self.prod_desc.missing_float
1850
1851        if (surface_p == self.prod_desc.missing_float
1852           or (surface_p < first_man_p
1853               and surface_p != self.prod_desc.missing_float)):
1854            merged['PRES'][0] = self.prod_desc.missing_float
1855            merged['HGHT'][0] = self.prod_desc.missing_float
1856            merged['TEMP'][0] = self.prod_desc.missing_float
1857            merged['DWPT'][0] = self.prod_desc.missing_float
1858            merged['DRCT'][0] = self.prod_desc.missing_float
1859            merged['SPED'][0] = self.prod_desc.missing_float
1860
1861        if (num_sigt_levels >= 1
1862           and self.prod_desc.missing_float not in [
1863               parts['TTBB']['PRES'][0],
1864               parts['TTBB']['TEMP'][0]
1865           ]):
1866            first_man_p = merged['PRES'][0]
1867            first_sig_p = parts['TTBB']['PRES'][0]
1868            if (first_man_p == self.prod_desc.missing_float
1869               or np.isclose(first_man_p, first_sig_p)):
1870                merged['PRES'][0] = parts['TTBB']['PRES'][0]
1871                merged['DWPT'][0] = parts['TTBB']['DWPT'][0]
1872                merged['TEMP'][0] = parts['TTBB']['TEMP'][0]
1873
1874        if num_sigw_levels >= 1:
1875            if ppbb_is_z:
1876                if (parts['PPBB']['HGHT'][0] == 0
1877                   and parts['PPBB']['DRCT'][0] != self.prod_desc.missing_float):
1878                    merged['DRCT'][0] = parts['PPBB']['DRCT'][0]
1879                    merged['SPED'][0] = parts['PPBB']['SPED'][0]
1880            else:
1881                if self.prod_desc.missing_float not in [
1882                    parts['PPBB']['PRES'][0],
1883                    parts['PPBB']['DRCT'][0]
1884                ]:
1885                    first_man_p = merged['PRES'][0]
1886                    first_sig_p = abs(parts['PPBB']['PRES'][0])
1887                    if (first_man_p == self.prod_desc.missing_float
1888                       or np.isclose(first_man_p, first_sig_p)):
1889                        merged['PRES'][0] = abs(parts['PPBB']['PRES'][0])
1890                        merged['DRCT'][0] = parts['PPBB']['DRCT'][0]
1891                        merged['SPED'][0] = parts['PPBB']['SPED'][0]
1892
1893        # Merge MAN temperature
1894        bgl = 0
1895        qcman = []
1896        if num_man_levels >= 2 or num_above_man_levels >= 1:
1897            if merged['PRES'][0] == self.prod_desc.missing_float:
1898                plast = 2000
1899            else:
1900                plast = merged['PRES'][0]
1901
1902        if num_man_levels >= 2:
1903            bgl, plast = self._merge_mandatory_temps(merged, parts, 'TTAA',
1904                                                     qcman, bgl, plast)
1905
1906        if num_above_man_levels >= 1:
1907            bgl, plast = self._merge_mandatory_temps(merged, parts, 'TTCC',
1908                                                     qcman, bgl, plast)
1909
1910        # Merge MAN wind
1911        if num_man_wind_levels >= 1 and num_man_levels >= 1 and len(merged['PRES']) >= 2:
1912            self._merge_mandatory_winds(merged, parts, 'PPAA', qcman)
1913
1914        if num_above_man_wind_levels >= 1 and num_man_levels >= 1 and len(merged['PRES']) >= 2:
1915            self._merge_mandatory_winds(merged, parts, 'PPCC', qcman)
1916
1917        # Merge TROP
1918        if num_trop_levels >= 1 or num_above_trop_levels >= 1:
1919            if merged['PRES'][0] != self.prod_desc.missing_float:
1920                pbot = merged['PRES'][0]
1921            elif len(merged['PRES']) > 1:
1922                pbot = merged['PRES'][1]
1923                if pbot < parts['TRPA']['PRES'][1]:
1924                    pbot = 1050
1925            else:
1926                pbot = 1050
1927
1928        if num_trop_levels >= 1:
1929            pbot = self._merge_tropopause_data(merged, parts, 'TRPA', pbot)
1930
1931        if num_above_trop_levels >= 1:
1932            pbot = self._merge_tropopause_data(merged, parts, 'TRPC', pbot)
1933
1934        # Merge SIG temperature
1935        if num_sigt_levels >= 1 or num_above_sigt_levels >= 1:
1936            if merged['PRES'][0] != self.prod_desc.missing_float:
1937                pbot = merged['PRES'][0]
1938            elif len(merged['PRES']) > 1:
1939                pbot = merged['PRES'][1]
1940                if pbot < parts['TTBB']['PRES'][1]:
1941                    pbot = 1050
1942            else:
1943                pbot = 1050
1944
1945        if num_sigt_levels >= 1:
1946            pbot = self._merge_significant_temps(merged, parts, 'TTBB', pbot)
1947
1948        if num_above_sigt_levels >= 1:
1949            pbot = self._merge_significant_temps(merged, parts, 'TTDD', pbot)
1950
1951        # Interpolate heights
1952        _interp_moist_height(merged, self.prod_desc.missing_float)
1953
1954        # Merge SIG winds on pressure surfaces
1955        if not ppbb_is_z or not ppdd_is_z:
1956            if num_sigw_levels >= 1 or num_above_sigw_levels >= 1:
1957                if merged['PRES'][0] != self.prod_desc.missing_float:
1958                    pbot = merged['PRES'][0]
1959                elif len(merged['PRES']) > 1:
1960                    pbot = merged['PRES'][1]
1961                else:
1962                    pbot = 0
1963
1964            if num_sigw_levels >= 1 and not ppbb_is_z:
1965                pbot = self._merge_winds_pressure(merged, parts, 'PPBB', pbot)
1966
1967            if num_above_sigw_levels >= 1 and not ppdd_is_z:
1968                pbot = self._merge_winds_pressure(merged, parts, 'PPDD', pbot)
1969
1970        # Merge max winds on pressure surfaces
1971        if num_max_wind_levels >= 1 or num_above_max_wind_levels >= 1:
1972            if merged['PRES'][0] != self.prod_desc.missing_float:
1973                pbot = merged['PRES'][0]
1974            elif len(merged['PRES']) > 1:
1975                pbot = merged['PRES'][1]
1976            else:
1977                pbot = 0
1978
1979        if num_max_wind_levels >= 1:
1980            pbot = self._merge_winds_pressure(merged, parts, 'MXWA', pbot)
1981
1982        if num_above_max_wind_levels >= 1:
1983            pbot = self._merge_winds_pressure(merged, parts, 'MXWC', pbot)
1984
1985        # Interpolate height for SIG/MAX winds
1986        _interp_logp_height(merged, self.prod_desc.missing_float)
1987
1988        # Merge SIG winds on height surfaces
1989        if ppbb_is_z or ppdd_is_z:
1990            nsgw = num_sigw_levels if ppbb_is_z else 0
1991            nasw = num_above_sigw_levels if ppdd_is_z else 0
1992            if (nsgw >= 1 and (parts['PPBB']['HGHT'][0] == 0
1993               or parts['PPBB']['HGHT'][0] == merged['HGHT'][0])):
1994                istart = 1
1995            else:
1996                istart = 0
1997
1998            size = self._merge_winds_height(merged, parts, nsgw, nasw, istart)
1999
2000            # Interpolate missing pressure with height
2001            _interp_logp_pressure(merged, self.prod_desc.missing_float)
2002
2003        # Interpolate missing data
2004        _interp_logp_data(merged, self.prod_desc.missing_float)
2005
2006        # Add below ground MAN data
2007        if merged['PRES'][0] != self.prod_desc.missing_float and bgl > 0:
2008            for ibgl in range(1, num_man_levels):
2009                press = parts['TTAA']['PRES'][ibgl]
2010                if press > merged['PRES'][0]:
2011                    loc = size - bisect.bisect_left(merged['PRES'][1:][::-1], press)
2012                    merged['PRES'].insert(loc, press)
2013                    merged['TEMP'].insert(loc, parts['TTAA']['TEMP'][ibgl])
2014                    merged['DWPT'].insert(loc, parts['TTAA']['DWPT'][ibgl])
2015                    merged['DRCT'].insert(loc, parts['TTAA']['DRCT'][ibgl])
2016                    merged['SPED'].insert(loc, parts['TTAA']['SPED'][ibgl])
2017                    merged['HGHT'].insert(loc, parts['TTAA']['HGHT'][ibgl])
2018                    size += 1
2019
2020        # Add text data, if it is included
2021        if 'TXTA' in parts:
2022            merged['TXTA'] = parts['TXTA']['TEXT']
2023        if 'TXTB' in parts:
2024            merged['TXTB'] = parts['TXTB']['TEXT']
2025        if 'TXTC' in parts:
2026            merged['TXTC'] = parts['TXTC']['TEXT']
2027        if 'TXPB' in parts:
2028            merged['TXPB'] = parts['TXPB']['TEXT']
2029
2030        return merged
2031
2032    def snxarray(self, station_id=None, station_number=None,
2033                 date_time=None, state=None, country=None):
2034        """Select soundings and output as list of xarray Datasets.
2035
2036        Subset the data by parameter values. The default is to not
2037        subset and return the entire dataset.
2038
2039        Parameters
2040        ----------
2041        station_id : str or array-like of str
2042            Station ID of sounding site.
2043
2044        station_number : int or array-like of int
2045            Station number of sounding site.
2046
2047        date_time : datetime or array-like of datetime
2048            Valid datetime of the grid. Alternatively can be
2049            a string with the format YYYYmmddHHMM.
2050
2051        state : str or array-like of str
2052            State where sounding site is located.
2053
2054        country : str or array-like of str
2055            Country where sounding site is located.
2056
2057        Returns
2058        -------
2059        list
2060            List of xarray.Dataset objects for each sounding.
2061        """
2062        if station_id is not None:
2063            if (not isinstance(station_id, Iterable)
2064               or isinstance(station_id, str)):
2065                station_id = [station_id]
2066            station_id = [c.upper() for c in station_id]
2067
2068        if station_number is not None:
2069            if not isinstance(station_number, Iterable):
2070                station_number = [station_number]
2071            station_number = [int(sn) for sn in station_number]
2072
2073        if date_time is not None:
2074            if (not isinstance(date_time, Iterable)
2075               or isinstance(date_time, str)):
2076                date_time = [date_time]
2077            for i, dt in enumerate(date_time):
2078                if isinstance(dt, str):
2079                    date_time[i] = datetime.strptime(dt, '%Y%m%d%H%M')
2080
2081        if (state is not None
2082           and (not isinstance(state, Iterable)
2083                or isinstance(state, str))):
2084            state = [state]
2085            state = [s.upper() for s in state]
2086
2087        if (country is not None
2088           and (not isinstance(country, Iterable)
2089                or isinstance(country, str))):
2090            country = [country]
2091            country = [c.upper() for c in country]
2092
2093        # Figure out which columns to extract from the file
2094        matched = self._sninfo.copy()
2095
2096        if station_id is not None:
2097            matched = filter(
2098                lambda snd: snd if snd.ID in station_id else False,
2099                matched
2100            )
2101
2102        if station_number is not None:
2103            matched = filter(
2104                lambda snd: snd if snd.NUMBER in station_number else False,
2105                matched
2106            )
2107
2108        if date_time is not None:
2109            matched = filter(
2110                lambda snd: snd if snd.DATTIM in date_time else False,
2111                matched
2112            )
2113
2114        if state is not None:
2115            matched = filter(
2116                lambda snd: snd if snd.STATE in state else False,
2117                matched
2118            )
2119
2120        if country is not None:
2121            matched = filter(
2122                lambda snd: snd if snd.COUNTRY in country else False,
2123                matched
2124            )
2125
2126        matched = list(matched)
2127
2128        if len(matched) < 1:
2129            raise KeyError('No stations were matched with given parameters.')
2130
2131        sndno = [(s.DTNO, s.SNDNO) for s in matched]
2132
2133        if self.merged:
2134            data = self._unpack_merged(sndno)
2135        else:
2136            data = self._unpack_unmerged(sndno)
2137
2138        soundings = []
2139        for snd in data:
2140            if snd is None or 'PRES' not in snd:
2141                continue
2142            station_pressure = snd['PRES'][0]
2143            radat_text = {}
2144            attrs = {
2145                'station_id': snd.pop('STID'),
2146                'station_number': snd.pop('STNM'),
2147                'lat': snd.pop('SLAT'),
2148                'lon': snd.pop('SLON'),
2149                'elevation': snd.pop('SELV'),
2150                'station_pressure': station_pressure,
2151                'state': snd.pop('STAT'),
2152                'country': snd.pop('COUN'),
2153            }
2154
2155            if 'TXTA' in snd:
2156                radat_text['txta'] = snd.pop('TXTA')
2157            if 'TXTB' in snd:
2158                radat_text['txtb'] = snd.pop('TXTB')
2159            if 'TXTC' in snd:
2160                radat_text['txtc'] = snd.pop('TXTC')
2161            if 'TXPB' in snd:
2162                radat_text['txpb'] = snd.pop('TXPB')
2163            if radat_text:
2164                attrs['RADAT'] = radat_text
2165
2166            dt = datetime.combine(snd.pop('DATE'), snd.pop('TIME'))
2167            press = np.array(snd.pop('PRES'))
2168
2169            var = {}
2170            for param, values in snd.items():
2171                values = np.array(values)[np.newaxis, ...]
2172                maskval = np.ma.array(values, mask=values == self.prod_desc.missing_float,
2173                                      dtype=np.float32)
2174                var[param.lower()] = (['time', 'pressure'], maskval)
2175
2176            xrds = xr.Dataset(var,
2177                              coords={'time': np.atleast_1d(dt), 'pressure': press},
2178                              attrs=attrs)
2179
2180            # Sort to fix GEMPAK surface data at first level
2181            xrds = xrds.sortby('pressure', ascending=False)
2182
2183            soundings.append(xrds)
2184        return soundings
2185
2186
2187@exporter.export
2188class GempakSurface(GempakFile):
2189    """Subclass of GempakFile specific to GEMPAK surface data."""
2190
2191    def __init__(self, file, *args, **kwargs):
2192        super().__init__(file)
2193
2194        # Row Headers
2195        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.row_headers_ptr))
2196        self.row_headers = []
2197        row_headers_info = self._key_types(self.row_keys)
2198        row_headers_info.extend([(None, None)])
2199        row_headers_fmt = NamedStruct(row_headers_info, self.prefmt, 'RowHeaders')
2200        for _ in range(1, self.prod_desc.rows + 1):
2201            if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
2202                self.row_headers.append(self._buffer.read_struct(row_headers_fmt))
2203
2204        # Column Headers
2205        self._buffer.jump_to(self._start, _word_to_position(self.prod_desc.column_headers_ptr))
2206        self.column_headers = []
2207        column_headers_info = self._key_types(self.column_keys)
2208        column_headers_info.extend([(None, None)])
2209        column_headers_fmt = NamedStruct(column_headers_info, self.prefmt, 'ColumnHeaders')
2210        for _ in range(1, self.prod_desc.columns + 1):
2211            if self._buffer.read_int(4, self.endian, False) == USED_FLAG:
2212                self.column_headers.append(self._buffer.read_struct(column_headers_fmt))
2213
2214        self._get_surface_type()
2215
2216        self._sfinfo = []
2217        if self.surface_type == 'standard':
2218            for irow, row_head in enumerate(self.row_headers):
2219                for icol, col_head in enumerate(self.column_headers):
2220                    pointer = (self.prod_desc.data_block_ptr
2221                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
2222                               + (icol * self.prod_desc.parts))
2223
2224                    self._buffer.jump_to(self._start, _word_to_position(pointer))
2225                    data_ptr = self._buffer.read_int(4, self.endian, False)
2226
2227                    if data_ptr:
2228                        self._sfinfo.append(
2229                            Surface(
2230                                irow,
2231                                icol,
2232                                datetime.combine(row_head.DATE, row_head.TIME),
2233                                col_head.STID + col_head.STD2,
2234                                col_head.STNM,
2235                                col_head.SLAT,
2236                                col_head.SLON,
2237                                col_head.SELV,
2238                                col_head.STAT,
2239                                col_head.COUN,
2240                            )
2241                        )
2242        elif self.surface_type == 'ship':
2243            irow = 0
2244            for icol, col_head in enumerate(self.column_headers):
2245                pointer = (self.prod_desc.data_block_ptr
2246                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
2247                           + (icol * self.prod_desc.parts))
2248
2249                self._buffer.jump_to(self._start, _word_to_position(pointer))
2250                data_ptr = self._buffer.read_int(4, self.endian, False)
2251
2252                if data_ptr:
2253                    self._sfinfo.append(
2254                        Surface(
2255                            irow,
2256                            icol,
2257                            datetime.combine(col_head.DATE, col_head.TIME),
2258                            col_head.STID + col_head.STD2,
2259                            col_head.STNM,
2260                            col_head.SLAT,
2261                            col_head.SLON,
2262                            col_head.SELV,
2263                            col_head.STAT,
2264                            col_head.COUN,
2265                        )
2266                    )
2267        elif self.surface_type == 'climate':
2268            for icol, col_head in enumerate(self.column_headers):
2269                for irow, row_head in enumerate(self.row_headers):
2270                    pointer = (self.prod_desc.data_block_ptr
2271                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
2272                               + (icol * self.prod_desc.parts))
2273
2274                    self._buffer.jump_to(self._start, _word_to_position(pointer))
2275                    data_ptr = self._buffer.read_int(4, self.endian, False)
2276
2277                    if data_ptr:
2278                        self._sfinfo.append(
2279                            Surface(
2280                                irow,
2281                                icol,
2282                                datetime.combine(col_head.DATE, col_head.TIME),
2283                                row_head.STID + row_head.STD2,
2284                                row_head.STNM,
2285                                row_head.SLAT,
2286                                row_head.SLON,
2287                                row_head.SELV,
2288                                row_head.STAT,
2289                                row_head.COUN,
2290                            )
2291                        )
2292        else:
2293            raise TypeError('Unknown surface type {}'.format(self.surface_type))
2294
2295    def sfinfo(self):
2296        """Return station information."""
2297        return self._sfinfo
2298
2299    def _get_surface_type(self):
2300        """Determine type of surface file."""
2301        if len(self.row_headers) == 1:
2302            self.surface_type = 'ship'
2303        elif 'DATE' in self.row_keys:
2304            self.surface_type = 'standard'
2305        elif 'DATE' in self.column_keys:
2306            self.surface_type = 'climate'
2307        else:
2308            raise TypeError('Unknown surface data type')
2309
2310    def _key_types(self, keys):
2311        """Determine header information from a set of keys."""
2312        header_info = [(key, '4s', self._decode_strip) if key == 'STID'
2313                       else (key, 'i') if key == 'STNM'
2314                       else (key, 'i', lambda x: x / 100) if key == 'SLAT'
2315                       else (key, 'i', lambda x: x / 100) if key == 'SLON'
2316                       else (key, 'i') if key == 'SELV'
2317                       else (key, '4s', self._decode_strip) if key == 'STAT'
2318                       else (key, '4s', self._decode_strip) if key == 'COUN'
2319                       else (key, '4s', self._decode_strip) if key == 'STD2'
2320                       else (key, 'i', self._make_date) if key == 'DATE'
2321                       else (key, 'i', self._make_time) if key == 'TIME'
2322                       else (key, 'i')
2323                       for key in keys]
2324
2325        return header_info
2326
    def _unpack_climate(self, sfcno):
        """Unpack a climate surface data file.

        Parameters
        ----------
        sfcno : iterable of tuple
            (row, column) index pairs selecting which stations to unpack.

        Returns
        -------
        list of dict
            One dict per selected station with the header fields (STID,
            STNM, ...) plus an entry for each decoded parameter.
        """
        stations = []
        # Climate files index time by column and station by row.
        for icol, col_head in enumerate(self.column_headers):
            for irow, row_head in enumerate(self.row_headers):
                if (irow, icol) not in sfcno:
                    continue
                station = {'STID': row_head.STID,
                           'STNM': row_head.STNM,
                           'SLAT': row_head.SLAT,
                           'SLON': row_head.SLON,
                           'SELV': row_head.SELV,
                           'STAT': row_head.STAT,
                           'COUN': row_head.COUN,
                           'STD2': row_head.STD2,
                           'SPRI': row_head.SPRI,
                           'DATE': col_head.DATE,
                           'TIME': col_head.TIME,
                           }
                for iprt, part in enumerate(self.parts):
                    # Word offset of the data pointer for this
                    # (row, column, part) cell.
                    pointer = (self.prod_desc.data_block_ptr
                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
                               + (icol * self.prod_desc.parts + iprt))
                    self._buffer.jump_to(self._start, _word_to_position(pointer))
                    self.data_ptr = self._buffer.read_int(4, self.endian, False)
                    # A zero pointer means no data stored for this part.
                    if not self.data_ptr:
                        continue
                    self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                    self.data_header_length = self._buffer.read_int(4, self.endian, False)
                    data_header = self._buffer.set_mark()
                    # Skip past the part header words to reach the packed data.
                    self._buffer.jump_to(data_header,
                                         _word_to_position(part.header_length + 1))
                    lendat = self.data_header_length - part.header_length

                    # Struct format character by storage type; packed reals
                    # are stored as integers and decoded by _unpack_real.
                    fmt_code = {
                        DataTypes.real: 'f',
                        DataTypes.realpack: 'i',
                        DataTypes.character: 's',
                    }.get(part.data_type)

                    if fmt_code is None:
                        raise NotImplementedError('No methods for data type {}'
                                                  .format(part.data_type))
                    packed_buffer = (
                        self._buffer.read_struct(
                            struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
                        )
                    )

                    parameters = self.parameters[iprt]

                    if part.data_type == DataTypes.realpack:
                        unpacked = self._unpack_real(packed_buffer, parameters, lendat)
                        for iprm, param in enumerate(parameters['name']):
                            station[param] = unpacked[iprm]
                    elif part.data_type == DataTypes.character:
                        for iprm, param in enumerate(parameters['name']):
                            station[param] = self._decode_strip(packed_buffer[iprm])
                    else:
                        for iprm, param in enumerate(parameters['name']):
                            station[param] = np.array(
                                packed_buffer[iprm], dtype=np.float32
                            )

                stations.append(station)
        return stations
2393
    def _unpack_ship(self, sfcno):
        """Unpack ship (moving observation) surface data file.

        Parameters
        ----------
        sfcno : iterable of tuple
            (row, column) index pairs selecting which stations to unpack.
            Ship files have a single row, so the row index is always 0.

        Returns
        -------
        list of dict
            One dict per selected observation with the header fields
            (STID, STNM, ...) plus an entry for each decoded parameter.
        """
        stations = []
        # Ship files have one row; each column is a single observation
        # carrying both its own time and its station metadata.
        irow = 0
        for icol, col_head in enumerate(self.column_headers):
            if (irow, icol) not in sfcno:
                continue
            station = {'STID': col_head.STID,
                       'STNM': col_head.STNM,
                       'SLAT': col_head.SLAT,
                       'SLON': col_head.SLON,
                       'SELV': col_head.SELV,
                       'STAT': col_head.STAT,
                       'COUN': col_head.COUN,
                       'STD2': col_head.STD2,
                       'SPRI': col_head.SPRI,
                       'DATE': col_head.DATE,
                       'TIME': col_head.TIME,
                       }
            for iprt, part in enumerate(self.parts):
                # Word offset of the data pointer for this
                # (row, column, part) cell.
                pointer = (self.prod_desc.data_block_ptr
                           + (irow * self.prod_desc.columns * self.prod_desc.parts)
                           + (icol * self.prod_desc.parts + iprt))
                self._buffer.jump_to(self._start, _word_to_position(pointer))
                self.data_ptr = self._buffer.read_int(4, self.endian, False)
                # A zero pointer means no data stored for this part.
                if not self.data_ptr:
                    continue
                self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
                self.data_header_length = self._buffer.read_int(4, self.endian, False)
                data_header = self._buffer.set_mark()
                # Skip past the part header words to reach the packed data.
                self._buffer.jump_to(data_header,
                                     _word_to_position(part.header_length + 1))
                lendat = self.data_header_length - part.header_length

                # Struct format character by storage type; packed reals
                # are stored as integers and decoded by _unpack_real.
                fmt_code = {
                    DataTypes.real: 'f',
                    DataTypes.realpack: 'i',
                    DataTypes.character: 's',
                }.get(part.data_type)

                if fmt_code is None:
                    raise NotImplementedError('No methods for data type {}'
                                              .format(part.data_type))
                packed_buffer = (
                    self._buffer.read_struct(
                        struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
                    )
                )

                parameters = self.parameters[iprt]

                if part.data_type == DataTypes.realpack:
                    unpacked = self._unpack_real(packed_buffer, parameters, lendat)
                    for iprm, param in enumerate(parameters['name']):
                        station[param] = unpacked[iprm]
                elif part.data_type == DataTypes.character:
                    for iprm, param in enumerate(parameters['name']):
                        station[param] = self._decode_strip(packed_buffer[iprm])
                else:
                    for iprm, param in enumerate(parameters['name']):
                        station[param] = np.array(
                            packed_buffer[iprm], dtype=np.float32
                        )

            stations.append(station)
        return stations
2460
2461    def _unpack_standard(self, sfcno):
2462        """Unpack a standard surface data file."""
2463        stations = []
2464        for irow, row_head in enumerate(self.row_headers):
2465            for icol, col_head in enumerate(self.column_headers):
2466                if (irow, icol) not in sfcno:
2467                    continue
2468                station = {'STID': col_head.STID,
2469                           'STNM': col_head.STNM,
2470                           'SLAT': col_head.SLAT,
2471                           'SLON': col_head.SLON,
2472                           'SELV': col_head.SELV,
2473                           'STAT': col_head.STAT,
2474                           'COUN': col_head.COUN,
2475                           'STD2': col_head.STD2,
2476                           'SPRI': col_head.SPRI,
2477                           'DATE': row_head.DATE,
2478                           'TIME': row_head.TIME,
2479                           }
2480                for iprt, part in enumerate(self.parts):
2481                    pointer = (self.prod_desc.data_block_ptr
2482                               + (irow * self.prod_desc.columns * self.prod_desc.parts)
2483                               + (icol * self.prod_desc.parts + iprt))
2484                    self._buffer.jump_to(self._start, _word_to_position(pointer))
2485                    self.data_ptr = self._buffer.read_int(4, self.endian, False)
2486                    if not self.data_ptr:
2487                        continue
2488                    self._buffer.jump_to(self._start, _word_to_position(self.data_ptr))
2489                    self.data_header_length = self._buffer.read_int(4, self.endian, False)
2490                    data_header = self._buffer.set_mark()
2491                    self._buffer.jump_to(data_header,
2492                                         _word_to_position(part.header_length + 1))
2493                    lendat = self.data_header_length - part.header_length
2494
2495                    fmt_code = {
2496                        DataTypes.real: 'f',
2497                        DataTypes.realpack: 'i',
2498                        DataTypes.character: 's',
2499                    }.get(part.data_type)
2500
2501                    if fmt_code is None:
2502                        raise NotImplementedError('No methods for data type {}'
2503                                                  .format(part.data_type))
2504                    packed_buffer = (
2505                        self._buffer.read_struct(
2506                            struct.Struct(f'{self.prefmt}{lendat}{fmt_code}')
2507                        )
2508                    )
2509
2510                    parameters = self.parameters[iprt]
2511
2512                    if part.data_type == DataTypes.realpack:
2513                        unpacked = self._unpack_real(packed_buffer, parameters, lendat)
2514                        for iprm, param in enumerate(parameters['name']):
2515                            station[param] = unpacked[iprm]
2516                    elif part.data_type == DataTypes.character:
2517                        for iprm, param in enumerate(parameters['name']):
2518                            station[param] = self._decode_strip(packed_buffer[iprm])
2519                    else:
2520                        for iprm, param in enumerate(parameters['name']):
2521                            station[param] = packed_buffer[iprm]
2522
2523                stations.append(station)
2524        return stations
2525
2526    def nearest_time(self, date_time, station_id=None, station_number=None):
2527        """Get nearest observation to given time for selected stations.
2528
2529        Parameters
2530        ----------
2531        date_time : datetime or array-like of datetime
2532            Valid/observed datetime of the surface station. Alternatively
2533            object or a string with the format YYYYmmddHHMM.
2534
2535        station_id : str or array-like of str
2536            Station ID of the surface station.
2537
2538        station_number : int or array-like of int
2539            Station number of the surface station.
2540
2541        Returns
2542        -------
2543        list
2544            List of dicts/JSONs for each surface station.
2545
2546        Notes
2547        -----
2548        One of either station_id or station_number must be used. If both
2549        are present, station_id will take precedence.
2550        """
2551        if isinstance(date_time, str):
2552            date_time = datetime.strptime(date_time, '%Y%m%d%H%M')
2553
2554        if station_id is None and station_number is None:
2555            raise ValueError('Must have either station_id or station_number')
2556
2557        if station_id is not None and station_number is not None:
2558            station_number = None
2559
2560        if (station_id is not None
2561           and (not isinstance(station_id, Iterable)
2562                or isinstance(station_id, str))):
2563            station_id = [station_id]
2564            station_id = [c.upper() for c in station_id]
2565
2566        if station_number is not None and not isinstance(station_number, Iterable):
2567            station_number = [station_number]
2568            station_number = [int(sn) for sn in station_number]
2569
2570        time_matched = []
2571        if station_id:
2572            for stn in station_id:
2573                matched = self.sfjson(station_id=stn)
2574
2575                nearest = min(
2576                    matched,
2577                    key=lambda d: abs(d['properties']['date_time'] - date_time)
2578                )
2579
2580                time_matched.append(nearest)
2581
2582        if station_number:
2583            for stn in station_id:
2584                matched = self.sfjson(station_number=stn)
2585
2586                nearest = min(
2587                    matched,
2588                    key=lambda d: abs(d['properties']['date_time'] - date_time)
2589                )
2590
2591                time_matched.append(nearest)
2592
2593        return time_matched
2594
2595    def sfjson(self, station_id=None, station_number=None,
2596               date_time=None, state=None, country=None):
2597        """Select surface stations and output as list of JSON objects.
2598
2599        Subset the data by parameter values. The default is to not
2600        subset and return the entire dataset.
2601
2602        Parameters
2603        ----------
2604        station_id : str or array-like of str
2605            Station ID of the surface station.
2606
2607        station_number : int or array-like of int
2608            Station number of the surface station.
2609
2610        date_time : datetime or array-like of datetime
2611            Valid datetime of the grid. Alternatively can be
2612            a string with the format YYYYmmddHHMM.
2613
2614        state : str or array-like of str
2615            State where surface station is located.
2616
2617        country : str or array-like of str
2618            Country where surface station is located.
2619
2620        Returns
2621        -------
2622        list
2623            List of dicts/JSONs for each surface station.
2624        """
2625        if (station_id is not None
2626           and (not isinstance(station_id, Iterable)
2627                or isinstance(station_id, str))):
2628            station_id = [station_id]
2629            station_id = [c.upper() for c in station_id]
2630
2631        if station_number is not None and not isinstance(station_number, Iterable):
2632            station_number = [station_number]
2633            station_number = [int(sn) for sn in station_number]
2634
2635        if date_time is not None:
2636            if (not isinstance(date_time, Iterable)
2637               or isinstance(date_time, str)):
2638                date_time = [date_time]
2639            for i, dt in enumerate(date_time):
2640                if isinstance(dt, str):
2641                    date_time[i] = datetime.strptime(dt, '%Y%m%d%H%M')
2642
2643        if (state is not None
2644           and (not isinstance(state, Iterable)
2645                or isinstance(state, str))):
2646            state = [state]
2647            state = [s.upper() for s in state]
2648
2649        if (country is not None
2650           and (not isinstance(country, Iterable)
2651                or isinstance(country, str))):
2652            country = [country]
2653            country = [c.upper() for c in country]
2654
2655        # Figure out which columns to extract from the file
2656        matched = self._sfinfo.copy()
2657
2658        if station_id is not None:
2659            matched = filter(
2660                lambda sfc: sfc if sfc.ID in station_id else False,
2661                matched
2662            )
2663
2664        if station_number is not None:
2665            matched = filter(
2666                lambda sfc: sfc if sfc.NUMBER in station_number else False,
2667                matched
2668            )
2669
2670        if date_time is not None:
2671            matched = filter(
2672                lambda sfc: sfc if sfc.DATTIM in date_time else False,
2673                matched
2674            )
2675
2676        if state is not None:
2677            matched = filter(
2678                lambda sfc: sfc if sfc.STATE in state else False,
2679                matched
2680            )
2681
2682        if country is not None:
2683            matched = filter(
2684                lambda sfc: sfc if sfc.COUNTRY in country else False,
2685                matched
2686            )
2687
2688        matched = list(matched)
2689
2690        if len(matched) < 1:
2691            raise KeyError('No stations were matched with given parameters.')
2692
2693        sfcno = [(s.ROW, s.COL) for s in matched]
2694
2695        if self.surface_type == 'standard':
2696            data = self._unpack_standard(sfcno)
2697        elif self.surface_type == 'ship':
2698            data = self._unpack_ship(sfcno)
2699        elif self.surface_type == 'climate':
2700            data = self._unpack_climate(sfcno)
2701        else:
2702            raise ValueError(f'Unknown surface data type: {self.surface_type}')
2703
2704        stnarr = []
2705        for stn in data:
2706            stnobj = {}
2707            stnobj['properties'] = {}
2708            stnobj['values'] = {}
2709            stnobj['properties']['date_time'] = datetime.combine(stn.pop('DATE'),
2710                                                                 stn.pop('TIME'))
2711            stnobj['properties']['station_id'] = stn.pop('STID') + stn.pop('STD2')
2712            stnobj['properties']['station_number'] = stn.pop('STNM')
2713            stnobj['properties']['longitude'] = stn.pop('SLON')
2714            stnobj['properties']['latitude'] = stn.pop('SLAT')
2715            stnobj['properties']['elevation'] = stn.pop('SELV')
2716            stnobj['properties']['state'] = stn.pop('STAT')
2717            stnobj['properties']['country'] = stn.pop('COUN')
2718            stnobj['properties']['priority'] = stn.pop('SPRI')
2719            if stn:
2720                for name, ob in stn.items():
2721                    stnobj['values'][name.lower()] = ob
2722                stnarr.append(stnobj)
2723
2724        return stnarr
2725