1import glob
2import os
3import re
4from collections import namedtuple
5from stat import ST_CTIME
6
7import numpy as np
8
9from yt.data_objects.index_subobjects.grid_patch import AMRGridPatch
10from yt.data_objects.static_output import Dataset
11from yt.funcs import mylog, setdefaultattr
12from yt.geometry.grid_geometry_handler import GridIndex
13from yt.utilities.io_handler import io_registry
14from yt.utilities.lib.misc_utilities import get_box_grids_level
15from yt.utilities.parallel_tools.parallel_analysis_interface import parallel_root_only
16
17from .fields import (
18    BoxlibFieldInfo,
19    CastroFieldInfo,
20    MaestroFieldInfo,
21    NyxFieldInfo,
22    WarpXFieldInfo,
23)
24
25# This is what we use to find scientific notation that might include d's
26# instead of e's.
27_scinot_finder = re.compile(r"[-+]?[0-9]*\.?[0-9]+([eEdD][-+]?[0-9]+)?")
28# This is the dimensions in the Cell_H file for each level
29# It is different for different dimensionalities, so we make a list
30_1dregx = r"-?\d+"
31_2dregx = r"-?\d+,-?\d+"
32_3dregx = r"-?\d+,-?\d+,-?\d+"
33_dim_finder = [
34    re.compile(rf"\(\(({ndregx})\) \(({ndregx})\) \({ndregx}\)\)$")
35    for ndregx in (_1dregx, _2dregx, _3dregx)
36]
37# This is the line that prefixes each set of data for a FAB in the FAB file
38# It is different for different dimensionalities, so we make a list
39_endian_regex = r"^FAB\ \(\(\d+,\ \([\d\ ]+\)\),\((\d+),\ \(([\d\ ]+)\)\)\)"
40_header_pattern = [
41    re.compile(
42        rf"""{_endian_regex}            # match `endianness`
43        \(
44              \(( {ndregx} )\)          # match `start`
45            \ \(( {ndregx} )\)          # match `end`
46            \ \(( {ndregx} )\)          # match `centering`
47        \)
48        \ (-?\d+)                       # match `nc`
49        $ # end of line
50        """,
51        re.VERBOSE,
52    )
53    for ndregx in (_1dregx, _2dregx, _3dregx)
54]
55
56
57class BoxlibGrid(AMRGridPatch):
58    _id_offset = 0
59    _offset = -1
60
61    def __init__(self, grid_id, offset, filename=None, index=None):
62        super().__init__(grid_id, filename, index)
63        self._base_offset = offset
64        self._parent_id = []
65        self._children_ids = []
66        self._pdata = {}
67
68    def _prepare_grid(self):
69        super()._prepare_grid()
70        my_ind = self.id - self._id_offset
71        self.start_index = self.index.grid_start_index[my_ind]
72
73    def get_global_startindex(self):
74        return self.start_index
75
76    def _setup_dx(self):
77        # has already been read in and stored in index
78        self.dds = self.index.ds.arr(self.index.level_dds[self.Level, :], "code_length")
79        self.field_data["dx"], self.field_data["dy"], self.field_data["dz"] = self.dds
80
81    def __repr__(self):
82        return "BoxlibGrid_%04i" % (self.id)
83
84    @property
85    def Parent(self):
86        if len(self._parent_id) == 0:
87            return None
88        return [self.index.grids[pid - self._id_offset] for pid in self._parent_id]
89
90    @property
91    def Children(self):
92        return [self.index.grids[cid - self._id_offset] for cid in self._children_ids]
93
94    def _get_offset(self, f):
95        # This will either seek to the _offset or figure out the correct
96        # _offset.
97        if self._offset == -1:
98            f.seek(self._base_offset, os.SEEK_SET)
99            f.readline()
100            self._offset = f.tell()
101        return self._offset
102
103    # We override here because we can have varying refinement levels
104    def select_ires(self, dobj):
105        mask = self._get_selector_mask(dobj.selector)
106        if mask is None:
107            return np.empty(0, dtype="int64")
108        coords = np.empty(self._last_count, dtype="int64")
109        coords[:] = self.Level + self.ds.level_offsets[self.Level]
110        return coords
111
112    # Override this as well, since refine_by can vary
113    def _fill_child_mask(self, child, mask, tofill, dlevel=1):
114        rf = self.ds.ref_factors[self.Level]
115        if dlevel != 1:
116            raise NotImplementedError
117        gi, cgi = self.get_global_startindex(), child.get_global_startindex()
118        startIndex = np.maximum(0, cgi // rf - gi)
119        endIndex = np.minimum(
120            (cgi + child.ActiveDimensions) // rf - gi, self.ActiveDimensions
121        )
122        endIndex += startIndex == endIndex
123        mask[
124            startIndex[0] : endIndex[0],
125            startIndex[1] : endIndex[1],
126            startIndex[2] : endIndex[2],
127        ] = tofill
128
129
130class BoxLibParticleHeader:
131    def __init__(self, ds, directory_name, is_checkpoint, extra_field_names=None):
132
133        self.particle_type = directory_name
134        header_filename = os.path.join(ds.output_dir, directory_name, "Header")
135        with open(header_filename) as f:
136            self.version_string = f.readline().strip()
137
138            particle_real_type = self.version_string.split("_")[-1]
139            known_real_types = {"double": np.float64, "single": np.float32}
140            try:
141                self.real_type = known_real_types[particle_real_type]
142            except KeyError:
143                mylog.warning(
144                    "yt did not recognize particle real type '%s'. Assuming 'double'.",
145                    particle_real_type,
146                )
147                self.real_type = known_real_types["double"]
148
149            self.int_type = np.int32
150
151            self.dim = int(f.readline().strip())
152            self.num_int_base = 2 + self.dim
153            self.num_real_base = self.dim
154            self.num_int_extra = 0  # this should be written by Boxlib, but isn't
155            self.num_real_extra = int(f.readline().strip())
156            self.num_int = self.num_int_base + self.num_int_extra
157            self.num_real = self.num_real_base + self.num_real_extra
158            self.num_particles = int(f.readline().strip())
159            self.max_next_id = int(f.readline().strip())
160            self.finest_level = int(f.readline().strip())
161            self.num_levels = self.finest_level + 1
162
163            # Boxlib particles can be written in checkpoint or plotfile mode
164            # The base integer fields are only there for checkpoints, but some
165            # codes use the checkpoint format for plotting
166            if not is_checkpoint:
167                self.num_int_base = 0
168                self.num_int_extra = 0
169                self.num_int = 0
170
171            self.grids_per_level = np.zeros(self.num_levels, dtype="int64")
172            self.data_map = {}
173            for level_num in range(self.num_levels):
174                self.grids_per_level[level_num] = int(f.readline().strip())
175                self.data_map[level_num] = {}
176
177            pfd = namedtuple(
178                "ParticleFileDescriptor", ["file_number", "num_particles", "offset"]
179            )
180
181            for level_num in range(self.num_levels):
182                for grid_num in range(self.grids_per_level[level_num]):
183                    entry = [int(val) for val in f.readline().strip().split()]
184                    self.data_map[level_num][grid_num] = pfd(*entry)
185
186        self._generate_particle_fields(extra_field_names)
187
188    def _generate_particle_fields(self, extra_field_names):
189
190        # these are the 'base' integer fields
191        self.known_int_fields = [
192            (self.particle_type, "particle_id"),
193            (self.particle_type, "particle_cpu"),
194            (self.particle_type, "particle_cell_x"),
195            (self.particle_type, "particle_cell_y"),
196            (self.particle_type, "particle_cell_z"),
197        ]
198        self.known_int_fields = self.known_int_fields[0 : self.num_int_base]
199
200        # these are extra integer fields
201        extra_int_fields = [
202            "particle_int_comp%d" % i for i in range(self.num_int_extra)
203        ]
204        self.known_int_fields.extend(
205            [(self.particle_type, field) for field in extra_int_fields]
206        )
207
208        # these are the base real fields
209        self.known_real_fields = [
210            (self.particle_type, "particle_position_x"),
211            (self.particle_type, "particle_position_y"),
212            (self.particle_type, "particle_position_z"),
213        ]
214        self.known_real_fields = self.known_real_fields[0 : self.num_real_base]
215
216        # these are the extras
217        if extra_field_names is not None:
218            assert len(extra_field_names) == self.num_real_extra
219        else:
220            extra_field_names = [
221                "particle_real_comp%d" % i for i in range(self.num_real_extra)
222            ]
223
224        self.known_real_fields.extend(
225            [(self.particle_type, field) for field in extra_field_names]
226        )
227
228        self.known_fields = self.known_int_fields + self.known_real_fields
229
230        self.particle_int_dtype = np.dtype(
231            [(t[1], self.int_type) for t in self.known_int_fields]
232        )
233
234        self.particle_real_dtype = np.dtype(
235            [(t[1], self.real_type) for t in self.known_real_fields]
236        )
237
238
239class AMReXParticleHeader:
240    def __init__(self, ds, directory_name, is_checkpoint, extra_field_names=None):
241
242        self.particle_type = directory_name
243        header_filename = os.path.join(ds.output_dir, directory_name, "Header")
244        self.real_component_names = []
245        self.int_component_names = []
246        with open(header_filename) as f:
247            self.version_string = f.readline().strip()
248
249            particle_real_type = self.version_string.split("_")[-1]
250
251            if particle_real_type == "double":
252                self.real_type = np.float64
253            elif particle_real_type == "single":
254                self.real_type = np.float32
255            else:
256                raise RuntimeError("yt did not recognize particle real type.")
257            self.int_type = np.int32
258
259            self.dim = int(f.readline().strip())
260            self.num_int_base = 2
261            self.num_real_base = self.dim
262            self.num_real_extra = int(f.readline().strip())
263            for _ in range(self.num_real_extra):
264                self.real_component_names.append(f.readline().strip())
265            self.num_int_extra = int(f.readline().strip())
266            for _ in range(self.num_int_extra):
267                self.int_component_names.append(f.readline().strip())
268            self.num_int = self.num_int_base + self.num_int_extra
269            self.num_real = self.num_real_base + self.num_real_extra
270            self.is_checkpoint = bool(int(f.readline().strip()))
271            self.num_particles = int(f.readline().strip())
272            self.max_next_id = int(f.readline().strip())
273            self.finest_level = int(f.readline().strip())
274            self.num_levels = self.finest_level + 1
275
276            if not self.is_checkpoint:
277                self.num_int_base = 0
278                self.num_int_extra = 0
279                self.num_int = 0
280
281            self.grids_per_level = np.zeros(self.num_levels, dtype="int64")
282            self.data_map = {}
283            for level_num in range(self.num_levels):
284                self.grids_per_level[level_num] = int(f.readline().strip())
285                self.data_map[level_num] = {}
286
287            pfd = namedtuple(
288                "ParticleFileDescriptor", ["file_number", "num_particles", "offset"]
289            )
290
291            for level_num in range(self.num_levels):
292                for grid_num in range(self.grids_per_level[level_num]):
293                    entry = [int(val) for val in f.readline().strip().split()]
294                    self.data_map[level_num][grid_num] = pfd(*entry)
295
296        self._generate_particle_fields()
297
298    def _generate_particle_fields(self):
299
300        # these are the 'base' integer fields
301        self.known_int_fields = [
302            (self.particle_type, "particle_id"),
303            (self.particle_type, "particle_cpu"),
304        ]
305        self.known_int_fields = self.known_int_fields[0 : self.num_int_base]
306
307        self.known_int_fields.extend(
308            [
309                (self.particle_type, "particle_" + field)
310                for field in self.int_component_names
311            ]
312        )
313
314        # these are the base real fields
315        self.known_real_fields = [
316            (self.particle_type, "particle_position_x"),
317            (self.particle_type, "particle_position_y"),
318            (self.particle_type, "particle_position_z"),
319        ]
320        self.known_real_fields = self.known_real_fields[0 : self.num_real_base]
321
322        self.known_real_fields.extend(
323            [
324                (self.particle_type, "particle_" + field)
325                for field in self.real_component_names
326            ]
327        )
328
329        self.known_fields = self.known_int_fields + self.known_real_fields
330
331        self.particle_int_dtype = np.dtype(
332            [(t[1], self.int_type) for t in self.known_int_fields]
333        )
334
335        self.particle_real_dtype = np.dtype(
336            [(t[1], self.real_type) for t in self.known_real_fields]
337        )
338
339
340class BoxlibHierarchy(GridIndex):
341
342    grid = BoxlibGrid
343
344    def __init__(self, ds, dataset_type="boxlib_native"):
345        self.dataset_type = dataset_type
346        self.header_filename = os.path.join(ds.output_dir, "Header")
347        self.directory = ds.output_dir
348        self.particle_headers = {}
349
350        GridIndex.__init__(self, ds, dataset_type)
351        self._cache_endianness(self.grids[-1])
352
353    def _parse_index(self):
354        """
355        read the global header file for an Boxlib plotfile output.
356        """
357        self.max_level = self.dataset._max_level
358        header_file = open(self.header_filename)
359
360        self.dimensionality = self.dataset.dimensionality
361        _our_dim_finder = _dim_finder[self.dimensionality - 1]
362        DRE = self.dataset.domain_right_edge  # shortcut
363        DLE = self.dataset.domain_left_edge  # shortcut
364
365        # We can now skip to the point in the file we want to start parsing.
366        header_file.seek(self.dataset._header_mesh_start)
367
368        dx = []
369        for i in range(self.max_level + 1):
370            dx.append([float(v) for v in next(header_file).split()])
371            # account for non-3d data sets
372            if self.dimensionality < 2:
373                dx[i].append(DRE[1] - DLE[1])
374            if self.dimensionality < 3:
375                dx[i].append(DRE[2] - DLE[1])
376        self.level_dds = np.array(dx, dtype="float64")
377        next(header_file)
378        if self.ds.geometry == "cartesian":
379            default_ybounds = (0.0, 1.0)
380            default_zbounds = (0.0, 1.0)
381        elif self.ds.geometry == "cylindrical":
382            # Now we check for dimensionality issues
383            if self.dimensionality != 2:
384                raise RuntimeError("yt needs cylindrical to be 2D")
385            self.level_dds[:, 2] = 2 * np.pi
386            default_zbounds = (0.0, 2 * np.pi)
387        elif self.ds.geometry == "spherical":
388            # BoxLib only supports 1D spherical, so ensure
389            # the other dimensions have the right extent.
390            self.level_dds[:, 1] = np.pi
391            self.level_dds[:, 2] = 2 * np.pi
392            default_ybounds = (0.0, np.pi)
393            default_zbounds = (0.0, 2 * np.pi)
394        else:
395            raise RuntimeError("Unknown BoxLib coordinate system.")
396        if int(next(header_file)) != 0:
397            raise RuntimeError("INTERNAL ERROR! This should be a zero.")
398
399        # each level is one group with ngrids on it.
400        # each grid has self.dimensionality number of lines of 2 reals
401        self.grids = []
402        grid_counter = 0
403        for level in range(self.max_level + 1):
404            vals = next(header_file).split()
405            lev, ngrids = int(vals[0]), int(vals[1])
406            assert lev == level
407            nsteps = int(next(header_file))  # NOQA
408            for gi in range(ngrids):
409                xlo, xhi = (float(v) for v in next(header_file).split())
410                if self.dimensionality > 1:
411                    ylo, yhi = (float(v) for v in next(header_file).split())
412                else:
413                    ylo, yhi = default_ybounds
414                if self.dimensionality > 2:
415                    zlo, zhi = (float(v) for v in next(header_file).split())
416                else:
417                    zlo, zhi = default_zbounds
418                self.grid_left_edge[grid_counter + gi, :] = [xlo, ylo, zlo]
419                self.grid_right_edge[grid_counter + gi, :] = [xhi, yhi, zhi]
420            # Now we get to the level header filename, which we open and parse.
421            fn = os.path.join(self.dataset.output_dir, next(header_file).strip())
422            level_header_file = open(fn + "_H")
423            level_dir = os.path.dirname(fn)
424            # We skip the first two lines, which contain BoxLib header file
425            # version and 'how' the data was written
426            next(level_header_file)
427            next(level_header_file)
428            # Now we get the number of components
429            ncomp_this_file = int(next(level_header_file))  # NOQA
430            # Skip the next line, which contains the number of ghost zones
431            next(level_header_file)
432            # To decipher this next line, we expect something like:
433            # (8 0
434            # where the first is the number of FABs in this level.
435            ngrids = int(next(level_header_file).split()[0][1:])
436            # Now we can iterate over each and get the indices.
437            for gi in range(ngrids):
438                # components within it
439                start, stop = _our_dim_finder.match(next(level_header_file)).groups()
440                # fix for non-3d data
441                # note we append '0' to both ends b/c of the '+1' in dims below
442                start += ",0" * (3 - self.dimensionality)
443                stop += ",0" * (3 - self.dimensionality)
444                start = np.array(start.split(","), dtype="int64")
445                stop = np.array(stop.split(","), dtype="int64")
446                dims = stop - start + 1
447                self.grid_dimensions[grid_counter + gi, :] = dims
448                self.grid_start_index[grid_counter + gi, :] = start
449            # Now we read two more lines.  The first of these is a close
450            # parenthesis.
451            next(level_header_file)
452            # The next is again the number of grids
453            next(level_header_file)
454            # Now we iterate over grids to find their offsets in each file.
455            for gi in range(ngrids):
456                # Now we get the data file, at which point we're ready to
457                # create the grid.
458                dummy, filename, offset = next(level_header_file).split()
459                filename = os.path.join(level_dir, filename)
460                go = self.grid(grid_counter + gi, int(offset), filename, self)
461                go.Level = self.grid_levels[grid_counter + gi, :] = level
462                self.grids.append(go)
463            grid_counter += ngrids
464            # already read the filenames above...
465        self.float_type = "float64"
466
467    def _cache_endianness(self, test_grid):
468        """
469        Cache the endianness and bytes perreal of the grids by using a
470        test grid and assuming that all grids have the same
471        endianness. This is a pretty safe assumption since Boxlib uses
472        one file per processor, and if you're running on a cluster
473        with different endian processors, then you're on your own!
474        """
475        # open the test file & grab the header
476        with open(os.path.expanduser(test_grid.filename), "rb") as f:
477            header = f.readline().decode("ascii", "ignore")
478
479        bpr, endian, start, stop, centering, nc = (
480            _header_pattern[self.dimensionality - 1].search(header).groups()
481        )
482        # Note that previously we were using a different value for BPR than we
483        # use now.  Here is an example set of information directly from BoxLib
484        """
485        * DOUBLE data
486        * FAB ((8, (64 11 52 0 1 12 0 1023)),(8, (1 2 3 4 5 6 7 8)))((0,0) (63,63) (0,0)) 27  # NOQA: E501
487        * FLOAT data
488        * FAB ((8, (32 8 23 0 1 9 0 127)),(4, (1 2 3 4)))((0,0) (63,63) (0,0)) 27
489        """
490        if bpr == endian[0]:
491            dtype = f"<f{bpr}"
492        elif bpr == endian[-1]:
493            dtype = f">f{bpr}"
494        else:
495            raise ValueError(
496                "FAB header is neither big nor little endian. "
497                "Perhaps the file is corrupt?"
498            )
499
500        mylog.debug("FAB header suggests dtype of %s", dtype)
501        self._dtype = np.dtype(dtype)
502
503    def _populate_grid_objects(self):
504        mylog.debug("Creating grid objects")
505        self.grids = np.array(self.grids, dtype="object")
506        self._reconstruct_parent_child()
507        for i, grid in enumerate(self.grids):
508            if (i % 1e4) == 0:
509                mylog.debug("Prepared % 7i / % 7i grids", i, self.num_grids)
510            grid._prepare_grid()
511            grid._setup_dx()
512        mylog.debug("Done creating grid objects")
513
514    def _reconstruct_parent_child(self):
515        if self.max_level == 0:
516            return
517        mask = np.empty(len(self.grids), dtype="int32")
518        mylog.debug("First pass; identifying child grids")
519        for i, grid in enumerate(self.grids):
520            get_box_grids_level(
521                self.grid_left_edge[i, :],
522                self.grid_right_edge[i, :],
523                self.grid_levels[i] + 1,
524                self.grid_left_edge,
525                self.grid_right_edge,
526                self.grid_levels,
527                mask,
528            )
529            ids = np.where(mask.astype("bool"))  # where is a tuple
530            grid._children_ids = ids[0] + grid._id_offset
531        mylog.debug("Second pass; identifying parents")
532        for i, grid in enumerate(self.grids):  # Second pass
533            for child in grid.Children:
534                child._parent_id.append(i + grid._id_offset)
535
536    def _count_grids(self):
537        # We can get everything from the Header file, but note that we're
538        # duplicating some work done elsewhere.  In a future where we don't
539        # pre-allocate grid arrays, this becomes unnecessary.
540        header_file = open(self.header_filename)
541        header_file.seek(self.dataset._header_mesh_start)
542        # Skip over the level dxs, geometry and the zero:
543        [next(header_file) for i in range(self.dataset._max_level + 3)]
544        # Now we need to be very careful, as we've seeked, and now we iterate.
545        # Does this work?  We are going to count the number of places that we
546        # have a three-item line.  The three items would be level, number of
547        # grids, and then grid time.
548        self.num_grids = 0
549        for line in header_file:
550            if len(line.split()) != 3:
551                continue
552            self.num_grids += int(line.split()[1])
553
554    def _initialize_grid_arrays(self):
555        super()._initialize_grid_arrays()
556        self.grid_start_index = np.zeros((self.num_grids, 3), "int64")
557
558    def _initialize_state_variables(self):
559        """override to not re-initialize num_grids in AMRHierarchy.__init__"""
560        self._parallel_locking = False
561        self._data_file = None
562        self._data_mode = None
563
564    def _detect_output_fields(self):
565        # This is all done in _parse_header_file
566        self.field_list = [("boxlib", f) for f in self.dataset._field_list]
567        self.field_indexes = {f[1]: i for i, f in enumerate(self.field_list)}
568        # There are times when field_list may change.  We copy it here to
569        # avoid that possibility.
570        self.field_order = [f for f in self.field_list]
571
572    def _setup_data_io(self):
573        self.io = io_registry[self.dataset_type](self.dataset)
574
575    def _determine_particle_output_type(self, directory_name):
576        header_filename = self.ds.output_dir + "/" + directory_name + "/Header"
577        with open(header_filename) as f:
578            version_string = f.readline().strip()
579            if version_string.startswith("Version_Two"):
580                return AMReXParticleHeader
581            else:
582                return BoxLibParticleHeader
583
584    def _read_particles(self, directory_name, is_checkpoint, extra_field_names=None):
585        pheader = self._determine_particle_output_type(directory_name)
586        self.particle_headers[directory_name] = pheader(
587            self.ds, directory_name, is_checkpoint, extra_field_names
588        )
589
590        num_parts = self.particle_headers[directory_name].num_particles
591        if self.ds._particle_type_counts is None:
592            self.ds._particle_type_counts = {}
593        self.ds._particle_type_counts[directory_name] = num_parts
594
595        base = os.path.join(self.ds.output_dir, directory_name)
596        if len(glob.glob(os.path.join(base, "Level_?", "DATA_????"))) > 0:
597            base_particle_fn = os.path.join(base, "Level_%d", "DATA_%.4d")
598        elif len(glob.glob(os.path.join(base, "Level_?", "DATA_?????"))) > 0:
599            base_particle_fn = os.path.join(base, "Level_%d", "DATA_%.5d")
600        else:
601            return
602
603        gid = 0
604        for lev, data in self.particle_headers[directory_name].data_map.items():
605            for pdf in data.values():
606                pdict = self.grids[gid]._pdata
607                pdict[directory_name] = {}
608                pdict[directory_name]["particle_filename"] = base_particle_fn % (
609                    lev,
610                    pdf.file_number,
611                )
612                pdict[directory_name]["offset"] = pdf.offset
613                pdict[directory_name]["NumberOfParticles"] = pdf.num_particles
614                self.grid_particle_count[gid] += pdf.num_particles
615                self.grids[gid].NumberOfParticles += pdf.num_particles
616                gid += 1
617
618        # add particle fields to field_list
619        pfield_list = self.particle_headers[directory_name].known_fields
620        self.field_list.extend(pfield_list)
621
622
623class BoxlibDataset(Dataset):
624    """
625    This class is a stripped down class that simply reads and parses
626    *filename*, without looking at the Boxlib index.
627    """
628
629    _index_class = BoxlibHierarchy
630    _field_info_class = BoxlibFieldInfo
631    _output_prefix = None
632    _default_cparam_filename = "job_info"
633    _periodicity = (False, False, False)
634
635    def __init__(
636        self,
637        output_dir,
638        cparam_filename=None,
639        fparam_filename=None,
640        dataset_type="boxlib_native",
641        storage_filename=None,
642        units_override=None,
643        unit_system="cgs",
644        default_species_fields=None,
645    ):
646        """
647        The paramfile is usually called "inputs"
648        and there may be a fortran inputs file usually called "probin"
649        plotname here will be a directory name
650        as per BoxLib, dataset_type will be Native (implemented here), IEEE (not
651        yet implemented) or ASCII (not yet implemented.)
652        """
653        self.fluid_types += ("boxlib",)
654        self.output_dir = os.path.abspath(os.path.expanduser(output_dir))
655
656        cparam_filename = cparam_filename or self.__class__._default_cparam_filename
657        self.cparam_filename = self._lookup_cparam_filepath(
658            output_dir, cparam_filename=cparam_filename
659        )
660        self.fparam_filename = self._localize_check(fparam_filename)
661        self.storage_filename = storage_filename
662
663        Dataset.__init__(
664            self,
665            output_dir,
666            dataset_type,
667            units_override=units_override,
668            unit_system=unit_system,
669            default_species_fields=default_species_fields,
670        )
671
672        # These are still used in a few places.
673        if "HydroMethod" not in self.parameters.keys():
674            self.parameters["HydroMethod"] = "boxlib"
675        self.parameters["Time"] = 1.0  # default unit is 1...
676        self.parameters["EOSType"] = -1  # default
677        self.parameters["gamma"] = self.parameters.get("materials.gamma", 1.6667)
678
679    def _localize_check(self, fn):
680        if fn is None:
681            return None
682        # If the file exists, use it.  If not, set it to None.
683        root_dir = os.path.dirname(self.output_dir)
684        full_fn = os.path.join(root_dir, fn)
685        if os.path.exists(full_fn):
686            return full_fn
687        return None
688
689    @classmethod
690    def _is_valid(cls, filename, *args, cparam_filename=None, **kwargs):
691        output_dir = filename
692        header_filename = os.path.join(output_dir, "Header")
693        # boxlib datasets are always directories, and
694        # We *know* it's not boxlib if Header doesn't exist.
695        if not os.path.exists(header_filename):
696            return False
697
698        if cls is BoxlibDataset:
699            # Stop checks here for the boxlib base class.
700            # Further checks are performed on subclasses.
701            return True
702
703        cparam_filename = cparam_filename or cls._default_cparam_filename
704        cparam_filepath = cls._lookup_cparam_filepath(output_dir, cparam_filename)
705
706        if cparam_filepath is None:
707            return False
708
709        lines = [line.lower() for line in open(cparam_filepath).readlines()]
710        return any(cls._subtype_keyword in line for line in lines)
711
712    @classmethod
713    def _lookup_cparam_filepath(cls, output_dir, cparam_filename):
714        lookup_table = [
715            os.path.abspath(os.path.join(p, cparam_filename))
716            for p in (output_dir, os.path.dirname(output_dir))
717        ]
718        found = [os.path.exists(file) for file in lookup_table]
719
720        if not any(found):
721            return None
722
723        return lookup_table[found.index(True)]
724
725    def _parse_parameter_file(self):
726        """
727        Parses the parameter file and establishes the various
728        dictionaries.
729        """
730        self._parse_header_file()
731        # Let's read the file
732        hfn = os.path.join(self.output_dir, "Header")
733        self.unique_identifier = int(os.stat(hfn)[ST_CTIME])
734        # the 'inputs' file is now optional
735        self._parse_cparams()
736        self._parse_fparams()
737
738    def _parse_cparams(self):
739        if self.cparam_filename is None:
740            return
741        for line in (line.split("#")[0].strip() for line in open(self.cparam_filename)):
742            try:
743                param, vals = (s.strip() for s in line.split("="))
744            except ValueError:
745                continue
746            if param == "amr.n_cell":
747                vals = self.domain_dimensions = np.array(vals.split(), dtype="int32")
748
749                # For 1D and 2D simulations in BoxLib usually only the relevant
750                # dimensions have a specified number of zones, but yt requires
751                # domain_dimensions to have three elements, with 1 in the additional
752                # slots if we're not in 3D, so append them as necessary.
753
754                if self.dimensionality == 1:
755                    vals = self.domain_dimensions = np.array([vals[0], 1, 1])
756                elif self.dimensionality == 2:
757                    vals = self.domain_dimensions = np.array([vals[0], vals[1], 1])
758            elif param == "amr.ref_ratio":
759                vals = self.refine_by = int(vals[0])
760            elif param == "Prob.lo_bc":
761                vals = tuple(p == "1" for p in vals.split())
762                assert len(vals) == self.dimensionality
763                periodicity = [False, False, False]  # default to non periodic
764                periodicity[: self.dimensionality] = vals  # fill in ndim parsed values
765                self._periodicity = tuple(periodicity)
766            elif param == "castro.use_comoving":
767                vals = self.cosmological_simulation = int(vals)
768            else:
769                try:
770                    vals = _guess_pcast(vals)
771                except (IndexError, ValueError):
772                    # hitting an empty string or a comment
773                    vals = None
774            self.parameters[param] = vals
775
776        if getattr(self, "cosmological_simulation", 0) == 1:
777            self.omega_lambda = self.parameters["comoving_OmL"]
778            self.omega_matter = self.parameters["comoving_OmM"]
779            self.hubble_constant = self.parameters["comoving_h"]
780            a_file = open(os.path.join(self.output_dir, "comoving_a"))
781            line = a_file.readline().strip()
782            a_file.close()
783            self.current_redshift = 1 / float(line) - 1
784        else:
785            self.current_redshift = 0.0
786            self.omega_lambda = 0.0
787            self.omega_matter = 0.0
788            self.hubble_constant = 0.0
789            self.cosmological_simulation = 0
790
791    def _parse_fparams(self):
792        """
793        Parses the fortran parameter file for Orion. Most of this will
794        be useless, but this is where it keeps mu = mass per
795        particle/m_hydrogen.
796        """
797        if self.fparam_filename is None:
798            return
799        for line in (l for l in open(self.fparam_filename) if "=" in l):
800            param, vals = (v.strip() for v in line.split("="))
801            # Now, there are a couple different types of parameters.
802            # Some will be where you only have floating point values, others
803            # will be where things are specified as string literals.
804            # Unfortunately, we're also using Fortran values, which will have
805            # things like 1.d-2 which is pathologically difficult to parse if
806            # your C library doesn't include 'd' in its locale for strtod.
807            # So we'll try to determine this.
808            vals = vals.split()
809            if any(_scinot_finder.match(v) for v in vals):
810                vals = [float(v.replace("D", "e").replace("d", "e")) for v in vals]
811            if len(vals) == 1:
812                vals = vals[0]
813            self.parameters[param] = vals
814
815    def _parse_header_file(self):
816        """
817        We parse the Boxlib header, which we use as our basis.  Anything in the
818        inputs file will override this, but the inputs file is not strictly
819        necessary for orientation of the data in space.
820        """
821
822        # Note: Python uses a read-ahead buffer, so using next(), which would
823        # be my preferred solution, won't work here.  We have to explicitly
824        # call readline() if we want to end up with an offset at the very end.
825        # Fortunately, elsewhere we don't care about the offset, so we're fine
826        # everywhere else using iteration exclusively.
827        header_file = open(os.path.join(self.output_dir, "Header"))
828        self.orion_version = header_file.readline().rstrip()
829        n_fields = int(header_file.readline())
830
831        self._field_list = [header_file.readline().strip() for i in range(n_fields)]
832
833        self.dimensionality = int(header_file.readline())
834        self.current_time = float(header_file.readline())
835        # This is traditionally a index attribute, so we will set it, but
836        # in a slightly hidden variable.
837        self._max_level = int(header_file.readline())
838
839        for side, init in zip(["left", "right"], [np.zeros, np.ones]):
840            domain_edge = init(3, dtype="float64")
841            domain_edge[: self.dimensionality] = header_file.readline().split()
842            setattr(self, f"domain_{side}_edge", domain_edge)
843
844        ref_factors = np.array(header_file.readline().split(), dtype="int64")
845        if ref_factors.size == 0:
846            # We use a default of two, as Nyx doesn't always output this value
847            ref_factors = [2] * (self._max_level + 1)
848        # We can't vary refinement factors based on dimension, or whatever else
849        # they are varied on.  In one curious thing, I found that some Castro 3D
850        # data has only two refinement factors, which I don't know how to
851        # understand.
852        self.ref_factors = ref_factors
853        if np.unique(ref_factors).size > 1:
854            # We want everything to be a multiple of this.
855            self.refine_by = min(ref_factors)
856            # Check that they're all multiples of the minimum.
857            if not all(
858                float(rf) / self.refine_by == int(float(rf) / self.refine_by)
859                for rf in ref_factors
860            ):
861                raise RuntimeError
862            base_log = np.log2(self.refine_by)
863            self.level_offsets = [0]  # level 0 has to have 0 offset
864            lo = 0
865            for rf in self.ref_factors:
866                lo += int(np.log2(rf) / base_log) - 1
867                self.level_offsets.append(lo)
868        # assert(np.unique(ref_factors).size == 1)
869        else:
870            self.refine_by = ref_factors[0]
871            self.level_offsets = [0 for l in range(self._max_level + 1)]
872        # Now we read the global index space, to get
873        index_space = header_file.readline()
874        # This will be of the form:
875        #  ((0,0,0) (255,255,255) (0,0,0)) ((0,0,0) (511,511,511) (0,0,0))
876        # So note that if we split it all up based on spaces, we should be
877        # fine, as long as we take the first two entries, which correspond to
878        # the root level.  I'm not 100% pleased with this solution.
879        root_space = index_space.replace("(", "").replace(")", "").split()[:2]
880        start = np.array(root_space[0].split(","), dtype="int64")
881        stop = np.array(root_space[1].split(","), dtype="int64")
882        dd = np.ones(3, dtype="int64")
883        dd[: self.dimensionality] = stop - start + 1
884        self.domain_dimensions = dd
885
886        # Skip timesteps per level
887        header_file.readline()
888        self._header_mesh_start = header_file.tell()
889        # Skip the cell size information per level - we'll get this later
890        for _ in range(self._max_level + 1):
891            header_file.readline()
892        # Get the geometry
893        next_line = header_file.readline()
894        if len(next_line.split()) == 1:
895            coordinate_type = int(next_line)
896        else:
897            coordinate_type = 0
898
899        known_types = {0: "cartesian", 1: "cylindrical", 2: "spherical"}
900        try:
901            self.geometry = known_types[coordinate_type]
902        except KeyError as err:
903            raise ValueError(f"Unknown BoxLib coord_type `{coordinate_type}`.") from err
904
905        if self.geometry == "cylindrical":
906            dre = self.domain_right_edge
907            dre[2] = 2.0 * np.pi
908            self.domain_right_edge = dre
909
910    def _set_code_unit_attributes(self):
911        setdefaultattr(self, "length_unit", self.quan(1.0, "cm"))
912        setdefaultattr(self, "mass_unit", self.quan(1.0, "g"))
913        setdefaultattr(self, "time_unit", self.quan(1.0, "s"))
914        setdefaultattr(self, "velocity_unit", self.quan(1.0, "cm/s"))
915
916    @parallel_root_only
917    def print_key_parameters(self):
918        for a in [
919            "current_time",
920            "domain_dimensions",
921            "domain_left_edge",
922            "domain_right_edge",
923        ]:
924            if not hasattr(self, a):
925                mylog.error("Missing %s in parameter file definition!", a)
926                continue
927            v = getattr(self, a)
928            mylog.info("Parameters: %-25s = %s", a, v)
929
930    def relative_refinement(self, l0, l1):
931        offset = self.level_offsets[l1] - self.level_offsets[l0]
932        return self.refine_by ** (l1 - l0 + offset)
933
934
935class OrionHierarchy(BoxlibHierarchy):
936    def __init__(self, ds, dataset_type="orion_native"):
937        BoxlibHierarchy.__init__(self, ds, dataset_type)
938        self._read_particles()
939        # self.io = IOHandlerOrion
940
941    def _detect_output_fields(self):
942        # This is all done in _parse_header_file
943        self.field_list = [("boxlib", f) for f in self.dataset._field_list]
944        self.field_indexes = {f[1]: i for i, f in enumerate(self.field_list)}
945        # There are times when field_list may change.  We copy it here to
946        # avoid that possibility.
947        self.field_order = [f for f in self.field_list]
948
949        # look for particle fields
950        self.particle_filename = None
951        for particle_filename in ["StarParticles", "SinkParticles"]:
952            fn = os.path.join(self.ds.output_dir, particle_filename)
953            if os.path.exists(fn):
954                self.particle_filename = fn
955
956        if self.particle_filename is None:
957            return
958
959        pfield_list = [("io", c) for c in self.io.particle_field_index.keys()]
960        self.field_list.extend(pfield_list)
961
962    def _read_particles(self):
963        """
964        Reads in particles and assigns them to grids. Will search for
965        Star particles, then sink particles if no star particle file
966        is found, and finally will simply note that no particles are
967        found if neither works. To add a new Orion particle type,
968        simply add it to the if/elif/else block.
969
970        """
971        self.grid_particle_count = np.zeros(len(self.grids))
972
973        if self.particle_filename is not None:
974            self._read_particle_file(self.particle_filename)
975
976    def _read_particle_file(self, fn):
977        """actually reads the orion particle data file itself."""
978        if not os.path.exists(fn):
979            return
980        with open(fn) as f:
981            lines = f.readlines()
982            self.num_stars = int(lines[0].strip()[0])
983            for num, line in enumerate(lines[1:]):
984                particle_position_x = float(line.split(" ")[1])
985                particle_position_y = float(line.split(" ")[2])
986                particle_position_z = float(line.split(" ")[3])
987                coord = [particle_position_x, particle_position_y, particle_position_z]
988                # for each particle, determine which grids contain it
989                # copied from object_finding_mixin.py
990                mask = np.ones(self.num_grids)
991                for i in range(len(coord)):
992                    np.choose(
993                        np.greater(self.grid_left_edge.d[:, i], coord[i]),
994                        (mask, 0),
995                        mask,
996                    )
997                    np.choose(
998                        np.greater(self.grid_right_edge.d[:, i], coord[i]),
999                        (0, mask),
1000                        mask,
1001                    )
1002                ind = np.where(mask == 1)
1003                selected_grids = self.grids[ind]
1004                # in orion, particles always live on the finest level.
1005                # so, we want to assign the particle to the finest of
1006                # the grids we just found
1007                if len(selected_grids) != 0:
1008                    grid = sorted(selected_grids, key=lambda grid: grid.Level)[-1]
1009                    ind = np.where(self.grids == grid)[0][0]
1010                    self.grid_particle_count[ind] += 1
1011                    self.grids[ind].NumberOfParticles += 1
1012
1013                    # store the position in the particle file for fast access.
1014                    try:
1015                        self.grids[ind]._particle_line_numbers.append(num + 1)
1016                    except AttributeError:
1017                        self.grids[ind]._particle_line_numbers = [num + 1]
1018        return True
1019
1020
1021class OrionDataset(BoxlibDataset):
1022
1023    _index_class = OrionHierarchy
1024    _subtype_keyword = "hyp."
1025    _default_cparam_filename = "inputs"
1026
1027    def __init__(
1028        self,
1029        output_dir,
1030        cparam_filename=None,
1031        fparam_filename="probin",
1032        dataset_type="orion_native",
1033        storage_filename=None,
1034        units_override=None,
1035        unit_system="cgs",
1036        default_species_fields=None,
1037    ):
1038
1039        BoxlibDataset.__init__(
1040            self,
1041            output_dir,
1042            cparam_filename,
1043            fparam_filename,
1044            dataset_type,
1045            units_override=units_override,
1046            unit_system=unit_system,
1047            default_species_fields=default_species_fields,
1048        )
1049
1050
1051class CastroHierarchy(BoxlibHierarchy):
1052    def __init__(self, ds, dataset_type="castro_native"):
1053        super().__init__(ds, dataset_type)
1054
1055        if "particles" in self.ds.parameters:
1056
1057            # extra beyond the base real fields that all Boxlib
1058            # particles have, i.e. the xyz positions
1059            castro_extra_real_fields = [
1060                "particle_velocity_x",
1061                "particle_velocity_y",
1062                "particle_velocity_z",
1063            ]
1064
1065            is_checkpoint = True
1066
1067            self._read_particles(
1068                "Tracer",
1069                is_checkpoint,
1070                castro_extra_real_fields[0 : self.ds.dimensionality],
1071            )
1072
1073
1074class CastroDataset(BoxlibDataset):
1075
1076    _index_class = CastroHierarchy
1077    _field_info_class = CastroFieldInfo
1078    _subtype_keyword = "castro"
1079    _default_cparam_filename = "job_info"
1080
1081    def __init__(
1082        self,
1083        output_dir,
1084        cparam_filename=None,
1085        fparam_filename=None,
1086        dataset_type="boxlib_native",
1087        storage_filename=None,
1088        units_override=None,
1089        unit_system="cgs",
1090        default_species_fields=None,
1091    ):
1092
1093        super().__init__(
1094            output_dir,
1095            cparam_filename,
1096            fparam_filename,
1097            dataset_type,
1098            storage_filename,
1099            units_override,
1100            unit_system,
1101            default_species_fields=default_species_fields,
1102        )
1103
1104    def _parse_parameter_file(self):
1105        super()._parse_parameter_file()
1106        jobinfo_filename = os.path.join(self.output_dir, self.cparam_filename)
1107        line = ""
1108        with open(jobinfo_filename) as f:
1109            while not line.startswith(" Inputs File Parameters"):
1110                # boundary condition info starts with -x:, etc.
1111                bcs = ["-x:", "+x:", "-y:", "+y:", "-z:", "+z:"]
1112                if any(b in line for b in bcs):
1113                    p, v = line.strip().split(":")
1114                    self.parameters[p] = v.strip()
1115                if "git describe" in line or "git hash" in line:
1116                    # Castro release 17.02 and later
1117                    #    line format: codename git describe:  the-hash
1118                    # Castro before release 17.02
1119                    #    line format: codename git hash:  the-hash
1120                    fields = line.split(":")
1121                    self.parameters[fields[0]] = fields[1].strip()
1122                line = next(f)
1123
1124            # runtime parameters that we overrode follow "Inputs File
1125            # Parameters"
1126            # skip the "====..." line
1127            line = next(f)
1128            for line in f:
1129                if line.strip() == "" or "fortin parameters" in line:
1130                    continue
1131                p, v = line.strip().split("=")
1132                self.parameters[p] = v.strip()
1133
1134        # hydro method is set by the base class -- override it here
1135        self.parameters["HydroMethod"] = "Castro"
1136
1137        # set the periodicity based on the runtime parameters
1138        # https://amrex-astro.github.io/Castro/docs/inputs.html?highlight=periodicity
1139        periodicity = [False, False, False]
1140        for i, axis in enumerate("xyz"):
1141            try:
1142                periodicity[i] = self.parameters[f"-{axis}"] == "interior"
1143            except KeyError:
1144                break
1145
1146        self._periodicity = tuple(periodicity)
1147
1148        if os.path.isdir(os.path.join(self.output_dir, "Tracer")):
1149            # we have particles
1150            self.parameters["particles"] = 1
1151            self.particle_types = ("Tracer",)
1152            self.particle_types_raw = self.particle_types
1153
1154
1155class MaestroDataset(BoxlibDataset):
1156
1157    _field_info_class = MaestroFieldInfo
1158    _subtype_keyword = "maestro"
1159    _default_cparam_filename = "job_info"
1160
1161    def __init__(
1162        self,
1163        output_dir,
1164        cparam_filename=None,
1165        fparam_filename=None,
1166        dataset_type="boxlib_native",
1167        storage_filename=None,
1168        units_override=None,
1169        unit_system="cgs",
1170        default_species_fields=None,
1171    ):
1172
1173        super().__init__(
1174            output_dir,
1175            cparam_filename,
1176            fparam_filename,
1177            dataset_type,
1178            storage_filename,
1179            units_override,
1180            unit_system,
1181            default_species_fields=default_species_fields,
1182        )
1183
1184    def _parse_parameter_file(self):
1185        super()._parse_parameter_file()
1186        jobinfo_filename = os.path.join(self.output_dir, self.cparam_filename)
1187
1188        with open(jobinfo_filename) as f:
1189            for line in f:
1190                # get the code git hashes
1191                if "git hash" in line:
1192                    # line format: codename git hash:  the-hash
1193                    fields = line.split(":")
1194                    self.parameters[fields[0]] = fields[1].strip()
1195
1196        with open(jobinfo_filename) as f:
1197            # get the runtime parameters
1198            for line in f:
1199                try:
1200                    p, v = (_.strip() for _ in line[4:].split("=", 1))
1201                    if len(v) == 0:
1202                        self.parameters[p] = ""
1203                    else:
1204                        self.parameters[p] = _guess_pcast(v)
1205                except ValueError:
1206                    # not a parameter line
1207                    pass
1208
1209            # hydro method is set by the base class -- override it here
1210            self.parameters["HydroMethod"] = "Maestro"
1211
1212        # set the periodicity based on the integer BC runtime parameters
1213        periodicity = [False, False, False]
1214        for i, ax in enumerate("xyz"):
1215            try:
1216                periodicity[i] = self.parameters[f"bc{ax}_lo"] != -1
1217            except KeyError:
1218                pass
1219
1220        self._periodicity = tuple(periodicity)
1221
1222
1223class NyxHierarchy(BoxlibHierarchy):
1224    def __init__(self, ds, dataset_type="nyx_native"):
1225        super().__init__(ds, dataset_type)
1226
1227        if "particles" in self.ds.parameters:
1228            # extra beyond the base real fields that all Boxlib
1229            # particles have, i.e. the xyz positions
1230            nyx_extra_real_fields = [
1231                "particle_mass",
1232                "particle_velocity_x",
1233                "particle_velocity_y",
1234                "particle_velocity_z",
1235            ]
1236
1237            is_checkpoint = False
1238
1239            self._read_particles(
1240                "DM",
1241                is_checkpoint,
1242                nyx_extra_real_fields[0 : self.ds.dimensionality + 1],
1243            )
1244
1245
1246class NyxDataset(BoxlibDataset):
1247
1248    _index_class = NyxHierarchy
1249    _field_info_class = NyxFieldInfo
1250    _subtype_keyword = "nyx"
1251    _default_cparam_filename = "job_info"
1252
1253    def __init__(
1254        self,
1255        output_dir,
1256        cparam_filename=None,
1257        fparam_filename=None,
1258        dataset_type="boxlib_native",
1259        storage_filename=None,
1260        units_override=None,
1261        unit_system="cgs",
1262        default_species_fields=None,
1263    ):
1264
1265        super().__init__(
1266            output_dir,
1267            cparam_filename,
1268            fparam_filename,
1269            dataset_type,
1270            storage_filename,
1271            units_override,
1272            unit_system,
1273            default_species_fields=default_species_fields,
1274        )
1275
1276    def _parse_parameter_file(self):
1277        super()._parse_parameter_file()
1278
1279        jobinfo_filename = os.path.join(self.output_dir, self.cparam_filename)
1280
1281        with open(jobinfo_filename) as f:
1282            for line in f:
1283                # get the code git hashes
1284                if "git hash" in line:
1285                    # line format: codename git hash:  the-hash
1286                    fields = line.split(":")
1287                    self.parameters[fields[0]] = fields[1].strip()
1288
1289                if line.startswith(" Cosmology Information"):
1290                    self.cosmological_simulation = 1
1291                    break
1292            else:
1293                self.cosmological_simulation = 0
1294
1295            if self.cosmological_simulation:
1296                # note that modern Nyx is always cosmological, but there are some old
1297                # files without these parameters so we want to special-case them
1298                for line in f:
1299                    if "Omega_m (comoving)" in line:
1300                        self.omega_matter = float(line.split(":")[1])
1301                    elif "Omega_lambda (comoving)" in line:
1302                        self.omega_lambda = float(line.split(":")[1])
1303                    elif "h (comoving)" in line:
1304                        self.hubble_constant = float(line.split(":")[1])
1305
1306        # Read in the `comoving_a` file and parse the value. We should fix this
1307        # in the new Nyx output format...
1308        a_file = open(os.path.join(self.output_dir, "comoving_a"))
1309        a_string = a_file.readline().strip()
1310        a_file.close()
1311
1312        # Set the scale factor and redshift
1313        self.cosmological_scale_factor = float(a_string)
1314        self.parameters["CosmologyCurrentRedshift"] = 1 / float(a_string) - 1
1315
1316        # alias
1317        self.current_redshift = self.parameters["CosmologyCurrentRedshift"]
1318        if os.path.isfile(os.path.join(self.output_dir, "DM/Header")):
1319            # we have particles
1320            self.parameters["particles"] = 1
1321            self.particle_types = ("DM",)
1322            self.particle_types_raw = self.particle_types
1323
1324    def _set_code_unit_attributes(self):
1325        setdefaultattr(self, "mass_unit", self.quan(1.0, "Msun"))
1326        setdefaultattr(self, "time_unit", self.quan(1.0 / 3.08568025e19, "s"))
1327        setdefaultattr(
1328            self, "length_unit", self.quan(1.0 / (1 + self.current_redshift), "Mpc")
1329        )
1330        setdefaultattr(self, "velocity_unit", self.length_unit / self.time_unit)
1331
1332
1333def _guess_pcast(vals):
1334    # Now we guess some things about the parameter and its type
1335    # Just in case there are multiple; we'll go
1336    # back afterward to using vals.
1337    v = vals.split()[0]
1338    try:
1339        float(v.upper().replace("D", "E"))
1340    except Exception:
1341        pcast = str
1342        if v in ("F", "T"):
1343            pcast = bool
1344    else:
1345        syms = (".", "D+", "D-", "E+", "E-", "E", "D")
1346        if any(sym in v.upper() for sym in syms for v in vals.split()):
1347            pcast = float
1348        else:
1349            pcast = int
1350    if pcast == bool:
1351        vals = [value == "T" for value in vals.split()]
1352    else:
1353        vals = [pcast(value) for value in vals.split()]
1354    if len(vals) == 1:
1355        vals = vals[0]
1356    return vals
1357
1358
1359def _read_raw_field_names(raw_file):
1360    header_files = glob.glob(raw_file + "*_H")
1361    return [hf.split(os.sep)[-1][:-2] for hf in header_files]
1362
1363
1364def _string_to_numpy_array(s):
1365    return np.array([int(v) for v in s[1:-1].split(",")], dtype=np.int64)
1366
1367
1368def _line_to_numpy_arrays(line):
1369    lo_corner = _string_to_numpy_array(line[0][1:])
1370    hi_corner = _string_to_numpy_array(line[1][:])
1371    node_type = _string_to_numpy_array(line[2][:-1])
1372    return lo_corner, hi_corner, node_type
1373
1374
1375def _get_active_dimensions(box):
1376    return box[1] - box[2] - box[0] + 1
1377
1378
1379def _read_header(raw_file, field):
1380    level_files = glob.glob(raw_file + "Level_*")
1381    level_files.sort()
1382
1383    all_boxes = []
1384    all_file_names = []
1385    all_offsets = []
1386
1387    for level_file in level_files:
1388        header_file = level_file + "/" + field + "_H"
1389        with open(header_file) as f:
1390
1391            f.readline()  # version
1392            f.readline()  # how
1393            f.readline()  # ncomp
1394
1395            # nghost_line will be parsed below after the number of dimensions
1396            # is determined when the boxes are read in
1397            nghost_line = f.readline().strip().split()
1398
1399            f.readline()  # num boxes
1400
1401            # read boxes
1402            boxes = []
1403            for line in f:
1404                clean_line = line.strip().split()
1405                if clean_line == [")"]:
1406                    break
1407                lo_corner, hi_corner, node_type = _line_to_numpy_arrays(clean_line)
1408                boxes.append((lo_corner, hi_corner, node_type))
1409
1410            try:
1411                # nghost_line[0] is a single number
1412                ng = int(nghost_line[0])
1413                ndims = len(lo_corner)
1414                nghost = np.array(ndims * [ng])
1415            except ValueError:
1416                # nghost_line[0] is (#,#,#)
1417                nghost_list = nghost_line[0].strip("()").split(",")
1418                nghost = np.array(nghost_list, dtype="int64")
1419
1420            # read the file and offset position for the corresponding box
1421            file_names = []
1422            offsets = []
1423            for line in f:
1424                if line.startswith("FabOnDisk:"):
1425                    clean_line = line.strip().split()
1426                    file_names.append(clean_line[1])
1427                    offsets.append(int(clean_line[2]))
1428
1429            all_boxes += boxes
1430            all_file_names += file_names
1431            all_offsets += offsets
1432
1433    return nghost, all_boxes, all_file_names, all_offsets
1434
1435
1436class WarpXHeader:
1437    def __init__(self, header_fn):
1438        self.data = {}
1439        with open(header_fn) as f:
1440            self.data["Checkpoint_version"] = int(f.readline().strip().split()[-1])
1441
1442            self.data["num_levels"] = int(f.readline().strip().split()[-1])
1443            self.data["istep"] = [int(num) for num in f.readline().strip().split()]
1444            self.data["nsubsteps"] = [int(num) for num in f.readline().strip().split()]
1445
1446            self.data["t_new"] = [float(num) for num in f.readline().strip().split()]
1447            self.data["t_old"] = [float(num) for num in f.readline().strip().split()]
1448            self.data["dt"] = [float(num) for num in f.readline().strip().split()]
1449
1450            self.data["moving_window_x"] = float(f.readline().strip().split()[-1])
1451
1452            #  not all datasets will have is_synchronized
1453            line = f.readline().strip().split()
1454            if len(line) == 1:
1455                self.data["is_synchronized"] = bool(line[-1])
1456                self.data["prob_lo"] = [
1457                    float(num) for num in f.readline().strip().split()
1458                ]
1459            else:
1460                self.data["is_synchronized"] = True
1461                self.data["prob_lo"] = [float(num) for num in line]
1462
1463            self.data["prob_hi"] = [float(num) for num in f.readline().strip().split()]
1464
1465            for _ in range(self.data["num_levels"]):
1466                num_boxes = int(f.readline().strip().split()[0][1:])
1467                for __ in range(num_boxes):
1468                    f.readline()
1469                f.readline()
1470
1471            i = 0
1472            line = f.readline()
1473            while line:
1474                line = line.strip().split()
1475                if len(line) == 1:
1476                    line = f.readline()
1477                    continue
1478                self.data["species_%d" % i] = [float(val) for val in line]
1479                i = i + 1
1480                line = f.readline()
1481
1482
1483class WarpXHierarchy(BoxlibHierarchy):
1484    def __init__(self, ds, dataset_type="boxlib_native"):
1485        super().__init__(ds, dataset_type)
1486
1487        is_checkpoint = True
1488        for ptype in self.ds.particle_types:
1489            self._read_particles(ptype, is_checkpoint)
1490
1491        # Additional WarpX particle information (used to set up species)
1492        self.warpx_header = WarpXHeader(self.ds.output_dir + "/WarpXHeader")
1493
1494        for key, val in self.warpx_header.data.items():
1495            if key.startswith("species_"):
1496                i = int(key.split("_")[-1])
1497                charge_name = "particle%.1d_charge" % i
1498                mass_name = "particle%.1d_mass" % i
1499                self.parameters[charge_name] = val[0]
1500                self.parameters[mass_name] = val[1]
1501
1502    def _detect_output_fields(self):
1503        super()._detect_output_fields()
1504
1505        # now detect the optional, non-cell-centered fields
1506        self.raw_file = self.ds.output_dir + "/raw_fields/"
1507        self.raw_fields = _read_raw_field_names(self.raw_file + "Level_0/")
1508        self.field_list += [("raw", f) for f in self.raw_fields]
1509        self.raw_field_map = {}
1510        self.ds.nodal_flags = {}
1511        self.raw_field_nghost = {}
1512        for field_name in self.raw_fields:
1513            nghost, boxes, file_names, offsets = _read_header(self.raw_file, field_name)
1514            self.raw_field_map[field_name] = (boxes, file_names, offsets)
1515            self.raw_field_nghost[field_name] = nghost
1516            self.ds.nodal_flags[field_name] = np.array(boxes[0][2])
1517
1518
1519def _skip_line(line):
1520    if len(line) == 0:
1521        return True
1522    if line[0] == "\n":
1523        return True
1524    if line[0] == "=":
1525        return True
1526    if line[0] == " ":
1527        return True
1528
1529
1530class WarpXDataset(BoxlibDataset):
1531
1532    _index_class = WarpXHierarchy
1533    _field_info_class = WarpXFieldInfo
1534    _subtype_keyword = "warpx"
1535    _default_cparam_filename = "warpx_job_info"
1536
1537    def __init__(
1538        self,
1539        output_dir,
1540        cparam_filename=None,
1541        fparam_filename=None,
1542        dataset_type="boxlib_native",
1543        storage_filename=None,
1544        units_override=None,
1545        unit_system="mks",
1546    ):
1547
1548        self.default_fluid_type = "mesh"
1549        self.default_field = ("mesh", "density")
1550        self.fluid_types = ("mesh", "index", "raw")
1551
1552        super().__init__(
1553            output_dir,
1554            cparam_filename,
1555            fparam_filename,
1556            dataset_type,
1557            storage_filename,
1558            units_override,
1559            unit_system,
1560        )
1561
1562    def _parse_parameter_file(self):
1563        super()._parse_parameter_file()
1564        jobinfo_filename = os.path.join(self.output_dir, self.cparam_filename)
1565        with open(jobinfo_filename) as f:
1566            for line in f.readlines():
1567                if _skip_line(line):
1568                    continue
1569                l = line.strip().split(":")
1570                if len(l) == 2:
1571                    self.parameters[l[0].strip()] = l[1].strip()
1572                l = line.strip().split("=")
1573                if len(l) == 2:
1574                    self.parameters[l[0].strip()] = l[1].strip()
1575
1576        # set the periodicity based on the integer BC runtime parameters
1577        # https://amrex-codes.github.io/amrex/docs_html/InputsProblemDefinition.html
1578        periodicity = [False, False, False]
1579        try:
1580            is_periodic = self.parameters["geometry.is_periodic"].split()
1581            periodicity[: len(is_periodic)] = [p == "1" for p in is_periodic]
1582        except KeyError:
1583            pass
1584        self._periodicity = tuple(periodicity)
1585
1586        particle_types = glob.glob(self.output_dir + "/*/Header")
1587        particle_types = [cpt.split(os.sep)[-2] for cpt in particle_types]
1588        if len(particle_types) > 0:
1589            self.parameters["particles"] = 1
1590            self.particle_types = tuple(particle_types)
1591            self.particle_types_raw = self.particle_types
1592        else:
1593            self.particle_types = ()
1594            self.particle_types_raw = ()
1595
1596    def _set_code_unit_attributes(self):
1597        setdefaultattr(self, "length_unit", self.quan(1.0, "m"))
1598        setdefaultattr(self, "mass_unit", self.quan(1.0, "kg"))
1599        setdefaultattr(self, "time_unit", self.quan(1.0, "s"))
1600        setdefaultattr(self, "velocity_unit", self.quan(1.0, "m/s"))
1601        setdefaultattr(self, "magnetic_unit", self.quan(1.0, "T"))
1602
1603
1604class AMReXHierarchy(BoxlibHierarchy):
1605    def __init__(self, ds, dataset_type="boxlib_native"):
1606        super().__init__(ds, dataset_type)
1607
1608        if "particles" in self.ds.parameters:
1609            is_checkpoint = True
1610            for ptype in self.ds.particle_types:
1611                self._read_particles(ptype, is_checkpoint)
1612
1613
1614class AMReXDataset(BoxlibDataset):
1615
1616    _index_class = AMReXHierarchy
1617    _subtype_keyword = "amrex"
1618    _default_cparam_filename = "job_info"
1619
1620    def __init__(
1621        self,
1622        output_dir,
1623        cparam_filename=None,
1624        fparam_filename=None,
1625        dataset_type="boxlib_native",
1626        storage_filename=None,
1627        units_override=None,
1628        unit_system="cgs",
1629        default_species_fields=None,
1630    ):
1631
1632        super().__init__(
1633            output_dir,
1634            cparam_filename,
1635            fparam_filename,
1636            dataset_type,
1637            storage_filename,
1638            units_override,
1639            unit_system,
1640            default_species_fields=default_species_fields,
1641        )
1642
1643    def _parse_parameter_file(self):
1644        super()._parse_parameter_file()
1645        particle_types = glob.glob(self.output_dir + "/*/Header")
1646        particle_types = [cpt.split(os.sep)[-2] for cpt in particle_types]
1647        if len(particle_types) > 0:
1648            self.parameters["particles"] = 1
1649            self.particle_types = tuple(particle_types)
1650            self.particle_types_raw = self.particle_types
1651