1"""
2Main ChemKED module
3"""
4# Standard libraries
5from os.path import exists
6from collections import namedtuple
7from warnings import warn
8from copy import deepcopy
9import xml.etree.ElementTree as etree
10import xml.dom.minidom as minidom
11from itertools import chain
12
13import numpy as np
14
15# Local imports
16from .validation import schema, OurValidator, yaml, Q_
17from .converters import datagroup_properties, ReSpecTh_to_ChemKED
18
19VolumeHistory = namedtuple('VolumeHistory', ['time', 'volume'])
20VolumeHistory.__doc__ = 'Time history of the volume in an RCM experiment. Deprecated, to be removed after PyKED 0.4'  # noqa: E501
21VolumeHistory.time.__doc__ = '(`~numpy.ndarray`): the time during the experiment'
22VolumeHistory.volume.__doc__ = '(`~numpy.ndarray`): the volume during the experiment'
23
24TimeHistory = namedtuple('TimeHistory', ['time', 'quantity', 'type'])
25TimeHistory.__doc__ = 'Time history of the quantity in an RCM experiment'
26TimeHistory.time.__doc__ = '(`~numpy.ndarray`): the time during the experiment'
27TimeHistory.quantity.__doc__ = '(`~numpy.ndarray`): the quantity of interest during the experiment'
28TimeHistory.type.__doc__ = """\
29(`str`): the type of time history represented. Possible options are:
30
31* volume
32* temperature
33* pressure
34* piston position
35* light emission
36* OH emission
37* absorption
38"""
39
40RCMData = namedtuple(
41    'RCMData',
42    ['compressed_pressure', 'compressed_temperature', 'compression_time', 'stroke',
43     'clearance', 'compression_ratio']
44)
45RCMData.__doc__ = 'Data fields specific to rapid compression machine experiments'
46RCMData.compressed_pressure.__doc__ = '(`~pint.Quantity`) The pressure at the end of compression'
47RCMData.compressed_temperature.__doc__ = """\
48(`~pint.Quantity`) The temperature at the end of compression"""
49RCMData.compression_time.__doc__ = '(`~pint.Quantity`) The duration of the compression stroke'
50RCMData.stroke.__doc__ = '(`~pint.Quantity`) The length of the stroke'
51RCMData.clearance.__doc__ = """\
52(`~pint.Quantity`) The clearance between piston face and end wall at the end of compression"""
53RCMData.compression_ratio.__doc__ = '(`~pint.Quantity`) The volumetric compression ratio'
54
55Reference = namedtuple('Reference',
56                       ['volume', 'journal', 'doi', 'authors', 'detail', 'year', 'pages'])
57Reference.__doc__ = 'Information about the article or report where the data can be found'
58Reference.volume.__doc__ = '(`str`) The journal volume'
59Reference.journal.__doc__ = '(`str`) The name of the journal'
60Reference.doi.__doc__ = '(`str`) The Digital Object Identifier of the article'
61Reference.authors.__doc__ = '(`list`) The list of authors of the article'
62Reference.detail.__doc__ = '(`str`) Detail about where the data can be found in the article'
63Reference.year.__doc__ = '(`str`) The year the article was published'
64Reference.pages.__doc__ = '(`str`) The pages in the journal where the article was published'
65
66Apparatus = namedtuple('Apparatus', ['kind', 'institution', 'facility'])
67Apparatus.__doc__ = 'Information about the experimental apparatus used to generate the data'
68Apparatus.kind.__doc__ = '(`str`) The kind of experimental apparatus'
69Apparatus.institution.__doc__ = '(`str`) The institution where the experiment is located'
70Apparatus.facility.__doc__ = '(`str`) The particular experimental facility at the location'
71
72Composition = namedtuple('Composition', 'species_name InChI SMILES atomic_composition amount')
73Composition.__doc__ = 'Detail of the initial composition of the mixture for the experiment'
74Composition.species_name.__doc__ = '(`str`) The name of the species'
75Composition.InChI.__doc__ = '(`str`) The InChI identifier for the species'
76Composition.SMILES.__doc__ = '(`str`) The SMILES identifier for the species'
77Composition.atomic_composition.__doc__ = '(`dict`) The atomic composition of the species'
78Composition.amount.__doc__ = '(`~pint.Quantity`) The amount of this species'
79
80
81class ChemKED(object):
82    """Main ChemKED class.
83
84    The ChemKED class stores information about the contents of a ChemKED database
85    file. It stores each datapoint associated with the database and provides access
86    the the reference information, versions, and file author.
87
88    Arguments:
89        yaml_file (`str`, optional): The filename of the YAML database in ChemKED format.
90        dict_input (`dict`, optional): A dictionary with the parsed ouput of YAML file in ChemKED
91            format.
92        skip_validation (`bool`, optional): Whether validation of the ChemKED should be done. Must
93            be supplied as a keyword-argument.
94
95    Attributes:
96        datapoints (`list`): List of `DataPoint` objects storing each datapoint in the database.
97        reference (`~collections.namedtuple`): Attributes include ``volume``, ``journal``, ``doi``,
98            ``authors``, ``detail``, ``year``, and ``pages`` describing the reference from which the
99            datapoints are derived.
100        apparatus (`~collections.namedtuple`): Attributes include ``kind`` of experimental
101            apparatus, and the ``institution`` and ``facility`` where the experimental apparatus is
102            located.
103        chemked_version (`str`): Version of the ChemKED database schema used in this file.
104        experiment_type (`str`): Type of exeperimental data contained in this database.
105        file_author (`dict`): Information about the author of the ChemKED database file.
106        file_version (`str`): Version of the ChemKED database file.
107        _properties (`dict`): Original dictionary read from ChemKED database file, meant for
108            internal use.
109    """
110    def __init__(self, yaml_file=None, dict_input=None, *, skip_validation=False):
111        if yaml_file is not None:
112            with open(yaml_file, 'r') as f:
113                self._properties = yaml.safe_load(f)
114        elif dict_input is not None:
115            self._properties = dict_input
116        else:
117            raise NameError("ChemKED needs either a YAML filename or dictionary as input.")
118
119        if not skip_validation:
120            self.validate_yaml(self._properties)
121
122        self.datapoints = []
123        for point in self._properties['datapoints']:
124            self.datapoints.append(DataPoint(point))
125
126        self.reference = Reference(
127            volume=self._properties['reference'].get('volume'),
128            journal=self._properties['reference'].get('journal'),
129            doi=self._properties['reference'].get('doi'),
130            authors=self._properties['reference'].get('authors'),
131            detail=self._properties['reference'].get('detail'),
132            year=self._properties['reference'].get('year'),
133            pages=self._properties['reference'].get('pages'),
134        )
135
136        self.apparatus = Apparatus(
137            kind=self._properties['apparatus'].get('kind'),
138            institution=self._properties['apparatus'].get('institution'),
139            facility=self._properties['apparatus'].get('facility'),
140        )
141
142        for prop in ['chemked-version', 'experiment-type', 'file-authors', 'file-version']:
143            setattr(self, prop.replace('-', '_'), self._properties[prop])
144
145    @classmethod
146    def from_respecth(cls, filename_xml, file_author='', file_author_orcid=''):
147        """Construct a ChemKED instance directly from a ReSpecTh file.
148
149        Arguments:
150            filename_xml (`str`): Filename of the ReSpecTh-formatted XML file to be imported
151            file_author (`str`, optional): File author to be added to the list generated from the
152                XML file
153            file_author_orcid (`str`, optional): ORCID for the file author being added to the list
154                of file authors
155
156        Returns:
157            `ChemKED`: Instance of the `ChemKED` class containing the data in ``filename_xml``.
158
159        Examples:
160            >>> ck = ChemKED.from_respecth('respecth_file.xml')
161            >>> ck = ChemKED.from_respecth('respecth_file.xml', file_author='Bryan W. Weber')
162            >>> ck = ChemKED.from_respecth('respecth_file.xml', file_author='Bryan W. Weber',
163                                           file_author_orcid='0000-0000-0000-0000')
164        """
165        properties = ReSpecTh_to_ChemKED(filename_xml, file_author, file_author_orcid,
166                                         validate=False)
167        return cls(dict_input=properties)
168
169    def validate_yaml(self, properties):
170        """Validate the parsed YAML file for adherance to the ChemKED format.
171
172        Arguments:
173            properties (`dict`): Dictionary created from the parsed YAML file
174
175        Raises:
176            `ValueError`: If the YAML file cannot be validated, a `ValueError` is raised whose
177                string contains the errors that are present.
178        """
179        validator = OurValidator(schema)
180        if not validator.validate(properties):
181            for key, value in validator.errors.items():
182                if any(['unallowed value' in v for v in value]):
183                    print(('{key} has an illegal value. Allowed values are {values} and are case '
184                           'sensitive.').format(key=key, values=schema[key]['allowed']))
185
186            raise ValueError(validator.errors)
187
188    def get_dataframe(self, output_columns=None):
189        """Get a Pandas DataFrame of the datapoints in this instance.
190
191        Arguments:
192            output_columns (`list`, optional): List of strings specifying the columns to include
193                in the output DataFrame. The default is `None`, which outputs all of the
194                columns. Options include (not case sensitive):
195
196                    * ``Temperature``
197                    * ``Pressure``
198                    * ``Ignition Delay``
199                    * ``Composition``
200                    * ``Equivalence Ratio``
201                    * ``Reference``
202                    * ``Apparatus``
203                    * ``Experiment Type``
204                    * ``File Author``
205                    * ``File Version``
206                    * ``ChemKED Version``
207
208                In addition, specific fields from the ``Reference`` and ``Apparatus`` attributes can
209                be included by specifying the name after a colon. These options are:
210
211                    * ``Reference:Volume``
212                    * ``Reference:Journal``
213                    * ``Reference:DOI``
214                    * ``Reference:Authors``
215                    * ``Reference:Detail``
216                    * ``Reference:Year``
217                    * ``Reference:Pages``
218                    * ``Apparatus:Kind``
219                    * ``Apparatus:Facility``
220                    * ``Apparatus:Institution``
221
222                Only the first author is printed when ``Reference`` or ``Reference:Authors`` is
223                selected because the whole author list may be quite long.
224
225        Note:
226            If the Composition is selected as an output type, the composition specified in the
227            `DataPoint` is used. No attempt is made to convert to a consistent basis; mole fractions
228            will remain mole fractions, mass fractions will remain mass fractions, and mole percent
229            will remain mole percent. Therefore, it is possible to end up with more than one type of
230            composition specification in a given column. However, if the composition is included
231            in the resulting dataframe, the type of each composition will be specified by the "Kind"
232            field in each row.
233
234        Examples:
235            >>> df = ChemKED(yaml_file).get_dataframe()
236            >>> df = ChemKED(yaml_file).get_dataframe(['Temperature', 'Ignition Delay'])
237
238        Returns:
239            `~pandas.DataFrame`: Contains the information regarding each point in the ``datapoints``
240                attribute
241        """
242        import pandas as pd
243
244        valid_labels = [a.replace('_', ' ') for a in self.__dict__
245                        if not (a.startswith('__') or a.startswith('_'))
246                        ]
247        valid_labels.remove('datapoints')
248        valid_labels.extend(
249            ['composition', 'ignition delay', 'temperature', 'pressure', 'equivalence ratio']
250        )
251        ref_index = valid_labels.index('reference')
252        valid_labels[ref_index:ref_index + 1] = ['reference:' + a for a in Reference._fields]
253        app_index = valid_labels.index('apparatus')
254        valid_labels[app_index:app_index + 1] = ['apparatus:' + a for a in Apparatus._fields]
255        species_list = list(set(chain(*[list(d.composition.keys()) for d in self.datapoints])))
256
257        if output_columns is None or len(output_columns) == 0:
258            col_labels = valid_labels
259            comp_index = col_labels.index('composition')
260            col_labels[comp_index:comp_index + 1] = species_list + ['Composition:Kind']
261        else:
262            output_columns = [a.lower() for a in output_columns]
263            col_labels = []
264            for col in output_columns:
265                if col in valid_labels or col in ['reference', 'apparatus']:
266                    col_labels.append(col)
267                else:
268                    raise ValueError('{} is not a valid output column choice'.format(col))
269
270            if 'composition' in col_labels:
271                comp_index = col_labels.index('composition')
272                col_labels[comp_index:comp_index + 1] = species_list + ['Composition:Kind']
273            if 'reference' in col_labels:
274                ref_index = col_labels.index('reference')
275                col_labels[ref_index:ref_index + 1] = ['reference:' + a for a in Reference._fields]
276            if 'apparatus' in col_labels:
277                app_index = col_labels.index('apparatus')
278                col_labels[app_index:app_index + 1] = ['apparatus:' + a for a in Apparatus._fields]
279
280        data = []
281        for d in self.datapoints:
282            row = []
283            d_species = list(d.composition.keys())
284            for col in col_labels:
285                if col in species_list:
286                    if col in d_species:
287                        row.append(d.composition[col].amount)
288                    else:
289                        row.append(Q_(0.0, 'dimensionless'))
290                elif 'reference' in col or 'apparatus' in col:
291                    split_col = col.split(':')
292                    if split_col[1] == 'authors':
293                        row.append(getattr(getattr(self, split_col[0]), split_col[1])[0]['name'])
294                    else:
295                        row.append(getattr(getattr(self, split_col[0]), split_col[1]))
296                elif col in ['temperature', 'pressure', 'ignition delay', 'equivalence ratio']:
297                    row.append(getattr(d, col.replace(' ', '_')))
298                elif col == 'file authors':
299                    row.append(getattr(self, col.replace(' ', '_'))[0]['name'])
300                elif col == 'Composition:Kind':
301                    row.append(d.composition_type)
302                else:
303                    row.append(getattr(self, col.replace(' ', '_')))
304            data.append(row)
305
306        col_labels = [a.title() for a in col_labels]
307        columns = pd.Index(col_labels)
308        return pd.DataFrame(data=data, columns=columns)
309
310    def write_file(self, filename, *, overwrite=False):
311        """Write new ChemKED YAML file based on object.
312
313        Arguments:
314            filename (`str`): Filename for target YAML file
315            overwrite (`bool`, optional): Whether to overwrite file with given name if present.
316                Must be supplied as a keyword-argument.
317
318        Raises:
319            `NameError`: If ``filename`` is already present, and ``overwrite`` is not ``True``.
320
321        Example:
322            >>> dataset = ChemKED(yaml_file)
323            >>> dataset.write_file(new_yaml_file)
324        """
325        # Ensure file isn't already present
326        if exists(filename) and not overwrite:
327            raise OSError(filename + ' already present. Specify "overwrite=True" '
328                          'to overwrite, or rename.'
329                          )
330
331        with open(filename, 'w') as yaml_file:
332            yaml.dump(self._properties, yaml_file)
333
334    def convert_to_ReSpecTh(self, filename):
335        """Convert ChemKED record to ReSpecTh XML file.
336
337        This converter uses common information in a ChemKED file to generate a
338        ReSpecTh XML file. Note that some information may be lost, as ChemKED stores
339        some additional attributes.
340
341        Arguments:
342            filename (`str`): Filename for output ReSpecTh XML file.
343
344        Example:
345            >>> dataset = ChemKED(yaml_file)
346            >>> dataset.convert_to_ReSpecTh(xml_file)
347        """
348        root = etree.Element('experiment')
349
350        file_author = etree.SubElement(root, 'fileAuthor')
351        file_author.text = self.file_authors[0]['name']
352
353        # right now ChemKED just uses an integer file version
354        file_version = etree.SubElement(root, 'fileVersion')
355        major_version = etree.SubElement(file_version, 'major')
356        major_version.text = str(self.file_version)
357        minor_version = etree.SubElement(file_version, 'minor')
358        minor_version.text = '0'
359
360        respecth_version = etree.SubElement(root, 'ReSpecThVersion')
361        major_version = etree.SubElement(respecth_version, 'major')
362        major_version.text = '1'
363        minor_version = etree.SubElement(respecth_version, 'minor')
364        minor_version.text = '0'
365
366        # Only ignition delay currently supported
367        exp = etree.SubElement(root, 'experimentType')
368        if self.experiment_type == 'ignition delay':
369            exp.text = 'Ignition delay measurement'
370        else:
371            raise NotImplementedError('Only ignition delay type supported for conversion.')
372
373        reference = etree.SubElement(root, 'bibliographyLink')
374        citation = ''
375        for author in self.reference.authors:
376            citation += author['name'] + ', '
377        citation += (self.reference.journal + ' (' + str(self.reference.year) + ') ' +
378                     str(self.reference.volume) + ':' + self.reference.pages + '. ' +
379                     self.reference.detail
380                     )
381        reference.set('preferredKey', citation)
382        reference.set('doi', self.reference.doi)
383
384        apparatus = etree.SubElement(root, 'apparatus')
385        kind = etree.SubElement(apparatus, 'kind')
386        kind.text = self.apparatus.kind
387
388        common_properties = etree.SubElement(root, 'commonProperties')
389        # ChemKED objects have no common properties once loaded. Check for properties
390        # among datapoints that tend to be common
391        common = []
392        composition = self.datapoints[0].composition
393
394        # Composition type *has* to be the same
395        composition_type = self.datapoints[0].composition_type
396        if not all(dp.composition_type == composition_type for dp in self.datapoints):
397            raise NotImplementedError('Error: ReSpecTh does not support varying composition '
398                                      'type among datapoints.'
399                                      )
400
401        if all([composition == dp.composition for dp in self.datapoints]):
402            # initial composition is common
403            common.append('composition')
404            prop = etree.SubElement(common_properties, 'property')
405            prop.set('name', 'initial composition')
406
407            for species_name, species in composition.items():
408                component = etree.SubElement(prop, 'component')
409                species_link = etree.SubElement(component, 'speciesLink')
410                species_link.set('preferredKey', species_name)
411                if species.InChI is not None:
412                    species_link.set('InChI', species.InChI)
413
414                amount = etree.SubElement(component, 'amount')
415                amount.set('units', composition_type)
416                amount.text = str(species.amount.magnitude)
417
418        # If multiple datapoints present, then find any common properties. If only
419        # one datapoint, then composition should be the only "common" property.
420        if len(self.datapoints) > 1:
421            for prop_name in datagroup_properties:
422                attribute = prop_name.replace(' ', '_')
423                quantities = [getattr(dp, attribute, False) for dp in self.datapoints]
424
425                # All quantities must have the property in question and all the
426                # values must be equal
427                if all(quantities) and quantities.count(quantities[0]) == len(quantities):
428                    common.append(prop_name)
429                    prop = etree.SubElement(common_properties, 'property')
430                    prop.set('description', '')
431                    prop.set('name', prop_name)
432                    prop.set('units', str(quantities[0].units))
433
434                    value = etree.SubElement(prop, 'value')
435                    value.text = str(quantities[0].magnitude)
436
437        # Ignition delay can't be common, unless only a single datapoint.
438
439        datagroup = etree.SubElement(root, 'dataGroup')
440        datagroup.set('id', 'dg1')
441        datagroup_link = etree.SubElement(datagroup, 'dataGroupLink')
442        datagroup_link.set('dataGroupID', '')
443        datagroup_link.set('dataPointID', '')
444
445        property_idx = {}
446        labels = {'temperature': 'T', 'pressure': 'P',
447                  'ignition delay': 'tau', 'pressure rise': 'dP/dt',
448                  }
449
450        for prop_name in datagroup_properties:
451            attribute = prop_name.replace(' ', '_')
452            # This can't be hasattr because properties are set to the value None
453            # if no value is specified in the file, so the attribute always exists
454            prop_indices = [i for i, dp in enumerate(self.datapoints)
455                            if getattr(dp, attribute) is not None
456                            ]
457            if prop_name in common or not prop_indices:
458                continue
459
460            prop = etree.SubElement(datagroup, 'property')
461            prop.set('description', '')
462            prop.set('name', prop_name)
463            units = str(getattr(self.datapoints[prop_indices[0]], attribute).units)
464            prop.set('units', units)
465            idx = 'x{}'.format(len(property_idx) + 1)
466            property_idx[idx] = {'name': prop_name, 'units': units}
467            prop.set('id', idx)
468            prop.set('label', labels[prop_name])
469
470        # Need to handle datapoints with possibly different species in the initial composition
471        if 'composition' not in common:
472            for dp in self.datapoints:
473                for species in dp.composition.values():
474                    # Only add new property for species not already considered
475                    has_spec = any([species.species_name in d.values()
476                                    for d in property_idx.values()
477                                    ])
478                    if not has_spec:
479                        prop = etree.SubElement(datagroup, 'property')
480                        prop.set('description', '')
481
482                        idx = 'x{}'.format(len(property_idx) + 1)
483                        property_idx[idx] = {'name': species.species_name}
484                        prop.set('id', idx)
485                        prop.set('label', '[' + species.species_name + ']')
486                        prop.set('name', 'composition')
487                        prop.set('units', self.datapoints[0].composition_type)
488
489                        species_link = etree.SubElement(prop, 'speciesLink')
490                        species_link.set('preferredKey', species.species_name)
491                        if species.InChI is not None:
492                            species_link.set('InChI', species.InChI)
493
494        for dp in self.datapoints:
495            datapoint = etree.SubElement(datagroup, 'dataPoint')
496            for idx, val in property_idx.items():
497                # handle regular properties a bit differently than composition
498                if val['name'] in datagroup_properties:
499                    value = etree.SubElement(datapoint, idx)
500                    quantity = getattr(dp, val['name'].replace(' ', '_')).to(val['units'])
501                    value.text = str(quantity.magnitude)
502                else:
503                    # composition
504                    for item in dp.composition.values():
505                        if item.species_name == val['name']:
506                            value = etree.SubElement(datapoint, idx)
507                            value.text = str(item.amount.magnitude)
508
509        # See https://stackoverflow.com/a/16097112 for the None.__ne__
510        history_types = ['volume_history', 'temperature_history', 'pressure_history',
511                         'piston_position_history', 'light_emission_history',
512                         'OH_emission_history', 'absorption_history']
513        time_histories = [getattr(dp, p) for dp in self.datapoints for p in history_types]
514        time_histories = list(filter(None.__ne__, time_histories))
515
516        if len(self.datapoints) > 1 and len(time_histories) > 1:
517            raise NotImplementedError('Error: ReSpecTh files do not support multiple datapoints '
518                                      'with a time history.')
519        elif len(time_histories) > 0:
520            for dg_idx, hist in enumerate(time_histories):
521                if hist.type not in ['volume', 'temperature', 'pressure']:
522                    warn('The time-history type {} is not supported by ReSpecTh for '
523                         'ignition delay experiments'.format(hist.type))
524                    continue
525
526                datagroup = etree.SubElement(root, 'dataGroup')
527                datagroup.set('id', 'dg{}'.format(dg_idx))
528                datagroup_link = etree.SubElement(datagroup, 'dataGroupLink')
529                datagroup_link.set('dataGroupID', '')
530                datagroup_link.set('dataPointID', '')
531
532                # Time history has two properties: time and quantity.
533                prop = etree.SubElement(datagroup, 'property')
534                prop.set('description', '')
535                prop.set('name', 'time')
536                prop.set('units', str(hist.time.units))
537                time_idx = 'x{}'.format(len(property_idx) + 1)
538                property_idx[time_idx] = {'name': 'time'}
539                prop.set('id', time_idx)
540                prop.set('label', 't')
541
542                prop = etree.SubElement(datagroup, 'property')
543                prop.set('description', '')
544                prop.set('name', hist.type)
545                prop.set('units', str(hist.quantity.units))
546                quant_idx = 'x{}'.format(len(property_idx) + 1)
547                property_idx[quant_idx] = {'name': hist.type}
548                prop.set('id', quant_idx)
549                prop.set('label', 'V')
550
551                for time, quantity in zip(hist.time, hist.quantity):
552                    datapoint = etree.SubElement(datagroup, 'dataPoint')
553                    value = etree.SubElement(datapoint, time_idx)
554                    value.text = str(time.magnitude)
555                    value = etree.SubElement(datapoint, quant_idx)
556                    value.text = str(quantity.magnitude)
557
558        ign_types = [getattr(dp, 'ignition_type', False) for dp in self.datapoints]
559        # All datapoints must have the same ignition target and type
560        if all(ign_types) and ign_types.count(ign_types[0]) == len(ign_types):
561            # In ReSpecTh files all datapoints must share ignition type
562            ignition = etree.SubElement(root, 'ignitionType')
563            if ign_types[0]['target'] in ['pressure', 'temperature']:
564                ignition.set('target', ign_types[0]['target'][0].upper())
565            else:
566                # options left are species
567                ignition.set('target', self.datapoints[0].ignition_type['target'])
568            if ign_types[0]['type'] == 'd/dt max extrapolated':
569                ignition.set('type', 'baseline max intercept from d/dt')
570            else:
571                ignition.set('type', self.datapoints[0].ignition_type['type'])
572        else:
573            raise NotImplementedError('Different ignition targets or types for multiple datapoints '
574                                      'are not supported in ReSpecTh.')
575
576        et = etree.ElementTree(root)
577        et.write(filename, encoding='utf-8', xml_declaration=True)
578
579        # now do a "pretty" rewrite
580        xml = minidom.parse(filename)
581        xml_string = xml.toprettyxml(indent='    ')
582        with open(filename, 'w') as f:
583            f.write(xml_string)
584
585        print('Converted to ' + filename)
586
587
588class DataPoint(object):
589    """Class for a single datapoint.
590
591    The `DataPoint` class stores the information associated with a single data point in the dataset
592    parsed from the `ChemKED` YAML input.
593
594    Arguments:
595        properties (`dict`): Dictionary adhering to the ChemKED format for ``datapoints``
596
597    Attributes:
598        composition (`list`): List of dictionaries representing the species and their quantities
599        ignition_delay (pint.Quantity): The ignition delay of the experiment
600        temperature (pint.Quantity): The temperature of the experiment
601        pressure (pint.Quantity): The pressure of the experiment
602        pressure_rise (pint.Quantity, optional): The amount of pressure rise during the induction
603            period of a shock tube experiment.
604        compression_time (pint.Quantity, optional): The compression time for an RCM experiment.
605        compressed_pressure (pint.Quantity, optional): The pressure at the end of compression for
606            an RCM experiment.
607        compressed_temperature (pint.Quantity, optional): The temperature at the end of compression
608            for an RCM experiment.
609        first_stage_ignition_delay (pint.Quantity, optional): The first stage ignition delay of the
610            experiment.
611        compression_time (pint.Quantity, optional): The compression time for an RCM experiment.
612        ignition_type (`dict`): Dictionary with the ignition target and type.
613        volume_history (`~collections.namedtuple`, optional): The volume history of the reactor
614            during an RCM experiment.
615        pressure_history (`~collections.namedtuple`, optional): The pressure history of the reactor
616            during an experiment.
617        temperature_history (`~collections.namedtuple`, optional): The temperature history of the
618            reactor during an experiment.
619        piston_position_history (`~collections.namedtuple`, optional): The piston position history
620            of the reactor during an RCM experiment.
621        light_emission_history (`~collections.namedtuple`, optional): The light emission history
622            of the reactor during an experiment.
623        OH_emission_history (`~collections.namedtuple`, optional): The OH emission history of the
624            reactor during an experiment.
625        absorption_history (`~collections.namedtuple`, optional): The absorption history of the
626            reactor during an experiment.
627    """
628    value_unit_props = [
629        'ignition-delay', 'first-stage-ignition-delay', 'temperature', 'pressure',
630        'pressure-rise',
631    ]
632
633    rcm_data_props = [
634        'compressed-pressure', 'compressed-temperature', 'compression-time', 'stroke', 'clearance',
635        'compression-ratio'
636    ]
637
638    def __init__(self, properties):
639        for prop in self.value_unit_props:
640            if prop in properties:
641                quant = self.process_quantity(properties[prop])
642                setattr(self, prop.replace('-', '_'), quant)
643            else:
644                setattr(self, prop.replace('-', '_'), None)
645
646        if 'rcm-data' in properties:
647            orig_rcm_data = properties['rcm-data']
648            rcm_props = {}
649            for prop in self.rcm_data_props:
650                if prop in orig_rcm_data:
651                    quant = self.process_quantity(orig_rcm_data[prop])
652                    rcm_props[prop.replace('-', '_')] = quant
653                else:
654                    rcm_props[prop.replace('-', '_')] = None
655            self.rcm_data = RCMData(**rcm_props)
656        else:
657            self.rcm_data = None
658
659        self.composition_type = properties['composition']['kind']
660        composition = {}
661        for species in properties['composition']['species']:
662            species_name = species['species-name']
663            amount = self.process_quantity(species['amount'])
664            InChI = species.get('InChI')
665            SMILES = species.get('SMILES')
666            atomic_composition = species.get('atomic-composition')
667            composition[species_name] = Composition(
668                species_name=species_name, InChI=InChI, SMILES=SMILES,
669                atomic_composition=atomic_composition, amount=amount)
670
671        setattr(self, 'composition', composition)
672
673        self.equivalence_ratio = properties.get('equivalence-ratio')
674        self.ignition_type = deepcopy(properties.get('ignition-type'))
675
676        if 'time-histories' in properties and 'volume-history' in properties:
677            raise TypeError('time-histories and volume-history are mutually exclusive')
678
679        if 'time-histories' in properties:
680            for hist in properties['time-histories']:
681                if hasattr(self, '{}_history'.format(hist['type'].replace(' ', '_'))):
682                    raise ValueError('Each history type may only be specified once. {} was '
683                                     'specified multiple times'.format(hist['type']))
684                time_col = hist['time']['column']
685                time_units = hist['time']['units']
686                quant_col = hist['quantity']['column']
687                quant_units = hist['quantity']['units']
688                if isinstance(hist['values'], list):
689                    values = np.array(hist['values'])
690                else:
691                    # Load the values from a file
692                    values = np.genfromtxt(hist['values']['filename'], delimiter=',')
693
694                time_history = TimeHistory(
695                    time=Q_(values[:, time_col], time_units),
696                    quantity=Q_(values[:, quant_col], quant_units),
697                    type=hist['type'],
698                )
699
700                setattr(self, '{}_history'.format(hist['type'].replace(' ', '_')), time_history)
701
702        if 'volume-history' in properties:
703            warn('The volume-history field should be replaced by time-histories. '
704                 'volume-history will be removed after PyKED 0.4',
705                 DeprecationWarning)
706            time_col = properties['volume-history']['time']['column']
707            time_units = properties['volume-history']['time']['units']
708            volume_col = properties['volume-history']['volume']['column']
709            volume_units = properties['volume-history']['volume']['units']
710            values = np.array(properties['volume-history']['values'])
711            self.volume_history = VolumeHistory(
712                time=Q_(values[:, time_col], time_units),
713                volume=Q_(values[:, volume_col], volume_units),
714            )
715
716        history_types = ['volume', 'temperature', 'pressure', 'piston_position', 'light_emission',
717                         'OH_emission', 'absorption']
718        for h in history_types:
719            if not hasattr(self, '{}_history'.format(h)):
720                setattr(self, '{}_history'.format(h), None)
721
722    def process_quantity(self, properties):
723        """Process the uncertainty information from a given quantity and return it
724        """
725        quant = Q_(properties[0])
726        if len(properties) > 1:
727            unc = properties[1]
728            uncertainty = unc.get('uncertainty', False)
729            upper_uncertainty = unc.get('upper-uncertainty', False)
730            lower_uncertainty = unc.get('lower-uncertainty', False)
731            uncertainty_type = unc.get('uncertainty-type')
732            if uncertainty_type == 'relative':
733                if uncertainty:
734                    quant = quant.plus_minus(float(uncertainty), relative=True)
735                elif upper_uncertainty and lower_uncertainty:
736                    warn('Asymmetric uncertainties are not supported. The '
737                         'maximum of lower-uncertainty and upper-uncertainty '
738                         'has been used as the symmetric uncertainty.')
739                    uncertainty = max(float(upper_uncertainty), float(lower_uncertainty))
740                    quant = quant.plus_minus(uncertainty, relative=True)
741                else:
742                    raise ValueError('Either "uncertainty" or "upper-uncertainty" and '
743                                     '"lower-uncertainty" need to be specified.')
744            elif uncertainty_type == 'absolute':
745                if uncertainty:
746                    uncertainty = Q_(uncertainty)
747                    quant = quant.plus_minus(uncertainty.to(quant.units).magnitude)
748                elif upper_uncertainty and lower_uncertainty:
749                    warn('Asymmetric uncertainties are not supported. The '
750                         'maximum of lower-uncertainty and upper-uncertainty '
751                         'has been used as the symmetric uncertainty.')
752                    uncertainty = max(Q_(upper_uncertainty), Q_(lower_uncertainty))
753                    quant = quant.plus_minus(uncertainty.to(quant.units).magnitude)
754                else:
755                    raise ValueError('Either "uncertainty" or "upper-uncertainty" and '
756                                     '"lower-uncertainty" need to be specified.')
757            else:
758                raise ValueError('uncertainty-type must be one of "absolute" or "relative"')
759
760        return quant
761
762    def get_cantera_composition_string(self, species_conversion=None):
763        """Get the composition in a string format suitable for input to Cantera.
764
765        Returns a formatted string no matter the type of composition. As such, this method
766        is not recommended for end users; instead, prefer the `get_cantera_mole_fraction`
767        or `get_cantera_mass_fraction` methods.
768
769        Arguments:
770            species_conversion (`dict`, optional): Mapping of species identifier to a
771                species name. This argument should be supplied when the name of the
772                species in the ChemKED YAML file does not match the name of the same
773                species in a chemical kinetic mechanism. The species identifier (the key
774                of the mapping) can be the name, InChI, or SMILES provided in the ChemKED
775                file, while the value associated with a key should be the desired name in
776                the Cantera format output string.
777
778        Returns:
779            `str`: String in the ``SPEC:AMT, SPEC:AMT`` format
780
781        Raises:
782            `ValueError`: If the composition type of the `DataPoint` is not one of
783                ``'mass fraction'``, ``'mole fraction'``, or ``'mole percent'``
784        """
785        if self.composition_type in ['mole fraction', 'mass fraction']:
786            factor = 1.0
787        elif self.composition_type == 'mole percent':
788            factor = 100.0
789        else:
790            raise ValueError('Unknown composition type: {}'.format(self.composition_type))
791
792        if species_conversion is None:
793            comps = ['{!s}:{:.4e}'.format(c.species_name,
794                     c.amount.magnitude/factor) for c in self.composition.values()]
795        else:
796            comps = []
797            for c in self.composition.values():
798                amount = c.amount.magnitude/factor
799                idents = [getattr(c, s, False) for s in ['species_name', 'InChI', 'SMILES']]
800                present = [i in species_conversion for i in idents]
801                if not any(present):
802                    comps.append('{!s}:{:.4e}'.format(c.species_name, amount))
803                else:
804                    if len([i for i in present if i]) > 1:
805                        raise ValueError('More than one conversion present for species {}'.format(
806                                         c.species_name))
807
808                    ident = idents[present.index(True)]
809                    species_replacement_name = species_conversion.pop(ident)
810                    comps.append('{!s}:{:.4e}'.format(species_replacement_name, amount))
811
812            if len(species_conversion) > 0:
813                raise ValueError('Unknown species in conversion: {}'.format(species_conversion))
814
815        return ', '.join(comps)
816
817    def get_cantera_mole_fraction(self, species_conversion=None):
818        """Get the mole fractions in a string format suitable for input to Cantera.
819
820        Arguments:
821            species_conversion (`dict`, optional): Mapping of species identifier to a
822                species name. This argument should be supplied when the name of the
823                species in the ChemKED YAML file does not match the name of the same
824                species in a chemical kinetic mechanism. The species identifier (the key
825                of the mapping) can be the name, InChI, or SMILES provided in the ChemKED
826                file, while the value associated with a key should be the desired name in
827                the Cantera format output string.
828
829        Returns:
830            `str`: String of mole fractions in the ``SPEC:AMT, SPEC:AMT`` format
831
832        Raises:
833            `ValueError`: If the composition type is ``'mass fraction'``, the conversion cannot
834                be done because no molecular weight information is known
835
836        Examples:
837            >>> dp = DataPoint(properties)
838            >>> dp.get_cantera_mole_fraction()
839            'H2:4.4400e-03, O2:5.5600e-03, Ar:9.9000e-01'
840            >>> species_conversion = {'H2': 'h2', 'O2': 'o2'}
841            >>> dp.get_cantera_mole_fraction(species_conversion)
842            'h2:4.4400e-03, o2:5.5600e-03, Ar:9.9000e-01'
843            >>> species_conversion = {'1S/H2/h1H': 'h2', '1S/O2/c1-2': 'o2'}
844            >>> dp.get_cantera_mole_fraction(species_conversion)
845            'h2:4.4400e-03, o2:5.5600e-03, Ar:9.9000e-01'
846        """
847        if self.composition_type == 'mass fraction':
848            raise ValueError('Cannot get mole fractions from the given composition.\n'
849                             '{}'.format(self.composition))
850        else:
851            return self.get_cantera_composition_string(species_conversion)
852
853    def get_cantera_mass_fraction(self, species_conversion=None):
854        """Get the mass fractions in a string format suitable for input to Cantera.
855
856        Arguments:
857            species_conversion (`dict`, optional): Mapping of species identifier to a
858                species name. This argument should be supplied when the name of the
859                species in the ChemKED YAML file does not match the name of the same
860                species in a chemical kinetic mechanism. The species identifier (the key
861                of the mapping) can be the name, InChI, or SMILES provided in the ChemKED
862                file, while the value associated with a key should be the desired name in
863                the Cantera format output string.
864
865        Returns:
866            `str`: String of mass fractions in the ``SPEC:AMT, SPEC:AMT`` format
867
868        Raises:
869            `ValueError`: If the composition type is ``'mole fraction'`` or
870                ``'mole percent'``, the conversion cannot be done because no molecular
871                weight information is known
872
873        Examples:
874            >>> dp = DataPoint(properties)
875            >>> dp.get_cantera_mass_fraction()
876            'H2:2.2525e-04, O2:4.4775e-03, Ar:9.9530e-01'
877            >>> species_conversion = {'H2': 'h2', 'O2': 'o2'}
878            >>> dp.get_cantera_mass_fraction(species_conversion)
879            'h2:2.2525e-04, o2:4.4775e-03, Ar:9.9530e-01'
880            >>> species_conversion = {'1S/H2/h1H': 'h2', '1S/O2/c1-2': 'o2'}
881            >>> dp.get_cantera_mass_fraction(species_conversion)
882            'h2:2.2525e-04, o2:4.4775e-03, Ar:9.9530e-01'
883        """
884        if self.composition_type in ['mole fraction', 'mole percent']:
885            raise ValueError('Cannot get mass fractions from the given composition.\n'
886                             '{}'.format(self.composition)
887                             )
888        else:
889            return self.get_cantera_composition_string(species_conversion)
890