1""" 2Main ChemKED module 3""" 4# Standard libraries 5from os.path import exists 6from collections import namedtuple 7from warnings import warn 8from copy import deepcopy 9import xml.etree.ElementTree as etree 10import xml.dom.minidom as minidom 11from itertools import chain 12 13import numpy as np 14 15# Local imports 16from .validation import schema, OurValidator, yaml, Q_ 17from .converters import datagroup_properties, ReSpecTh_to_ChemKED 18 19VolumeHistory = namedtuple('VolumeHistory', ['time', 'volume']) 20VolumeHistory.__doc__ = 'Time history of the volume in an RCM experiment. Deprecated, to be removed after PyKED 0.4' # noqa: E501 21VolumeHistory.time.__doc__ = '(`~numpy.ndarray`): the time during the experiment' 22VolumeHistory.volume.__doc__ = '(`~numpy.ndarray`): the volume during the experiment' 23 24TimeHistory = namedtuple('TimeHistory', ['time', 'quantity', 'type']) 25TimeHistory.__doc__ = 'Time history of the quantity in an RCM experiment' 26TimeHistory.time.__doc__ = '(`~numpy.ndarray`): the time during the experiment' 27TimeHistory.quantity.__doc__ = '(`~numpy.ndarray`): the quantity of interest during the experiment' 28TimeHistory.type.__doc__ = """\ 29(`str`): the type of time history represented. Possible options are: 30 31* volume 32* temperature 33* pressure 34* piston position 35* light emission 36* OH emission 37* absorption 38""" 39 40RCMData = namedtuple( 41 'RCMData', 42 ['compressed_pressure', 'compressed_temperature', 'compression_time', 'stroke', 43 'clearance', 'compression_ratio'] 44) 45RCMData.__doc__ = 'Data fields specific to rapid compression machine experiments' 46RCMData.compressed_pressure.__doc__ = '(`~pint.Quantity`) The pressure at the end of compression' 47RCMData.compressed_temperature.__doc__ = """\ 48(`~pint.Quantity`) The temperature at the end of compression""" 49RCMData.compression_time.__doc__ = '(`~pint.Quantity`) The duration of the compression stroke' 50RCMData.stroke.__doc__ = '(`~pint.Quantity`) The length of the stroke' 51RCMData.clearance.__doc__ = """\ 52(`~pint.Quantity`) The clearance between piston face and end wall at the end of compression""" 53RCMData.compression_ratio.__doc__ = '(`~pint.Quantity`) The volumetric compression ratio' 54 55Reference = namedtuple('Reference', 56 ['volume', 'journal', 'doi', 'authors', 'detail', 'year', 'pages']) 57Reference.__doc__ = 'Information about the article or report where the data can be found' 58Reference.volume.__doc__ = '(`str`) The journal volume' 59Reference.journal.__doc__ = '(`str`) The name of the journal' 60Reference.doi.__doc__ = '(`str`) The Digital Object Identifier of the article' 61Reference.authors.__doc__ = '(`list`) The list of authors of the article' 62Reference.detail.__doc__ = '(`str`) Detail about where the data can be found in the article' 63Reference.year.__doc__ = '(`str`) The year the article was published' 64Reference.pages.__doc__ = '(`str`) The pages in the journal where the article was published' 65 66Apparatus = namedtuple('Apparatus', ['kind', 'institution', 'facility']) 67Apparatus.__doc__ = 'Information about the experimental apparatus used to generate the data' 68Apparatus.kind.__doc__ = '(`str`) The kind of experimental apparatus' 69Apparatus.institution.__doc__ = '(`str`) The institution where the experiment is located' 70Apparatus.facility.__doc__ = '(`str`) The particular experimental facility at the location' 71 72Composition = namedtuple('Composition', 'species_name InChI SMILES atomic_composition amount') 73Composition.__doc__ = 'Detail of the initial composition of the mixture for the experiment' 74Composition.species_name.__doc__ = '(`str`) The name of the species' 75Composition.InChI.__doc__ = '(`str`) The InChI identifier for the species' 76Composition.SMILES.__doc__ = '(`str`) The SMILES identifier for the species' 77Composition.atomic_composition.__doc__ = '(`dict`) The atomic composition of the species' 78Composition.amount.__doc__ = '(`~pint.Quantity`) The amount of this species' 79 80 81class ChemKED(object): 82 """Main ChemKED class. 83 84 The ChemKED class stores information about the contents of a ChemKED database 85 file. It stores each datapoint associated with the database and provides access 86 the the reference information, versions, and file author. 87 88 Arguments: 89 yaml_file (`str`, optional): The filename of the YAML database in ChemKED format. 90 dict_input (`dict`, optional): A dictionary with the parsed ouput of YAML file in ChemKED 91 format. 92 skip_validation (`bool`, optional): Whether validation of the ChemKED should be done. Must 93 be supplied as a keyword-argument. 94 95 Attributes: 96 datapoints (`list`): List of `DataPoint` objects storing each datapoint in the database. 97 reference (`~collections.namedtuple`): Attributes include ``volume``, ``journal``, ``doi``, 98 ``authors``, ``detail``, ``year``, and ``pages`` describing the reference from which the 99 datapoints are derived. 100 apparatus (`~collections.namedtuple`): Attributes include ``kind`` of experimental 101 apparatus, and the ``institution`` and ``facility`` where the experimental apparatus is 102 located. 103 chemked_version (`str`): Version of the ChemKED database schema used in this file. 104 experiment_type (`str`): Type of exeperimental data contained in this database. 105 file_author (`dict`): Information about the author of the ChemKED database file. 106 file_version (`str`): Version of the ChemKED database file. 107 _properties (`dict`): Original dictionary read from ChemKED database file, meant for 108 internal use. 109 """ 110 def __init__(self, yaml_file=None, dict_input=None, *, skip_validation=False): 111 if yaml_file is not None: 112 with open(yaml_file, 'r') as f: 113 self._properties = yaml.safe_load(f) 114 elif dict_input is not None: 115 self._properties = dict_input 116 else: 117 raise NameError("ChemKED needs either a YAML filename or dictionary as input.") 118 119 if not skip_validation: 120 self.validate_yaml(self._properties) 121 122 self.datapoints = [] 123 for point in self._properties['datapoints']: 124 self.datapoints.append(DataPoint(point)) 125 126 self.reference = Reference( 127 volume=self._properties['reference'].get('volume'), 128 journal=self._properties['reference'].get('journal'), 129 doi=self._properties['reference'].get('doi'), 130 authors=self._properties['reference'].get('authors'), 131 detail=self._properties['reference'].get('detail'), 132 year=self._properties['reference'].get('year'), 133 pages=self._properties['reference'].get('pages'), 134 ) 135 136 self.apparatus = Apparatus( 137 kind=self._properties['apparatus'].get('kind'), 138 institution=self._properties['apparatus'].get('institution'), 139 facility=self._properties['apparatus'].get('facility'), 140 ) 141 142 for prop in ['chemked-version', 'experiment-type', 'file-authors', 'file-version']: 143 setattr(self, prop.replace('-', '_'), self._properties[prop]) 144 145 @classmethod 146 def from_respecth(cls, filename_xml, file_author='', file_author_orcid=''): 147 """Construct a ChemKED instance directly from a ReSpecTh file. 148 149 Arguments: 150 filename_xml (`str`): Filename of the ReSpecTh-formatted XML file to be imported 151 file_author (`str`, optional): File author to be added to the list generated from the 152 XML file 153 file_author_orcid (`str`, optional): ORCID for the file author being added to the list 154 of file authors 155 156 Returns: 157 `ChemKED`: Instance of the `ChemKED` class containing the data in ``filename_xml``. 158 159 Examples: 160 >>> ck = ChemKED.from_respecth('respecth_file.xml') 161 >>> ck = ChemKED.from_respecth('respecth_file.xml', file_author='Bryan W. Weber') 162 >>> ck = ChemKED.from_respecth('respecth_file.xml', file_author='Bryan W. Weber', 163 file_author_orcid='0000-0000-0000-0000') 164 """ 165 properties = ReSpecTh_to_ChemKED(filename_xml, file_author, file_author_orcid, 166 validate=False) 167 return cls(dict_input=properties) 168 169 def validate_yaml(self, properties): 170 """Validate the parsed YAML file for adherance to the ChemKED format. 171 172 Arguments: 173 properties (`dict`): Dictionary created from the parsed YAML file 174 175 Raises: 176 `ValueError`: If the YAML file cannot be validated, a `ValueError` is raised whose 177 string contains the errors that are present. 178 """ 179 validator = OurValidator(schema) 180 if not validator.validate(properties): 181 for key, value in validator.errors.items(): 182 if any(['unallowed value' in v for v in value]): 183 print(('{key} has an illegal value. Allowed values are {values} and are case ' 184 'sensitive.').format(key=key, values=schema[key]['allowed'])) 185 186 raise ValueError(validator.errors) 187 188 def get_dataframe(self, output_columns=None): 189 """Get a Pandas DataFrame of the datapoints in this instance. 190 191 Arguments: 192 output_columns (`list`, optional): List of strings specifying the columns to include 193 in the output DataFrame. The default is `None`, which outputs all of the 194 columns. Options include (not case sensitive): 195 196 * ``Temperature`` 197 * ``Pressure`` 198 * ``Ignition Delay`` 199 * ``Composition`` 200 * ``Equivalence Ratio`` 201 * ``Reference`` 202 * ``Apparatus`` 203 * ``Experiment Type`` 204 * ``File Author`` 205 * ``File Version`` 206 * ``ChemKED Version`` 207 208 In addition, specific fields from the ``Reference`` and ``Apparatus`` attributes can 209 be included by specifying the name after a colon. These options are: 210 211 * ``Reference:Volume`` 212 * ``Reference:Journal`` 213 * ``Reference:DOI`` 214 * ``Reference:Authors`` 215 * ``Reference:Detail`` 216 * ``Reference:Year`` 217 * ``Reference:Pages`` 218 * ``Apparatus:Kind`` 219 * ``Apparatus:Facility`` 220 * ``Apparatus:Institution`` 221 222 Only the first author is printed when ``Reference`` or ``Reference:Authors`` is 223 selected because the whole author list may be quite long. 224 225 Note: 226 If the Composition is selected as an output type, the composition specified in the 227 `DataPoint` is used. No attempt is made to convert to a consistent basis; mole fractions 228 will remain mole fractions, mass fractions will remain mass fractions, and mole percent 229 will remain mole percent. Therefore, it is possible to end up with more than one type of 230 composition specification in a given column. However, if the composition is included 231 in the resulting dataframe, the type of each composition will be specified by the "Kind" 232 field in each row. 233 234 Examples: 235 >>> df = ChemKED(yaml_file).get_dataframe() 236 >>> df = ChemKED(yaml_file).get_dataframe(['Temperature', 'Ignition Delay']) 237 238 Returns: 239 `~pandas.DataFrame`: Contains the information regarding each point in the ``datapoints`` 240 attribute 241 """ 242 import pandas as pd 243 244 valid_labels = [a.replace('_', ' ') for a in self.__dict__ 245 if not (a.startswith('__') or a.startswith('_')) 246 ] 247 valid_labels.remove('datapoints') 248 valid_labels.extend( 249 ['composition', 'ignition delay', 'temperature', 'pressure', 'equivalence ratio'] 250 ) 251 ref_index = valid_labels.index('reference') 252 valid_labels[ref_index:ref_index + 1] = ['reference:' + a for a in Reference._fields] 253 app_index = valid_labels.index('apparatus') 254 valid_labels[app_index:app_index + 1] = ['apparatus:' + a for a in Apparatus._fields] 255 species_list = list(set(chain(*[list(d.composition.keys()) for d in self.datapoints]))) 256 257 if output_columns is None or len(output_columns) == 0: 258 col_labels = valid_labels 259 comp_index = col_labels.index('composition') 260 col_labels[comp_index:comp_index + 1] = species_list + ['Composition:Kind'] 261 else: 262 output_columns = [a.lower() for a in output_columns] 263 col_labels = [] 264 for col in output_columns: 265 if col in valid_labels or col in ['reference', 'apparatus']: 266 col_labels.append(col) 267 else: 268 raise ValueError('{} is not a valid output column choice'.format(col)) 269 270 if 'composition' in col_labels: 271 comp_index = col_labels.index('composition') 272 col_labels[comp_index:comp_index + 1] = species_list + ['Composition:Kind'] 273 if 'reference' in col_labels: 274 ref_index = col_labels.index('reference') 275 col_labels[ref_index:ref_index + 1] = ['reference:' + a for a in Reference._fields] 276 if 'apparatus' in col_labels: 277 app_index = col_labels.index('apparatus') 278 col_labels[app_index:app_index + 1] = ['apparatus:' + a for a in Apparatus._fields] 279 280 data = [] 281 for d in self.datapoints: 282 row = [] 283 d_species = list(d.composition.keys()) 284 for col in col_labels: 285 if col in species_list: 286 if col in d_species: 287 row.append(d.composition[col].amount) 288 else: 289 row.append(Q_(0.0, 'dimensionless')) 290 elif 'reference' in col or 'apparatus' in col: 291 split_col = col.split(':') 292 if split_col[1] == 'authors': 293 row.append(getattr(getattr(self, split_col[0]), split_col[1])[0]['name']) 294 else: 295 row.append(getattr(getattr(self, split_col[0]), split_col[1])) 296 elif col in ['temperature', 'pressure', 'ignition delay', 'equivalence ratio']: 297 row.append(getattr(d, col.replace(' ', '_'))) 298 elif col == 'file authors': 299 row.append(getattr(self, col.replace(' ', '_'))[0]['name']) 300 elif col == 'Composition:Kind': 301 row.append(d.composition_type) 302 else: 303 row.append(getattr(self, col.replace(' ', '_'))) 304 data.append(row) 305 306 col_labels = [a.title() for a in col_labels] 307 columns = pd.Index(col_labels) 308 return pd.DataFrame(data=data, columns=columns) 309 310 def write_file(self, filename, *, overwrite=False): 311 """Write new ChemKED YAML file based on object. 312 313 Arguments: 314 filename (`str`): Filename for target YAML file 315 overwrite (`bool`, optional): Whether to overwrite file with given name if present. 316 Must be supplied as a keyword-argument. 317 318 Raises: 319 `NameError`: If ``filename`` is already present, and ``overwrite`` is not ``True``. 320 321 Example: 322 >>> dataset = ChemKED(yaml_file) 323 >>> dataset.write_file(new_yaml_file) 324 """ 325 # Ensure file isn't already present 326 if exists(filename) and not overwrite: 327 raise OSError(filename + ' already present. Specify "overwrite=True" ' 328 'to overwrite, or rename.' 329 ) 330 331 with open(filename, 'w') as yaml_file: 332 yaml.dump(self._properties, yaml_file) 333 334 def convert_to_ReSpecTh(self, filename): 335 """Convert ChemKED record to ReSpecTh XML file. 336 337 This converter uses common information in a ChemKED file to generate a 338 ReSpecTh XML file. Note that some information may be lost, as ChemKED stores 339 some additional attributes. 340 341 Arguments: 342 filename (`str`): Filename for output ReSpecTh XML file. 343 344 Example: 345 >>> dataset = ChemKED(yaml_file) 346 >>> dataset.convert_to_ReSpecTh(xml_file) 347 """ 348 root = etree.Element('experiment') 349 350 file_author = etree.SubElement(root, 'fileAuthor') 351 file_author.text = self.file_authors[0]['name'] 352 353 # right now ChemKED just uses an integer file version 354 file_version = etree.SubElement(root, 'fileVersion') 355 major_version = etree.SubElement(file_version, 'major') 356 major_version.text = str(self.file_version) 357 minor_version = etree.SubElement(file_version, 'minor') 358 minor_version.text = '0' 359 360 respecth_version = etree.SubElement(root, 'ReSpecThVersion') 361 major_version = etree.SubElement(respecth_version, 'major') 362 major_version.text = '1' 363 minor_version = etree.SubElement(respecth_version, 'minor') 364 minor_version.text = '0' 365 366 # Only ignition delay currently supported 367 exp = etree.SubElement(root, 'experimentType') 368 if self.experiment_type == 'ignition delay': 369 exp.text = 'Ignition delay measurement' 370 else: 371 raise NotImplementedError('Only ignition delay type supported for conversion.') 372 373 reference = etree.SubElement(root, 'bibliographyLink') 374 citation = '' 375 for author in self.reference.authors: 376 citation += author['name'] + ', ' 377 citation += (self.reference.journal + ' (' + str(self.reference.year) + ') ' + 378 str(self.reference.volume) + ':' + self.reference.pages + '. ' + 379 self.reference.detail 380 ) 381 reference.set('preferredKey', citation) 382 reference.set('doi', self.reference.doi) 383 384 apparatus = etree.SubElement(root, 'apparatus') 385 kind = etree.SubElement(apparatus, 'kind') 386 kind.text = self.apparatus.kind 387 388 common_properties = etree.SubElement(root, 'commonProperties') 389 # ChemKED objects have no common properties once loaded. Check for properties 390 # among datapoints that tend to be common 391 common = [] 392 composition = self.datapoints[0].composition 393 394 # Composition type *has* to be the same 395 composition_type = self.datapoints[0].composition_type 396 if not all(dp.composition_type == composition_type for dp in self.datapoints): 397 raise NotImplementedError('Error: ReSpecTh does not support varying composition ' 398 'type among datapoints.' 399 ) 400 401 if all([composition == dp.composition for dp in self.datapoints]): 402 # initial composition is common 403 common.append('composition') 404 prop = etree.SubElement(common_properties, 'property') 405 prop.set('name', 'initial composition') 406 407 for species_name, species in composition.items(): 408 component = etree.SubElement(prop, 'component') 409 species_link = etree.SubElement(component, 'speciesLink') 410 species_link.set('preferredKey', species_name) 411 if species.InChI is not None: 412 species_link.set('InChI', species.InChI) 413 414 amount = etree.SubElement(component, 'amount') 415 amount.set('units', composition_type) 416 amount.text = str(species.amount.magnitude) 417 418 # If multiple datapoints present, then find any common properties. If only 419 # one datapoint, then composition should be the only "common" property. 420 if len(self.datapoints) > 1: 421 for prop_name in datagroup_properties: 422 attribute = prop_name.replace(' ', '_') 423 quantities = [getattr(dp, attribute, False) for dp in self.datapoints] 424 425 # All quantities must have the property in question and all the 426 # values must be equal 427 if all(quantities) and quantities.count(quantities[0]) == len(quantities): 428 common.append(prop_name) 429 prop = etree.SubElement(common_properties, 'property') 430 prop.set('description', '') 431 prop.set('name', prop_name) 432 prop.set('units', str(quantities[0].units)) 433 434 value = etree.SubElement(prop, 'value') 435 value.text = str(quantities[0].magnitude) 436 437 # Ignition delay can't be common, unless only a single datapoint. 438 439 datagroup = etree.SubElement(root, 'dataGroup') 440 datagroup.set('id', 'dg1') 441 datagroup_link = etree.SubElement(datagroup, 'dataGroupLink') 442 datagroup_link.set('dataGroupID', '') 443 datagroup_link.set('dataPointID', '') 444 445 property_idx = {} 446 labels = {'temperature': 'T', 'pressure': 'P', 447 'ignition delay': 'tau', 'pressure rise': 'dP/dt', 448 } 449 450 for prop_name in datagroup_properties: 451 attribute = prop_name.replace(' ', '_') 452 # This can't be hasattr because properties are set to the value None 453 # if no value is specified in the file, so the attribute always exists 454 prop_indices = [i for i, dp in enumerate(self.datapoints) 455 if getattr(dp, attribute) is not None 456 ] 457 if prop_name in common or not prop_indices: 458 continue 459 460 prop = etree.SubElement(datagroup, 'property') 461 prop.set('description', '') 462 prop.set('name', prop_name) 463 units = str(getattr(self.datapoints[prop_indices[0]], attribute).units) 464 prop.set('units', units) 465 idx = 'x{}'.format(len(property_idx) + 1) 466 property_idx[idx] = {'name': prop_name, 'units': units} 467 prop.set('id', idx) 468 prop.set('label', labels[prop_name]) 469 470 # Need to handle datapoints with possibly different species in the initial composition 471 if 'composition' not in common: 472 for dp in self.datapoints: 473 for species in dp.composition.values(): 474 # Only add new property for species not already considered 475 has_spec = any([species.species_name in d.values() 476 for d in property_idx.values() 477 ]) 478 if not has_spec: 479 prop = etree.SubElement(datagroup, 'property') 480 prop.set('description', '') 481 482 idx = 'x{}'.format(len(property_idx) + 1) 483 property_idx[idx] = {'name': species.species_name} 484 prop.set('id', idx) 485 prop.set('label', '[' + species.species_name + ']') 486 prop.set('name', 'composition') 487 prop.set('units', self.datapoints[0].composition_type) 488 489 species_link = etree.SubElement(prop, 'speciesLink') 490 species_link.set('preferredKey', species.species_name) 491 if species.InChI is not None: 492 species_link.set('InChI', species.InChI) 493 494 for dp in self.datapoints: 495 datapoint = etree.SubElement(datagroup, 'dataPoint') 496 for idx, val in property_idx.items(): 497 # handle regular properties a bit differently than composition 498 if val['name'] in datagroup_properties: 499 value = etree.SubElement(datapoint, idx) 500 quantity = getattr(dp, val['name'].replace(' ', '_')).to(val['units']) 501 value.text = str(quantity.magnitude) 502 else: 503 # composition 504 for item in dp.composition.values(): 505 if item.species_name == val['name']: 506 value = etree.SubElement(datapoint, idx) 507 value.text = str(item.amount.magnitude) 508 509 # See https://stackoverflow.com/a/16097112 for the None.__ne__ 510 history_types = ['volume_history', 'temperature_history', 'pressure_history', 511 'piston_position_history', 'light_emission_history', 512 'OH_emission_history', 'absorption_history'] 513 time_histories = [getattr(dp, p) for dp in self.datapoints for p in history_types] 514 time_histories = list(filter(None.__ne__, time_histories)) 515 516 if len(self.datapoints) > 1 and len(time_histories) > 1: 517 raise NotImplementedError('Error: ReSpecTh files do not support multiple datapoints ' 518 'with a time history.') 519 elif len(time_histories) > 0: 520 for dg_idx, hist in enumerate(time_histories): 521 if hist.type not in ['volume', 'temperature', 'pressure']: 522 warn('The time-history type {} is not supported by ReSpecTh for ' 523 'ignition delay experiments'.format(hist.type)) 524 continue 525 526 datagroup = etree.SubElement(root, 'dataGroup') 527 datagroup.set('id', 'dg{}'.format(dg_idx)) 528 datagroup_link = etree.SubElement(datagroup, 'dataGroupLink') 529 datagroup_link.set('dataGroupID', '') 530 datagroup_link.set('dataPointID', '') 531 532 # Time history has two properties: time and quantity. 533 prop = etree.SubElement(datagroup, 'property') 534 prop.set('description', '') 535 prop.set('name', 'time') 536 prop.set('units', str(hist.time.units)) 537 time_idx = 'x{}'.format(len(property_idx) + 1) 538 property_idx[time_idx] = {'name': 'time'} 539 prop.set('id', time_idx) 540 prop.set('label', 't') 541 542 prop = etree.SubElement(datagroup, 'property') 543 prop.set('description', '') 544 prop.set('name', hist.type) 545 prop.set('units', str(hist.quantity.units)) 546 quant_idx = 'x{}'.format(len(property_idx) + 1) 547 property_idx[quant_idx] = {'name': hist.type} 548 prop.set('id', quant_idx) 549 prop.set('label', 'V') 550 551 for time, quantity in zip(hist.time, hist.quantity): 552 datapoint = etree.SubElement(datagroup, 'dataPoint') 553 value = etree.SubElement(datapoint, time_idx) 554 value.text = str(time.magnitude) 555 value = etree.SubElement(datapoint, quant_idx) 556 value.text = str(quantity.magnitude) 557 558 ign_types = [getattr(dp, 'ignition_type', False) for dp in self.datapoints] 559 # All datapoints must have the same ignition target and type 560 if all(ign_types) and ign_types.count(ign_types[0]) == len(ign_types): 561 # In ReSpecTh files all datapoints must share ignition type 562 ignition = etree.SubElement(root, 'ignitionType') 563 if ign_types[0]['target'] in ['pressure', 'temperature']: 564 ignition.set('target', ign_types[0]['target'][0].upper()) 565 else: 566 # options left are species 567 ignition.set('target', self.datapoints[0].ignition_type['target']) 568 if ign_types[0]['type'] == 'd/dt max extrapolated': 569 ignition.set('type', 'baseline max intercept from d/dt') 570 else: 571 ignition.set('type', self.datapoints[0].ignition_type['type']) 572 else: 573 raise NotImplementedError('Different ignition targets or types for multiple datapoints ' 574 'are not supported in ReSpecTh.') 575 576 et = etree.ElementTree(root) 577 et.write(filename, encoding='utf-8', xml_declaration=True) 578 579 # now do a "pretty" rewrite 580 xml = minidom.parse(filename) 581 xml_string = xml.toprettyxml(indent=' ') 582 with open(filename, 'w') as f: 583 f.write(xml_string) 584 585 print('Converted to ' + filename) 586 587 588class DataPoint(object): 589 """Class for a single datapoint. 590 591 The `DataPoint` class stores the information associated with a single data point in the dataset 592 parsed from the `ChemKED` YAML input. 593 594 Arguments: 595 properties (`dict`): Dictionary adhering to the ChemKED format for ``datapoints`` 596 597 Attributes: 598 composition (`list`): List of dictionaries representing the species and their quantities 599 ignition_delay (pint.Quantity): The ignition delay of the experiment 600 temperature (pint.Quantity): The temperature of the experiment 601 pressure (pint.Quantity): The pressure of the experiment 602 pressure_rise (pint.Quantity, optional): The amount of pressure rise during the induction 603 period of a shock tube experiment. 604 compression_time (pint.Quantity, optional): The compression time for an RCM experiment. 605 compressed_pressure (pint.Quantity, optional): The pressure at the end of compression for 606 an RCM experiment. 607 compressed_temperature (pint.Quantity, optional): The temperature at the end of compression 608 for an RCM experiment. 609 first_stage_ignition_delay (pint.Quantity, optional): The first stage ignition delay of the 610 experiment. 611 compression_time (pint.Quantity, optional): The compression time for an RCM experiment. 612 ignition_type (`dict`): Dictionary with the ignition target and type. 613 volume_history (`~collections.namedtuple`, optional): The volume history of the reactor 614 during an RCM experiment. 615 pressure_history (`~collections.namedtuple`, optional): The pressure history of the reactor 616 during an experiment. 617 temperature_history (`~collections.namedtuple`, optional): The temperature history of the 618 reactor during an experiment. 619 piston_position_history (`~collections.namedtuple`, optional): The piston position history 620 of the reactor during an RCM experiment. 621 light_emission_history (`~collections.namedtuple`, optional): The light emission history 622 of the reactor during an experiment. 623 OH_emission_history (`~collections.namedtuple`, optional): The OH emission history of the 624 reactor during an experiment. 625 absorption_history (`~collections.namedtuple`, optional): The absorption history of the 626 reactor during an experiment. 627 """ 628 value_unit_props = [ 629 'ignition-delay', 'first-stage-ignition-delay', 'temperature', 'pressure', 630 'pressure-rise', 631 ] 632 633 rcm_data_props = [ 634 'compressed-pressure', 'compressed-temperature', 'compression-time', 'stroke', 'clearance', 635 'compression-ratio' 636 ] 637 638 def __init__(self, properties): 639 for prop in self.value_unit_props: 640 if prop in properties: 641 quant = self.process_quantity(properties[prop]) 642 setattr(self, prop.replace('-', '_'), quant) 643 else: 644 setattr(self, prop.replace('-', '_'), None) 645 646 if 'rcm-data' in properties: 647 orig_rcm_data = properties['rcm-data'] 648 rcm_props = {} 649 for prop in self.rcm_data_props: 650 if prop in orig_rcm_data: 651 quant = self.process_quantity(orig_rcm_data[prop]) 652 rcm_props[prop.replace('-', '_')] = quant 653 else: 654 rcm_props[prop.replace('-', '_')] = None 655 self.rcm_data = RCMData(**rcm_props) 656 else: 657 self.rcm_data = None 658 659 self.composition_type = properties['composition']['kind'] 660 composition = {} 661 for species in properties['composition']['species']: 662 species_name = species['species-name'] 663 amount = self.process_quantity(species['amount']) 664 InChI = species.get('InChI') 665 SMILES = species.get('SMILES') 666 atomic_composition = species.get('atomic-composition') 667 composition[species_name] = Composition( 668 species_name=species_name, InChI=InChI, SMILES=SMILES, 669 atomic_composition=atomic_composition, amount=amount) 670 671 setattr(self, 'composition', composition) 672 673 self.equivalence_ratio = properties.get('equivalence-ratio') 674 self.ignition_type = deepcopy(properties.get('ignition-type')) 675 676 if 'time-histories' in properties and 'volume-history' in properties: 677 raise TypeError('time-histories and volume-history are mutually exclusive') 678 679 if 'time-histories' in properties: 680 for hist in properties['time-histories']: 681 if hasattr(self, '{}_history'.format(hist['type'].replace(' ', '_'))): 682 raise ValueError('Each history type may only be specified once. {} was ' 683 'specified multiple times'.format(hist['type'])) 684 time_col = hist['time']['column'] 685 time_units = hist['time']['units'] 686 quant_col = hist['quantity']['column'] 687 quant_units = hist['quantity']['units'] 688 if isinstance(hist['values'], list): 689 values = np.array(hist['values']) 690 else: 691 # Load the values from a file 692 values = np.genfromtxt(hist['values']['filename'], delimiter=',') 693 694 time_history = TimeHistory( 695 time=Q_(values[:, time_col], time_units), 696 quantity=Q_(values[:, quant_col], quant_units), 697 type=hist['type'], 698 ) 699 700 setattr(self, '{}_history'.format(hist['type'].replace(' ', '_')), time_history) 701 702 if 'volume-history' in properties: 703 warn('The volume-history field should be replaced by time-histories. ' 704 'volume-history will be removed after PyKED 0.4', 705 DeprecationWarning) 706 time_col = properties['volume-history']['time']['column'] 707 time_units = properties['volume-history']['time']['units'] 708 volume_col = properties['volume-history']['volume']['column'] 709 volume_units = properties['volume-history']['volume']['units'] 710 values = np.array(properties['volume-history']['values']) 711 self.volume_history = VolumeHistory( 712 time=Q_(values[:, time_col], time_units), 713 volume=Q_(values[:, volume_col], volume_units), 714 ) 715 716 history_types = ['volume', 'temperature', 'pressure', 'piston_position', 'light_emission', 717 'OH_emission', 'absorption'] 718 for h in history_types: 719 if not hasattr(self, '{}_history'.format(h)): 720 setattr(self, '{}_history'.format(h), None) 721 722 def process_quantity(self, properties): 723 """Process the uncertainty information from a given quantity and return it 724 """ 725 quant = Q_(properties[0]) 726 if len(properties) > 1: 727 unc = properties[1] 728 uncertainty = unc.get('uncertainty', False) 729 upper_uncertainty = unc.get('upper-uncertainty', False) 730 lower_uncertainty = unc.get('lower-uncertainty', False) 731 uncertainty_type = unc.get('uncertainty-type') 732 if uncertainty_type == 'relative': 733 if uncertainty: 734 quant = quant.plus_minus(float(uncertainty), relative=True) 735 elif upper_uncertainty and lower_uncertainty: 736 warn('Asymmetric uncertainties are not supported. The ' 737 'maximum of lower-uncertainty and upper-uncertainty ' 738 'has been used as the symmetric uncertainty.') 739 uncertainty = max(float(upper_uncertainty), float(lower_uncertainty)) 740 quant = quant.plus_minus(uncertainty, relative=True) 741 else: 742 raise ValueError('Either "uncertainty" or "upper-uncertainty" and ' 743 '"lower-uncertainty" need to be specified.') 744 elif uncertainty_type == 'absolute': 745 if uncertainty: 746 uncertainty = Q_(uncertainty) 747 quant = quant.plus_minus(uncertainty.to(quant.units).magnitude) 748 elif upper_uncertainty and lower_uncertainty: 749 warn('Asymmetric uncertainties are not supported. The ' 750 'maximum of lower-uncertainty and upper-uncertainty ' 751 'has been used as the symmetric uncertainty.') 752 uncertainty = max(Q_(upper_uncertainty), Q_(lower_uncertainty)) 753 quant = quant.plus_minus(uncertainty.to(quant.units).magnitude) 754 else: 755 raise ValueError('Either "uncertainty" or "upper-uncertainty" and ' 756 '"lower-uncertainty" need to be specified.') 757 else: 758 raise ValueError('uncertainty-type must be one of "absolute" or "relative"') 759 760 return quant 761 762 def get_cantera_composition_string(self, species_conversion=None): 763 """Get the composition in a string format suitable for input to Cantera. 764 765 Returns a formatted string no matter the type of composition. As such, this method 766 is not recommended for end users; instead, prefer the `get_cantera_mole_fraction` 767 or `get_cantera_mass_fraction` methods. 768 769 Arguments: 770 species_conversion (`dict`, optional): Mapping of species identifier to a 771 species name. This argument should be supplied when the name of the 772 species in the ChemKED YAML file does not match the name of the same 773 species in a chemical kinetic mechanism. The species identifier (the key 774 of the mapping) can be the name, InChI, or SMILES provided in the ChemKED 775 file, while the value associated with a key should be the desired name in 776 the Cantera format output string. 777 778 Returns: 779 `str`: String in the ``SPEC:AMT, SPEC:AMT`` format 780 781 Raises: 782 `ValueError`: If the composition type of the `DataPoint` is not one of 783 ``'mass fraction'``, ``'mole fraction'``, or ``'mole percent'`` 784 """ 785 if self.composition_type in ['mole fraction', 'mass fraction']: 786 factor = 1.0 787 elif self.composition_type == 'mole percent': 788 factor = 100.0 789 else: 790 raise ValueError('Unknown composition type: {}'.format(self.composition_type)) 791 792 if species_conversion is None: 793 comps = ['{!s}:{:.4e}'.format(c.species_name, 794 c.amount.magnitude/factor) for c in self.composition.values()] 795 else: 796 comps = [] 797 for c in self.composition.values(): 798 amount = c.amount.magnitude/factor 799 idents = [getattr(c, s, False) for s in ['species_name', 'InChI', 'SMILES']] 800 present = [i in species_conversion for i in idents] 801 if not any(present): 802 comps.append('{!s}:{:.4e}'.format(c.species_name, amount)) 803 else: 804 if len([i for i in present if i]) > 1: 805 raise ValueError('More than one conversion present for species {}'.format( 806 c.species_name)) 807 808 ident = idents[present.index(True)] 809 species_replacement_name = species_conversion.pop(ident) 810 comps.append('{!s}:{:.4e}'.format(species_replacement_name, amount)) 811 812 if len(species_conversion) > 0: 813 raise ValueError('Unknown species in conversion: {}'.format(species_conversion)) 814 815 return ', '.join(comps) 816 817 def get_cantera_mole_fraction(self, species_conversion=None): 818 """Get the mole fractions in a string format suitable for input to Cantera. 819 820 Arguments: 821 species_conversion (`dict`, optional): Mapping of species identifier to a 822 species name. This argument should be supplied when the name of the 823 species in the ChemKED YAML file does not match the name of the same 824 species in a chemical kinetic mechanism. The species identifier (the key 825 of the mapping) can be the name, InChI, or SMILES provided in the ChemKED 826 file, while the value associated with a key should be the desired name in 827 the Cantera format output string. 828 829 Returns: 830 `str`: String of mole fractions in the ``SPEC:AMT, SPEC:AMT`` format 831 832 Raises: 833 `ValueError`: If the composition type is ``'mass fraction'``, the conversion cannot 834 be done because no molecular weight information is known 835 836 Examples: 837 >>> dp = DataPoint(properties) 838 >>> dp.get_cantera_mole_fraction() 839 'H2:4.4400e-03, O2:5.5600e-03, Ar:9.9000e-01' 840 >>> species_conversion = {'H2': 'h2', 'O2': 'o2'} 841 >>> dp.get_cantera_mole_fraction(species_conversion) 842 'h2:4.4400e-03, o2:5.5600e-03, Ar:9.9000e-01' 843 >>> species_conversion = {'1S/H2/h1H': 'h2', '1S/O2/c1-2': 'o2'} 844 >>> dp.get_cantera_mole_fraction(species_conversion) 845 'h2:4.4400e-03, o2:5.5600e-03, Ar:9.9000e-01' 846 """ 847 if self.composition_type == 'mass fraction': 848 raise ValueError('Cannot get mole fractions from the given composition.\n' 849 '{}'.format(self.composition)) 850 else: 851 return self.get_cantera_composition_string(species_conversion) 852 853 def get_cantera_mass_fraction(self, species_conversion=None): 854 """Get the mass fractions in a string format suitable for input to Cantera. 855 856 Arguments: 857 species_conversion (`dict`, optional): Mapping of species identifier to a 858 species name. This argument should be supplied when the name of the 859 species in the ChemKED YAML file does not match the name of the same 860 species in a chemical kinetic mechanism. The species identifier (the key 861 of the mapping) can be the name, InChI, or SMILES provided in the ChemKED 862 file, while the value associated with a key should be the desired name in 863 the Cantera format output string. 864 865 Returns: 866 `str`: String of mass fractions in the ``SPEC:AMT, SPEC:AMT`` format 867 868 Raises: 869 `ValueError`: If the composition type is ``'mole fraction'`` or 870 ``'mole percent'``, the conversion cannot be done because no molecular 871 weight information is known 872 873 Examples: 874 >>> dp = DataPoint(properties) 875 >>> dp.get_cantera_mass_fraction() 876 'H2:2.2525e-04, O2:4.4775e-03, Ar:9.9530e-01' 877 >>> species_conversion = {'H2': 'h2', 'O2': 'o2'} 878 >>> dp.get_cantera_mass_fraction(species_conversion) 879 'h2:2.2525e-04, o2:4.4775e-03, Ar:9.9530e-01' 880 >>> species_conversion = {'1S/H2/h1H': 'h2', '1S/O2/c1-2': 'o2'} 881 >>> dp.get_cantera_mass_fraction(species_conversion) 882 'h2:2.2525e-04, o2:4.4775e-03, Ar:9.9530e-01' 883 """ 884 if self.composition_type in ['mole fraction', 'mole percent']: 885 raise ValueError('Cannot get mass fractions from the given composition.\n' 886 '{}'.format(self.composition) 887 ) 888 else: 889 return self.get_cantera_composition_string(species_conversion) 890