"""Module with converters from other formats.
"""

# Standard libraries
import os
from argparse import ArgumentParser
from warnings import warn
import xml.etree.ElementTree as etree

from requests.exceptions import HTTPError, ConnectionError
import habanero
import pint

# Local imports
from .validation import yaml, property_units, crossref_api
from .validation import units as unit_registry
from ._version import __version__
from . import chemked

# Valid properties for ReSpecTh dataGroup
datagroup_properties = ['temperature', 'pressure', 'ignition delay',
                        'pressure rise',
                        ]
"""`list`: Valid properties for a ReSpecTh dataGroup"""


class ParseError(Exception):
    """Base class for errors."""
    pass


class KeywordError(ParseError):
    """Raised for errors in keyword parsing.

    Args:
        *keywords: Values describing the error; the first one is used as the message.
    """

    def __init__(self, *keywords):
        # Forward to Exception so that ``args`` is populated (needed for
        # pickling/copying and for a meaningful default ``repr``).
        super().__init__(*keywords)
        self.keywords = keywords

    def __str__(self):
        return repr('Error: {}.'.format(self.keywords[0]))


class MissingElementError(KeywordError):
    """Raised for missing required elements."""

    def __str__(self):
        return repr('Error: required element {} is missing.'.format(
            self.keywords[0]))


class MissingAttributeError(KeywordError):
    """Raised for missing required attribute."""

    def __str__(self):
        return repr('Error: required attribute {} of {} is missing.'.format(
            self.keywords[0], self.keywords[1])
            )


def _strip_orcid_url(orcid):
    """Return the bare ORCID iD from a value that may be a full ORCID URL.

    ``str.lstrip`` removes a *set of characters*, not a prefix, so it mangles
    ``https://`` URLs (the ``s`` is not in the set and stops the stripping).
    Taking the last path segment handles ``http://``, ``https://``,
    scheme-less, and already-bare values alike.

    Args:
        orcid (`str`): ORCID iD, optionally prefixed by an ``orcid.org`` URL

    Returns:
        `str`: The part of ``orcid`` after the last ``/``
    """
    return orcid.rstrip('/').rsplit('/', 1)[-1]


def _convert_composition_amount(text, units):
    """Convert a ReSpecTh composition amount to a ChemKED amount and kind.

    Args:
        text (`str`): Text of the amount, parseable by `float`
        units (`str`): ReSpecTh units of the amount

    Returns:
        `tuple` or `None`: ``(amount, kind)`` where ``amount`` is a one-element
        list holding the (possibly rescaled) value and ``kind`` is the resulting
        composition kind; `None` if ``units`` is not supported, so that the
        caller can raise its own error.
    """
    # stacklevel=2 attributes the warnings to the calling parser function.
    if units in ('mole fraction', 'mass fraction', 'mole percent'):
        # If mole or mass fraction, just set value
        return [float(text)], units
    if units == 'percent':
        # assume this means mole percent
        warn('Assuming percent in composition means mole percent', stacklevel=2)
        return [float(text)], 'mole percent'
    if units == 'ppm':
        # assume molar ppm, convert to mole fraction
        warn('Assuming molar ppm in composition and converting to mole fraction',
             stacklevel=2)
        return [float(text) * 1.e-6], 'mole fraction'
    if units == 'ppb':
        # assume molar ppb, convert to mole fraction
        warn('Assuming molar ppb in composition and converting to mole fraction',
             stacklevel=2)
        return [float(text) * 1.e-9], 'mole fraction'
    return None


def get_file_metadata(root):
    """Read and parse ReSpecTh XML file metadata (file author, version, etc.)

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with file metadata

    Raises:
        MissingElementError: If ``fileAuthor`` is absent or empty
    """
    properties = {}

    file_author = getattr(root.find('fileAuthor'), 'text', False)
    # Test for missing attribute or empty string in the same statement
    if not file_author:
        raise MissingElementError('fileAuthor')
    properties['file-authors'] = [{'name': file_author}]

    # Default version is 0 for the ChemKED file
    properties['file-version'] = 0

    # Default ChemKED version
    properties['chemked-version'] = __version__

    return properties


def get_reference(root):
    """Read reference info from root of ReSpecTh XML file.

    The DOI (if present) is looked up through ``crossref_api``; if the lookup
    fails, or no DOI is given, the ``preferredKey`` attribute is stored under
    the ``detail`` key instead.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with reference information

    Raises:
        MissingElementError: If ``bibliographyLink`` is absent
        KeywordError: If the DOI lookup fails and no ``preferredKey`` is set
        MissingAttributeError: If neither ``doi`` nor ``preferredKey`` is set
    """
    reference = {}
    elem = root.find('bibliographyLink')
    if elem is None:
        raise MissingElementError('bibliographyLink')

    # Try to get reference info via DOI, fall back on preferredKey if necessary.
    ref_doi = elem.get('doi', None)
    ref_key = elem.get('preferredKey', None)

    if ref_doi is not None:
        try:
            ref = crossref_api.works(ids=ref_doi)['message']
        except (HTTPError, habanero.RequestError, ConnectionError):
            if ref_key is None:
                raise KeywordError('DOI not found and preferredKey attribute not set')
            else:
                warn('Missing doi attribute in bibliographyLink or lookup failed. '
                     'Setting "detail" key as a fallback; please update to the appropriate fields.'
                     )
                reference['detail'] = ref_key
                if reference['detail'][-1] != '.':
                    reference['detail'] += '.'
        else:
            if ref_key is not None:
                warn('Using DOI to obtain reference information, rather than preferredKey.')
            reference['doi'] = elem.attrib['doi']
            # Now get elements of the reference data
            # Assume that the reference returned by the DOI lookup always has a container-title
            reference['journal'] = ref.get('container-title')[0]
            # NOTE(review): the lines below assume the record carries a
            # publication date and a numeric volume -- confirm for all sources.
            ref_year = ref.get('published-print') or ref.get('published-online')
            reference['year'] = int(ref_year['date-parts'][0][0])
            reference['volume'] = int(ref.get('volume'))
            reference['pages'] = ref.get('page')
            reference['authors'] = []
            for author in ref['author']:
                auth = {}
                auth['name'] = ' '.join([author['given'], author['family']])
                # Add ORCID if available
                orcid = author.get('ORCID')
                if orcid:
                    auth['ORCID'] = _strip_orcid_url(orcid)
                reference['authors'].append(auth)

    elif ref_key is not None:
        warn('Missing doi attribute in bibliographyLink. '
             'Setting "detail" key as a fallback; please update to the appropriate fields.'
             )
        reference['detail'] = ref_key
        if reference['detail'][-1] != '.':
            reference['detail'] += '.'
    else:
        # Need one of DOI or preferredKey
        raise MissingAttributeError('preferredKey', 'bibliographyLink')

    return reference


def get_experiment_kind(root):
    """Read experiment type and apparatus kind from root of ReSpecTh XML file.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with experiment type and apparatus information.

    Raises:
        MissingElementError: If ``experimentType`` or ``apparatus/kind`` is absent or empty
        NotImplementedError: If the experiment type or apparatus kind is not supported
    """
    properties = {}

    experiment_type = getattr(root.find('experimentType'), 'text', None)
    # Test for missing element or empty string, as done for the other elements
    if not experiment_type:
        raise MissingElementError('experimentType')
    if experiment_type != 'Ignition delay measurement':
        raise NotImplementedError(experiment_type + ' not (yet) supported')
    properties['experiment-type'] = 'ignition delay'

    properties['apparatus'] = {'kind': '', 'institution': '', 'facility': ''}
    kind = getattr(root.find('apparatus/kind'), 'text', False)
    # Test for missing attribute or empty string
    if not kind:
        raise MissingElementError('apparatus/kind')
    elif kind in ['shock tube', 'rapid compression machine']:
        properties['apparatus']['kind'] = kind
    else:
        raise NotImplementedError(kind + ' experiment not (yet) supported')

    return properties


def get_common_properties(root):
    """Read common properties from root of ReSpecTh XML file.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with common properties

    Raises:
        KeywordError: For unsupported properties, unsupported or inconsistent
            composition units, or units incompatible with the property
    """
    properties = {}

    for elem in root.iterfind('commonProperties/property'):
        name = elem.attrib['name']

        if name == 'initial composition':
            properties['composition'] = {'species': [], 'kind': None}

            for child in elem.iter('component'):
                spec = {}
                spec['species-name'] = child.find('speciesLink').attrib['preferredKey']
                units = child.find('amount').attrib['units']

                # use InChI for unique species identifier (if present)
                try:
                    spec['InChI'] = child.find('speciesLink').attrib['InChI']
                except KeyError:
                    # TODO: add InChI validator/search
                    warn('Missing InChI for species ' + spec['species-name'])

                converted = _convert_composition_amount(child.find('amount').text, units)
                if converted is None:
                    raise KeywordError('Composition units need to be one of: mole fraction, '
                                       'mass fraction, mole percent, percent, ppm, or ppb.'
                                       )
                spec['amount'], units = converted

                properties['composition']['species'].append(spec)

                # check consistency of composition type
                if properties['composition']['kind'] is None:
                    properties['composition']['kind'] = units
                elif properties['composition']['kind'] != units:
                    raise KeywordError('composition units ' + units +
                                       ' not consistent with ' +
                                       properties['composition']['kind']
                                       )

        elif name in datagroup_properties:
            field = name.replace(' ', '-')
            units = elem.attrib['units']
            if units == 'Torr':
                units = 'torr'
            # Only used to check that the units are dimensionally compatible
            quantity = 1.0 * unit_registry(units)
            try:
                quantity.to(property_units[field])
            except pint.DimensionalityError:
                raise KeywordError('units incompatible for property ' + name)

            properties[field] = [' '.join([elem.find('value').text, units])]

        else:
            raise KeywordError('Property ' + name + ' not supported as common property')

    return properties


def get_ignition_type(root):
    """Gets ignition type and target.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with ignition type/target information

    Raises:
        MissingElementError: If ``ignitionType`` is absent
        MissingAttributeError: If its ``target`` or ``type`` attribute is absent
        NotImplementedError: If more than one ignition target is given
        KeywordError: If the target or type is not one of the accepted values
    """
    properties = {}
    elem = root.find('ignitionType')

    if elem is None:
        raise MissingElementError('ignitionType')
    elem = elem.attrib

    if 'target' in elem:
        ign_target = elem['target'].rstrip(';').upper()
    else:
        raise MissingAttributeError('target', 'ignitionType')

    if 'type' in elem:
        ign_type = elem['type']
        if ign_type == 'baseline max intercept from d/dt':
            ign_type = 'd/dt max extrapolated'
    else:
        raise MissingAttributeError('type', 'ignitionType')

    # ReSpecTh allows multiple ignition targets
    if len(ign_target.split(';')) > 1:
        raise NotImplementedError('Multiple ignition targets not supported.')

    # Acceptable ignition targets include pressure, temperature, and species
    # concentrations
    if ign_target == 'OHEX':
        ign_target = 'OH*'
    elif ign_target == 'CHEX':
        ign_target = 'CH*'
    elif ign_target == 'P':
        ign_target = 'pressure'
    elif ign_target == 'T':
        ign_target = 'temperature'

    if ign_target not in ['pressure', 'temperature', 'OH', 'OH*', 'CH*', 'CH']:
        raise KeywordError(ign_target + ' not valid ignition target')

    if ign_type not in ['max', 'd/dt max', '1/2 max', 'min', 'd/dt max extrapolated']:
        raise KeywordError(ign_type + ' not valid ignition type')

    properties['type'] = ign_type
    properties['target'] = ign_target

    return properties


def get_datapoints(root):
    """Parse datapoints with ignition delay from file.

    The first ``dataGroup`` holds the main experimental data; any further
    ``dataGroup`` elements are read as time histories and attached to the
    first datapoint.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`list`): List of dictionaries, one per datapoint

    Raises:
        MissingElementError: If no ``dataGroup``, ``property`` or ``dataPoint`` is found
        KeyError: If a property of the first dataGroup is not a valid dataPoint property
        KeywordError: For unknown value tags, unsupported or inconsistent
            composition units, or malformed time-history dataGroups
    """
    # Shock tube experiment will have one data group, while RCM may have one
    # or two (one for ignition delay, one for volume-history)
    dataGroups = root.findall('dataGroup')
    if not dataGroups:
        raise MissingElementError('dataGroup')

    # all situations will have main experimental data in first dataGroup
    dataGroup = dataGroups[0]
    property_id = {}
    unit_id = {}
    species_id = {}
    # get properties of dataGroup
    for prop in dataGroup.findall('property'):
        unit_id[prop.attrib['id']] = prop.attrib['units']
        temp_prop = prop.attrib['name']
        if temp_prop not in datagroup_properties + ['composition']:
            raise KeyError(temp_prop + ' not valid dataPoint property')
        property_id[prop.attrib['id']] = temp_prop

        if temp_prop == 'composition':
            spec = {'species-name': prop.find('speciesLink').attrib['preferredKey']}
            # use InChI for unique species identifier (if present)
            try:
                spec['InChI'] = prop.find('speciesLink').attrib['InChI']
            except KeyError:
                # TODO: add InChI validator/search
                warn('Missing InChI for species ' + spec['species-name'])
            species_id[prop.attrib['id']] = spec

    if not property_id:
        raise MissingElementError('property')

    # now get data points
    datapoints = []
    for dp in dataGroup.findall('dataPoint'):
        datapoint = {}
        if 'composition' in property_id.values():
            datapoint['composition'] = {'species': [], 'kind': None}

        for val in dp:
            # handle "regular" properties differently than composition
            if property_id.get(val.tag) in datagroup_properties:
                units = unit_id[val.tag]
                if units == 'Torr':
                    units = 'torr'
                datapoint[property_id[val.tag].replace(' ', '-')] = [val.text + ' ' + units]
            elif property_id.get(val.tag) == 'composition':
                spec = {}
                spec['species-name'] = species_id[val.tag]['species-name']
                spec['InChI'] = species_id[val.tag].get('InChI')

                converted = _convert_composition_amount(val.text, unit_id[val.tag])
                if converted is None:
                    raise KeywordError('composition units need to be one of: mole fraction, '
                                       'mass fraction, mole percent, percent, ppm, or ppb.'
                                       )
                spec['amount'], units = converted

                # check consistency of composition type
                if datapoint['composition']['kind'] is None:
                    datapoint['composition']['kind'] = units
                elif datapoint['composition']['kind'] != units:
                    raise KeywordError(
                        'composition units ' + units +
                        ' not consistent with ' + datapoint['composition']['kind']
                        )

                datapoint['composition']['species'].append(spec)
            else:
                raise KeywordError('value missing from properties: ' + val.tag)

        datapoints.append(datapoint)

    if len(datapoints) == 0:
        raise MissingElementError('dataPoint')

    # ReSpecTh files can have other dataGroups with pressure, volume, or temperature histories
    if len(dataGroups) > 1:
        datapoints[0]['time-histories'] = []
        for dataGroup in dataGroups[1:]:
            time_tag = None
            quant_tags = []
            quant_dicts = []
            quant_types = []
            for prop in dataGroup.findall('property'):
                if prop.attrib['name'] == 'time':
                    time_dict = {'units': prop.attrib['units'], 'column': 0}
                    time_tag = prop.attrib['id']
                elif prop.attrib['name'] in ['volume', 'temperature', 'pressure']:
                    quant_types.append(prop.attrib['name'])
                    quant_dicts.append({'units': prop.attrib['units'], 'column': 1})
                    quant_tags.append(prop.attrib['id'])
                else:
                    raise KeywordError('Only volume, temperature, pressure, and time are allowed '
                                       'in a time-history dataGroup.')

            if time_tag is None or len(quant_tags) == 0:
                raise KeywordError('Both time and quantity properties required for time-history.')

            # One history per quantity; each stores [time, value] pairs
            time_histories = [
                {'time': time_dict, 'quantity': q, 'type': t, 'values': []}
                for (q, t) in zip(quant_dicts, quant_types)
            ]
            # collect the time history of every quantity
            for dp in dataGroup.findall('dataPoint'):
                time = None
                quants = {}
                for val in dp:
                    if val.tag == time_tag:
                        time = float(val.text)
                    elif val.tag in quant_tags:
                        quant = float(val.text)
                        tag_idx = quant_tags.index(val.tag)
                        quant_type = quant_types[tag_idx]
                        quants[quant_type] = quant
                    else:
                        raise KeywordError('Value tag {} not found in dataGroup tags: '
                                           '{}'.format(val.tag, quant_tags))
                if time is None or len(quants) == 0:
                    raise KeywordError('Both time and quantity values required in each '
                                       'time-history dataPoint.')
                for t in time_histories:
                    t['values'].append([time, quants[t['type']]])

            datapoints[0]['time-histories'].extend(time_histories)

    return datapoints


def ReSpecTh_to_ChemKED(filename_xml, file_author='', file_author_orcid='', *, validate=False):
    """Convert ReSpecTh XML file to ChemKED-compliant dictionary.

    Args:
        filename_xml (`str`): Name of ReSpecTh XML file to be converted.
        file_author (`str`, optional): Name of an additional file author, appended after
            the author read from the file
        file_author_orcid (`str`, optional): ORCID of the additional file author
        validate (`bool`, optional, keyword-only): Set to `True` to validate the resulting
            property dictionary with `ChemKED`. Set to `False` if the file is being loaded and will
            be validated at some other point before use.

    Returns:
        properties (`dict`): Dictionary with the converted ChemKED properties

    Raises:
        KeywordError: If a pressure rise is given for an RCM, a volume history is given for
            a shock tube, or ``file_author_orcid`` is given without ``file_author``
    """
    # get all information from XML file
    tree = etree.parse(filename_xml)
    root = tree.getroot()

    # get file metadata
    properties = get_file_metadata(root)

    # get reference info
    properties['reference'] = get_reference(root)
    # Save name of original data filename; separate it from any existing
    # detail (which ends with a period) by a space.
    conversion_note = 'Converted from ReSpecTh XML file ' + os.path.basename(filename_xml)
    existing_detail = properties['reference'].get('detail', '')
    if existing_detail:
        properties['reference']['detail'] = existing_detail + ' ' + conversion_note
    else:
        properties['reference']['detail'] = conversion_note

    # Ensure ignition delay, and get which kind of experiment
    properties.update(get_experiment_kind(root))

    # Get properties shared across the file
    properties['common-properties'] = get_common_properties(root)

    # Determine definition of ignition delay
    properties['common-properties']['ignition-type'] = get_ignition_type(root)

    # Now parse ignition delay datapoints
    properties['datapoints'] = get_datapoints(root)

    # Ensure inclusion of pressure rise or volume history matches apparatus.
    has_pres_rise = ('pressure-rise' in properties['common-properties'] or
                     any('pressure-rise' in dp for dp in properties['datapoints'])
                     )
    if has_pres_rise and properties['apparatus']['kind'] == 'rapid compression machine':
        raise KeywordError('Pressure rise cannot be defined for RCM.')

    has_vol_hist = any(
        t.get('type') == 'volume'
        for dp in properties['datapoints']
        for t in dp.get('time-histories', [])
        )
    if has_vol_hist and properties['apparatus']['kind'] == 'shock tube':
        raise KeywordError('Volume history cannot be defined for shock tube.')

    # add any additional file authors
    if file_author_orcid and not file_author:
        raise KeywordError('If file_author_orcid is specified, file_author must be as well')

    if file_author:
        temp_author = {'name': file_author}
        if file_author_orcid:
            temp_author['ORCID'] = file_author_orcid
        properties['file-authors'].append(temp_author)

    # Now go through datapoints and apply common properties
    for datapoint in properties['datapoints']:
        for prop in properties['common-properties']:
            datapoint[prop] = properties['common-properties'][prop]

    if validate:
        chemked.ChemKED(dict_input=properties)

    return properties


def respth2ck(argv=None):
    """Command-line entry point for converting a ReSpecTh XML file to a ChemKED YAML file.

    Args:
        argv (`list` of `str`, optional): Command-line arguments; `None` lets
            `argparse` read them from ``sys.argv``
    """
    parser = ArgumentParser(
        description='Convert a ReSpecTh XML file to a ChemKED YAML file.'
        )
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input filename (e.g., "file1.xml")'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=False,
                        default='',
                        help='Output filename (e.g., "file1.yaml")'
                        )
    parser.add_argument('-fa', '--file-author',
                        dest='file_author',
                        type=str,
                        required=False,
                        default='',
                        help='File author name to override original'
                        )
    parser.add_argument('-fo', '--file-author-orcid',
                        dest='file_author_orcid',
                        type=str,
                        required=False,
                        default='',
                        help='File author ORCID'
                        )

    args = parser.parse_args(argv)

    filename_ck = args.output
    filename_xml = args.input

    properties = ReSpecTh_to_ChemKED(filename_xml, args.file_author, args.file_author_orcid,
                                     validate=True)

    # set output filename and path: default is the input name with a .yaml
    # extension, next to the input file
    if not filename_ck:
        filename_ck = os.path.join(os.path.dirname(filename_xml),
                                   os.path.splitext(os.path.basename(filename_xml))[0] + '.yaml'
                                   )

    with open(filename_ck, 'w') as outfile:
        yaml.dump(properties, outfile, default_flow_style=False)
    print('Converted to ' + filename_ck)


def ck2respth(argv=None):
    """Command-line entry point for converting a ChemKED YAML file to a ReSpecTh XML file.

    Args:
        argv (`list` of `str`, optional): Command-line arguments; `None` lets
            `argparse` read them from ``sys.argv``
    """
    parser = ArgumentParser(
        description='Convert a ChemKED YAML file to a ReSpecTh XML file.'
        )
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input filename (e.g., "file1.yaml")'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=False,
                        default='',
                        help='Output filename (e.g., "file1.xml")'
                        )

    args = parser.parse_args(argv)

    c = chemked.ChemKED(yaml_file=args.input)
    c.convert_to_ReSpecTh(args.output)


def main(argv=None):
    """General function for converting between ReSpecTh and ChemKED files based on extension.

    When ``--output`` is omitted, the output name is the input name with the
    opposite extension (``.xml`` <-> ``.yaml``).

    Args:
        argv (`list` of `str`, optional): Command-line arguments; `None` lets
            `argparse` read them from ``sys.argv``

    Raises:
        KeywordError: If the input/output extensions are not one ``.xml`` and one ``.yaml``
    """
    parser = ArgumentParser(
        description='Convert between ReSpecTh XML file and ChemKED YAML file '
                    'automatically based on file extension.'
        )
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input filename (e.g., "file1.yaml" or "file2.xml")'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=False,
                        default='',
                        help='Output filename (e.g., "file1.xml" or "file2.yaml")'
                        )
    parser.add_argument('-fa', '--file-author',
                        dest='file_author',
                        type=str,
                        required=False,
                        default='',
                        help='File author name to override original'
                        )
    parser.add_argument('-fo', '--file-author-orcid',
                        dest='file_author_orcid',
                        type=str,
                        required=False,
                        default='',
                        help='File author ORCID'
                        )

    args = parser.parse_args(argv)

    input_ext = os.path.splitext(args.input)[1]
    output = args.output
    # --output is optional: without this, an omitted output always fell
    # through to the "need to be .xml/.yaml" error below.
    if not output and input_ext in ('.xml', '.yaml'):
        output_default_ext = '.yaml' if input_ext == '.xml' else '.xml'
        output = os.path.splitext(args.input)[0] + output_default_ext
    output_ext = os.path.splitext(output)[1]

    if input_ext == '.xml' and output_ext == '.yaml':
        respth2ck(['-i', args.input, '-o', output, '-fa', args.file_author,
                   '-fo', args.file_author_orcid])

    elif input_ext == '.yaml' and output_ext == '.xml':
        c = chemked.ChemKED(yaml_file=args.input)
        c.convert_to_ReSpecTh(output)

    elif input_ext == '.xml' and output_ext == '.xml':
        raise KeywordError('Cannot convert .xml to .xml')

    elif input_ext == '.yaml' and output_ext == '.yaml':
        raise KeywordError('Cannot convert .yaml to .yaml')

    else:
        raise KeywordError('Input/output args need to be .xml/.yaml')


if __name__ == '__main__':
    main()