1"""Module with converters from other formats.
2"""
3
4# Standard libraries
5import os
6from argparse import ArgumentParser
7from warnings import warn
8import xml.etree.ElementTree as etree
9
10from requests.exceptions import HTTPError, ConnectionError
11import habanero
12import pint
13
14# Local imports
15from .validation import yaml, property_units, crossref_api
16from .validation import units as unit_registry
17from ._version import __version__
18from . import chemked
19
# Property names accepted in a ReSpecTh dataGroup (also used to recognise
# common properties)
datagroup_properties = [
    'temperature',
    'pressure',
    'ignition delay',
    'pressure rise',
]
"""`list`: Valid properties for a ReSpecTh dataGroup"""
25
26
class ParseError(Exception):
    """Base class for the parsing errors defined in this module."""
30
31
class KeywordError(ParseError):
    """Error raised when a keyword or value cannot be parsed.

    All positional arguments are stored in ``keywords``; only the first one
    is used when the error is rendered as a string.
    """

    def __init__(self, *keywords):
        self.keywords = keywords

    def __str__(self):
        message = 'Error: {}.'.format(self.keywords[0])
        # repr() keeps the surrounding quotes in the rendered message
        return repr(message)
40
41
class MissingElementError(KeywordError):
    """Error raised when a required element is absent.

    The first keyword is the name of the missing element.
    """

    def __str__(self):
        element = self.keywords[0]
        message = 'Error: required element {} is missing.'.format(element)
        return repr(message)
48
49
class MissingAttributeError(KeywordError):
    """Error raised when a required attribute is absent.

    The first keyword is the attribute name, the second the element that
    should carry it.
    """

    def __str__(self):
        attribute = self.keywords[0]
        element = self.keywords[1]
        message = 'Error: required attribute {} of {} is missing.'.format(
            attribute, element)
        return repr(message)
57
58
def get_file_metadata(root):
    """Collect file-level metadata (file author, versions) from a ReSpecTh XML file.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with file metadata

    Raises:
        MissingElementError: If ``fileAuthor`` is absent or has no text.
    """
    author_elem = root.find('fileAuthor')
    # A missing element and an element without text are handled the same way
    author_name = getattr(author_elem, 'text', False)
    if not author_name:
        raise MissingElementError('fileAuthor')

    return {
        'file-authors': [{'name': author_name}],
        # Default version is 0 for the ChemKED file
        'file-version': 0,
        # Default ChemKED version is the version of this package
        'chemked-version': __version__,
    }
84
85
def _terminate_with_period(text):
    """Return ``text`` with a trailing period appended if it lacks one."""
    return text if text.endswith('.') else text + '.'


def get_reference(root):
    """Read reference info from root of ReSpecTh XML file.

    When ``bibliographyLink`` has a ``doi`` attribute, the reference is looked
    up through ``crossref_api``. Without a DOI, or when the lookup fails, the
    ``preferredKey`` attribute is stored under ``detail`` instead.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with reference information

    Raises:
        MissingElementError: If ``bibliographyLink`` is absent.
        MissingAttributeError: If neither ``doi`` nor ``preferredKey`` is set.
        KeywordError: If the DOI lookup fails and ``preferredKey`` is not set.
    """
    reference = {}
    elem = root.find('bibliographyLink')
    if elem is None:
        raise MissingElementError('bibliographyLink')

    # Try to get reference info via DOI, fall back on preferredKey if necessary.
    # An empty attribute carries no information, so it is treated as missing.
    ref_doi = elem.get('doi') or None
    ref_key = elem.get('preferredKey') or None

    if ref_doi is not None:
        try:
            ref = crossref_api.works(ids=ref_doi)['message']
        except (HTTPError, habanero.RequestError, ConnectionError) as err:
            if ref_key is None:
                raise KeywordError(
                    'DOI not found and preferredKey attribute not set'
                ) from err
            warn('Missing doi attribute in bibliographyLink or lookup failed. '
                 'Setting "detail" key as a fallback; please update to the appropriate fields.'
                 )
            reference['detail'] = _terminate_with_period(ref_key)
        else:
            if ref_key is not None:
                warn('Using DOI to obtain reference information, rather than preferredKey.')
            reference['doi'] = ref_doi
            # Now get elements of the reference data
            # Assume that the reference returned by the DOI lookup always has a container-title
            reference['journal'] = ref.get('container-title')[0]
            ref_year = ref.get('published-print') or ref.get('published-online')
            reference['year'] = int(ref_year['date-parts'][0][0])
            # Leave the volume out when the record has none; int(None) would raise
            volume = ref.get('volume')
            if volume is not None:
                reference['volume'] = int(volume)
            reference['pages'] = ref.get('page')
            reference['authors'] = []
            for author in ref['author']:
                auth = {'name': ' '.join([author['given'], author['family']])}
                # Add ORCID if available
                orcid = author.get('ORCID')
                if orcid:
                    # Keep only the identifier after the last slash. str.lstrip()
                    # removes a set of characters rather than a prefix, so it
                    # mangles URLs that use the https scheme.
                    auth['ORCID'] = orcid.rstrip('/').rsplit('/', 1)[-1]
                reference['authors'].append(auth)

    elif ref_key is not None:
        warn('Missing doi attribute in bibliographyLink. '
             'Setting "detail" key as a fallback; please update to the appropriate fields.'
             )
        reference['detail'] = _terminate_with_period(ref_key)
    else:
        # Need one of DOI or preferredKey
        raise MissingAttributeError('preferredKey', 'bibliographyLink')

    return reference
150
151
def get_experiment_kind(root):
    """Read the experiment type and apparatus kind from a ReSpecTh XML file.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with experiment type and apparatus information.

    Raises:
        MissingElementError: If ``experimentType`` or ``apparatus/kind`` is
            missing or has no text.
        NotImplementedError: If the experiment type or the apparatus kind is
            not one of the supported values.
    """
    properties = {}

    # Test for missing element or empty text in the same statement
    experiment_type = getattr(root.find('experimentType'), 'text', False)
    if not experiment_type:
        raise MissingElementError('experimentType')
    if experiment_type != 'Ignition delay measurement':
        raise NotImplementedError(experiment_type + ' not (yet) supported')
    properties['experiment-type'] = 'ignition delay'

    # Institution and facility are left blank; only the kind is filled in here
    properties['apparatus'] = {'kind': '', 'institution': '', 'facility': ''}
    kind = getattr(root.find('apparatus/kind'), 'text', False)
    # Test for missing element or empty text
    if not kind:
        raise MissingElementError('apparatus/kind')
    if kind not in ['shock tube', 'rapid compression machine']:
        raise NotImplementedError(kind + ' experiment not (yet) supported')
    properties['apparatus']['kind'] = kind

    return properties
178
179
def _common_composition(elem):
    """Build the composition dictionary for an 'initial composition' property."""
    # units -> (warning to emit or None, scale factor or None, resulting kind)
    unit_rules = {
        'mole fraction': (None, None, 'mole fraction'),
        'mass fraction': (None, None, 'mass fraction'),
        'mole percent': (None, None, 'mole percent'),
        'percent': ('Assuming percent in composition means mole percent',
                    None, 'mole percent'),
        'ppm': ('Assuming molar ppm in composition and converting to mole fraction',
                1.e-6, 'mole fraction'),
        'ppb': ('Assuming molar ppb in composition and converting to mole fraction',
                1.e-9, 'mole fraction'),
    }

    composition = {'species': [], 'kind': None}
    for component in elem.iter('component'):
        species_link = component.find('speciesLink')
        species = {'species-name': species_link.attrib['preferredKey']}
        amount_elem = component.find('amount')
        units = amount_elem.attrib['units']

        # use InChI for unique species identifier (if present)
        if 'InChI' in species_link.attrib:
            species['InChI'] = species_link.attrib['InChI']
        else:
            # TODO: add InChI validator/search
            warn('Missing InChI for species ' + species['species-name'])

        if units not in unit_rules:
            raise KeywordError('Composition units need to be one of: mole fraction, '
                               'mass fraction, mole percent, percent, ppm, or ppb.'
                               )
        message, factor, kind = unit_rules[units]
        if message is not None:
            warn(message)
        amount = float(amount_elem.text)
        if factor is not None:
            amount = amount * factor
        species['amount'] = [amount]

        composition['species'].append(species)

        # every species has to use the same kind of composition
        if composition['kind'] is None:
            composition['kind'] = kind
        elif composition['kind'] != kind:
            raise KeywordError('composition units ' + kind +
                               ' not consistent with ' +
                               composition['kind']
                               )
    return composition


def get_common_properties(root):
    """Parse the ``commonProperties`` section of a ReSpecTh XML file.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with common properties
    """
    properties = {}

    for elem in root.iterfind('commonProperties/property'):
        name = elem.attrib['name']

        if name == 'initial composition':
            properties['composition'] = _common_composition(elem)
            continue

        if name not in datagroup_properties:
            raise KeywordError('Property ' + name + ' not supported as common property')

        field = name.replace(' ', '-')
        units = elem.attrib['units']
        # the unit registry is given the lowercase spelling
        units = 'torr' if units == 'Torr' else units

        # make sure the units are dimensionally compatible with the property
        quantity = 1.0 * unit_registry(units)
        try:
            quantity.to(property_units[field])
        except pint.DimensionalityError:
            raise KeywordError('units incompatible for property ' + name)

        properties[field] = [' '.join([elem.find('value').text, units])]

    return properties
261
262
def get_ignition_type(root):
    """Read the ignition type and target from the ``ignitionType`` element.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        properties (`dict`): Dictionary with ignition type/target information
    """
    elem = root.find('ignitionType')
    if elem is None:
        raise MissingElementError('ignitionType')
    attributes = elem.attrib

    if 'target' not in attributes:
        raise MissingAttributeError('target', 'ignitionType')
    target = attributes['target'].rstrip(';').upper()

    if 'type' not in attributes:
        raise MissingAttributeError('type', 'ignitionType')
    kind = attributes['type']
    if kind == 'baseline max intercept from d/dt':
        kind = 'd/dt max extrapolated'

    # ReSpecTh allows multiple ignition targets
    if ';' in target:
        raise NotImplementedError('Multiple ignition targets not supported.')

    # Acceptable ignition targets include pressure, temperature, and species
    # concentrations; translate the ReSpecTh spellings
    aliases = {
        'OHEX': 'OH*',
        'CHEX': 'CH*',
        'P': 'pressure',
        'T': 'temperature',
    }
    target = aliases.get(target, target)

    if target not in ('pressure', 'temperature', 'OH', 'OH*', 'CH*', 'CH'):
        raise KeywordError(target + ' not valid ignition target')

    if kind not in ('max', 'd/dt max', '1/2 max', 'min', 'd/dt max extrapolated'):
        raise KeywordError(kind + ' not valid ignition type')

    return {'type': kind, 'target': target}
316
317
def _composition_amount(text, units):
    """Convert a composition value to an ``(amount, kind)`` pair for ``units``."""
    # If mole or mass fraction, just set value
    if units in ['mole fraction', 'mass fraction', 'mole percent']:
        return float(text), units
    if units == 'percent':
        # assume this means mole percent
        warn('Assuming percent in composition means mole percent')
        return float(text), 'mole percent'
    if units == 'ppm':
        # assume molar ppm, convert to mole fraction
        warn('Assuming molar ppm in composition and converting to mole fraction')
        return float(text) * 1.e-6, 'mole fraction'
    if units == 'ppb':
        # assume molar ppb, convert to mole fraction
        warn('Assuming molar ppb in composition and converting to mole fraction')
        return float(text) * 1.e-9, 'mole fraction'
    raise KeywordError('composition units need to be one of: mole fraction, '
                       'mass fraction, mole percent, percent, ppm, or ppb.'
                       )


def _read_ignition_datapoints(data_group):
    """Parse the properties and dataPoints of the main (first) dataGroup."""
    property_id = {}
    unit_id = {}
    species_id = {}
    # get properties of dataGroup, keyed by the id used as tag in each dataPoint
    for prop in data_group.findall('property'):
        prop_id = prop.attrib['id']
        unit_id[prop_id] = prop.attrib['units']
        name = prop.attrib['name']
        if name not in datagroup_properties + ['composition']:
            raise KeyError(name + ' not valid dataPoint property')
        property_id[prop_id] = name

        if name == 'composition':
            species_link = prop.find('speciesLink')
            spec = {'species-name': species_link.attrib['preferredKey']}
            # use InChI for unique species identifier (if present)
            try:
                spec['InChI'] = species_link.attrib['InChI']
            except KeyError:
                # TODO: add InChI validator/search
                warn('Missing InChI for species ' + spec['species-name'])
            species_id[prop_id] = spec

    if not property_id:
        raise MissingElementError('property')

    has_composition = 'composition' in property_id.values()

    # now get data points
    datapoints = []
    for dp in data_group.findall('dataPoint'):
        datapoint = {}
        if has_composition:
            datapoint['composition'] = {'species': [], 'kind': None}

        for val in dp:
            name = property_id.get(val.tag)
            # handle "regular" properties differently than composition
            if name in datagroup_properties:
                units = unit_id[val.tag]
                if units == 'Torr':
                    units = 'torr'
                datapoint[name.replace(' ', '-')] = [val.text + ' ' + units]
            elif name == 'composition':
                # Copy per datapoint. The InChI key is present only when the
                # file provided one, matching how common properties are parsed
                # (no ``'InChI': None`` entries).
                spec = dict(species_id[val.tag])
                amount, kind = _composition_amount(val.text, unit_id[val.tag])
                spec['amount'] = [amount]

                # check consistency of composition type
                composition = datapoint['composition']
                if composition['kind'] is None:
                    composition['kind'] = kind
                elif composition['kind'] != kind:
                    raise KeywordError(
                        'composition units ' + kind +
                        ' not consistent with ' + composition['kind']
                        )

                composition['species'].append(spec)
            else:
                raise KeywordError('value missing from properties: ' + val.tag)

        datapoints.append(datapoint)

    if not datapoints:
        raise MissingElementError('dataPoint')

    return datapoints


def _read_time_histories(data_group):
    """Parse one additional dataGroup into a list of time-history dictionaries."""
    time_tag = None
    time_dict = None
    quant_tags = []
    quant_dicts = []
    quant_types = []
    for prop in data_group.findall('property'):
        name = prop.attrib['name']
        if name == 'time':
            time_dict = {'units': prop.attrib['units'], 'column': 0}
            time_tag = prop.attrib['id']
        elif name in ['volume', 'temperature', 'pressure']:
            quant_types.append(name)
            quant_dicts.append({'units': prop.attrib['units'], 'column': 1})
            quant_tags.append(prop.attrib['id'])
        else:
            raise KeywordError('Only volume, temperature, pressure, and time are allowed '
                               'in a time-history dataGroup.')

    if time_tag is None or not quant_tags:
        raise KeywordError('Both time and quantity properties required for time-history.')

    # one history per quantity; each stores [time, quantity] pairs in 'values'
    time_histories = [
        {'time': time_dict, 'quantity': q, 'type': t, 'values': []}
        for (q, t) in zip(quant_dicts, quant_types)
    ]

    for dp in data_group.findall('dataPoint'):
        time = None
        quants = {}
        for val in dp:
            if val.tag == time_tag:
                time = float(val.text)
            elif val.tag in quant_tags:
                quant_type = quant_types[quant_tags.index(val.tag)]
                quants[quant_type] = float(val.text)
            else:
                raise KeywordError('Value tag {} not found in dataGroup tags: '
                                   '{}'.format(val.tag, quant_tags))
        if time is None or not quants:
            raise KeywordError('Both time and quantity values required in each '
                               'time-history dataPoint.')
        for history in time_histories:
            # A point may carry some quantities but not all of them; report
            # that as a parse error rather than a bare KeyError.
            if history['type'] not in quants:
                raise KeywordError('Value for {} missing from time-history '
                                   'dataPoint.'.format(history['type']))
            history['values'].append([time, quants[history['type']]])

    return time_histories


def get_datapoints(root):
    """Parse datapoints with ignition delay from file.

    The first ``dataGroup`` holds the main experimental data. Any further
    ``dataGroup`` elements are read as time histories (time plus volume,
    temperature, or pressure) and attached to the first datapoint under the
    ``time-histories`` key.

    Args:
        root (`~xml.etree.ElementTree.Element`): Root of ReSpecTh XML file

    Returns:
        datapoints (`list` of `dict`): One dictionary per ``dataPoint`` of the
        first ``dataGroup``

    Raises:
        MissingElementError: If there is no ``dataGroup``, or the first one has
            no ``property`` or no ``dataPoint`` elements.
        KeyError: If a property of the first ``dataGroup`` has an unsupported name.
        KeywordError: For unsupported or inconsistent composition units, values
            without a matching property, or malformed time-history groups.
    """
    # Shock tube experiment will have one data group, while RCM may have one
    # or two (one for ignition delay, one for volume-history)
    data_groups = root.findall('dataGroup')
    if not data_groups:
        raise MissingElementError('dataGroup')

    # all situations will have main experimental data in first dataGroup
    datapoints = _read_ignition_datapoints(data_groups[0])

    # ReSpecTh files can have other dataGroups with pressure, volume, or temperature histories
    if len(data_groups) > 1:
        datapoints[0]['time-histories'] = []
        for data_group in data_groups[1:]:
            datapoints[0]['time-histories'].extend(_read_time_histories(data_group))

    return datapoints
472
473
def ReSpecTh_to_ChemKED(filename_xml, file_author='', file_author_orcid='', *, validate=False):
    """Convert ReSpecTh XML file to ChemKED-compliant dictionary.

    Args:
        filename_xml (`str`): Name of ReSpecTh XML file to be converted.
        file_author (`str`, optional): Name of an additional file author, appended
            after the author read from the XML file
        file_author_orcid (`str`, optional): ORCID of the additional file author
        validate (`bool`, optional, keyword-only): Set to `True` to validate the resulting
            property dictionary with `ChemKED`. Set to `False` if the file is being loaded and will
            be validated at some other point before use.

    Returns:
        properties (`dict`): Dictionary with the converted file contents

    Raises:
        KeywordError: If pressure rise is given for a rapid compression machine,
            a volume history is given for a shock tube, or ``file_author_orcid``
            is given without ``file_author``.
    """
    # get all information from XML file
    root = etree.parse(filename_xml).getroot()

    # get file metadata
    properties = get_file_metadata(root)

    # get reference info
    properties['reference'] = get_reference(root)
    # Save name of original data filename. Separate it from any existing detail
    # text, which would otherwise run straight into the note.
    conversion_note = 'Converted from ReSpecTh XML file ' + os.path.basename(filename_xml)
    existing_detail = properties['reference'].get('detail', '')
    if existing_detail:
        properties['reference']['detail'] = existing_detail + ' ' + conversion_note
    else:
        properties['reference']['detail'] = conversion_note

    # Ensure ignition delay, and get which kind of experiment
    properties.update(get_experiment_kind(root))

    # Get properties shared across the file
    properties['common-properties'] = get_common_properties(root)

    # Determine definition of ignition delay
    properties['common-properties']['ignition-type'] = get_ignition_type(root)

    # Now parse ignition delay datapoints
    properties['datapoints'] = get_datapoints(root)

    # Ensure inclusion of pressure rise or volume history matches apparatus.
    apparatus_kind = properties['apparatus']['kind']
    has_pres_rise = (
        'pressure-rise' in properties['common-properties'] or
        any('pressure-rise' in dp for dp in properties['datapoints'])
    )
    if has_pres_rise and apparatus_kind == 'rapid compression machine':
        raise KeywordError('Pressure rise cannot be defined for RCM.')

    has_vol_hist = any(
        history.get('type') == 'volume'
        for dp in properties['datapoints']
        for history in dp.get('time-histories', [])
    )
    if has_vol_hist and apparatus_kind == 'shock tube':
        raise KeywordError('Volume history cannot be defined for shock tube.')

    # add any additional file authors
    if file_author_orcid and not file_author:
        raise KeywordError('If file_author_orcid is specified, file_author must be as well')

    if file_author:
        temp_author = {'name': file_author}
        if file_author_orcid:
            temp_author['ORCID'] = file_author_orcid
        properties['file-authors'].append(temp_author)

    # Now go through datapoints and apply common properties. The values are
    # assigned by reference (not copied), and a common property replaces a
    # datapoint entry stored under the same key.
    for datapoint in properties['datapoints']:
        datapoint.update(properties['common-properties'])

    if validate:
        chemked.ChemKED(dict_input=properties)

    return properties
545
546
def respth2ck(argv=None):
    """Command-line entry point for converting a ReSpecTh XML file to a ChemKED YAML file.

    Args:
        argv (`list` of `str`, optional): Arguments handed to the argument parser
            in place of the process command line.

    The converted properties are validated, written as YAML to the output file,
    and the output filename is printed. When no output filename is given, the
    input filename with a ``.yaml`` extension is used.
    """
    parser = ArgumentParser(
        description='Convert a ReSpecTh XML file to a ChemKED YAML file.'
        )
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input filename (e.g., "file1.xml")'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=False,
                        default='',
                        help='Output filename (e.g., "file1.yaml")'
                        )
    parser.add_argument('-fa', '--file-author',
                        dest='file_author',
                        type=str,
                        required=False,
                        default='',
                        help='Name of additional file author to append to the original'
                        )
    parser.add_argument('-fo', '--file-author-orcid',
                        dest='file_author_orcid',
                        type=str,
                        required=False,
                        default='',
                        help='File author ORCID'
                        )

    args = parser.parse_args(argv)

    filename_ck = args.output
    filename_xml = args.input

    properties = ReSpecTh_to_ChemKED(filename_xml, args.file_author, args.file_author_orcid,
                                     validate=True)

    # set output filename and path: default to the input location and base name
    if not filename_ck:
        filename_ck = os.path.join(os.path.dirname(filename_xml),
                                   os.path.splitext(os.path.basename(filename_xml))[0] + '.yaml'
                                   )

    with open(filename_ck, 'w') as outfile:
        yaml.dump(properties, outfile, default_flow_style=False)
    print('Converted to ' + filename_ck)
596
597
def ck2respth(argv=None):
    """Command-line entry point for converting a ChemKED YAML file to a ReSpecTh XML file.

    Args:
        argv (`list` of `str`, optional): Arguments handed to the argument parser
            in place of the process command line.

    The input file is loaded with `ChemKED` and its ``convert_to_ReSpecTh``
    method is called with the output filename exactly as given (an empty string
    when ``-o`` is omitted).
    """
    parser = ArgumentParser(
        description='Convert a ChemKED YAML file to a ReSpecTh XML file.'
        )
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input filename (e.g., "file1.yaml")'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=False,
                        default='',
                        help='Output filename (e.g., "file1.xml")'
                        )

    args = parser.parse_args(argv)

    c = chemked.ChemKED(yaml_file=args.input)
    c.convert_to_ReSpecTh(args.output)
620
621
def main(argv=None):
    """General function for converting between ReSpecTh and ChemKED files based on extension.

    Args:
        argv (`list` of `str`, optional): Arguments handed to the argument parser
            in place of the process command line.

    Raises:
        KeywordError: If input and output have the same extension, or either
            extension is not ``.xml``/``.yaml``.

    An ``.xml`` input is converted to ``.yaml`` and vice versa. The output
    filename is optional; when omitted it defaults to the input filename with
    the opposite extension. The file author options are only used for the
    ``.xml`` to ``.yaml`` direction.
    """
    parser = ArgumentParser(
        description='Convert between ReSpecTh XML file and ChemKED YAML file '
                    'automatically based on file extension.'
        )
    parser.add_argument('-i', '--input',
                        type=str,
                        required=True,
                        help='Input filename (e.g., "file1.yaml" or "file2.xml")'
                        )
    parser.add_argument('-o', '--output',
                        type=str,
                        required=False,
                        default='',
                        help='Output filename (e.g., "file1.xml" or "file2.yaml")'
                        )
    parser.add_argument('-fa', '--file-author',
                        dest='file_author',
                        type=str,
                        required=False,
                        default='',
                        help='Name of additional file author to append to the original'
                        )
    parser.add_argument('-fo', '--file-author-orcid',
                        dest='file_author_orcid',
                        type=str,
                        required=False,
                        default='',
                        help='File author ORCID'
                        )

    args = parser.parse_args(argv)

    input_base, input_ext = os.path.splitext(args.input)
    output = args.output
    if not output and input_ext in ('.xml', '.yaml'):
        # -o is declared optional: derive the output name from the input
        # instead of rejecting the call because of the empty extension.
        output = input_base + ('.yaml' if input_ext == '.xml' else '.xml')
    output_ext = os.path.splitext(output)[1]

    if input_ext == '.xml' and output_ext == '.yaml':
        respth2ck(['-i', args.input, '-o', output, '-fa', args.file_author,
                   '-fo', args.file_author_orcid])

    elif input_ext == '.yaml' and output_ext == '.xml':
        c = chemked.ChemKED(yaml_file=args.input)
        c.convert_to_ReSpecTh(output)

    elif input_ext == '.xml' and output_ext == '.xml':
        raise KeywordError('Cannot convert .xml to .xml')

    elif input_ext == '.yaml' and output_ext == '.yaml':
        raise KeywordError('Cannot convert .yaml to .yaml')

    else:
        raise KeywordError('Input/output args need to be .xml/.yaml')
673
674
# Run the extension-based converter when this file is executed directly
if __name__ == '__main__':
    main()
677