1#
2#  Copyright (C) 2018 Codethink Limited
3#
4#  This program is free software; you can redistribute it and/or
5#  modify it under the terms of the GNU Lesser General Public
6#  License as published by the Free Software Foundation; either
7#  version 2 of the License, or (at your option) any later version.
8#
9#  This library is distributed in the hope that it will be useful,
10#  but WITHOUT ANY WARRANTY; without even the implied warranty of
11#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the GNU
12#  Lesser General Public License for more details.
13#
14#  You should have received a copy of the GNU Lesser General Public
15#  License along with this library. If not, see <http://www.gnu.org/licenses/>.
16#
17#  Authors:
18#        Tristan Van Berkom <tristan.vanberkom@codethink.co.uk>
19
import sys
import collections
import collections.abc
import string
from copy import deepcopy
from contextlib import ExitStack
from pathlib import Path

from ruamel import yaml
from ruamel.yaml.representer import SafeRepresenter, RoundTripRepresenter
from ruamel.yaml.constructor import RoundTripConstructor
from ._exceptions import LoadError, LoadErrorReason
31
# Scalars tagged as int or float are built with the ruamel string
# constructor, so that numeric looking values are loaded as strings
RoundTripConstructor.add_constructor(
    u'tag:yaml.org,2002:int',
    RoundTripConstructor.construct_yaml_str)
RoundTripConstructor.add_constructor(
    u'tag:yaml.org,2002:float',
    RoundTripConstructor.construct_yaml_str)

# Key under which each decorated dictionary of the loaded yaml
# stores its DictProvenance
PROVENANCE_KEY = '__bst_provenance_info'
39
40
# ProvenanceFile()
#
# Describes the file a piece of loaded YAML originates from,
# for use in provenance reporting
#
# Args:
#    name (str): Full path to the file
#    shortname (str): Relative path to the file
#    project (Project): Project where the shortname is relative from
#
class ProvenanceFile():
    def __init__(self, name, shortname, project):
        self.name, self.shortname, self.project = name, shortname, project
52
53
# Provenance()
#
# Records where a given node of the parsed dictionary came from.
#
# Args:
#   filename (ProvenanceFile): Description of the file the node was loaded
#                              from, its shortname and project are used
#                              when formatting the provenance
#   node (dict, list, value): A binding to the originally parsed value
#   toplevel (dict): The toplevel of the loaded file, suitable for later dumps
#   line (int): The line number where node was parsed
#   col (int): The column number where node was parsed
#
class Provenance():
    def __init__(self, filename, node, toplevel, line=0, col=0):
        self.filename = filename
        self.node = node
        self.toplevel = toplevel
        self.line = line
        self.col = col

    # Format the provenance for error reporting, files which belong
    # to a junctioned project are prefixed with the junction name
    def __str__(self):
        origin = self.filename
        location = origin.shortname
        project = origin.project
        if project and project.junction:
            location = "{}:{}".format(project.junction.name, origin.shortname)

        return "{} [line {:d} column {:d}]".format(location, self.line, self.col)

    # Abstract method, subclasses return a copy of themselves
    def clone(self):
        pass  # pragma: nocover
82
83
# DictProvenance()
#
# The Provenance of a dictionary. It is stored inside the dictionary
# it describes and tracks the provenance of every member in `members`,
# keyed by member name.
#
class DictProvenance(Provenance):
    def __init__(self, filename, node, toplevel, line=None, col=None):

        if line is None or col is None:
            if hasattr(node, 'lc'):
                # ruamel line numbers are zero based
                line, col = node.lc.line + 1, node.lc.col
            else:
                # Special case for loading an empty dict
                line, col = 1, 0

        super().__init__(filename, node, toplevel, line=line, col=col)

        self.members = {}

    # Returns a new DictProvenance with every member provenance cloned
    def clone(self):
        duplicate = DictProvenance(self.filename, self.node, self.toplevel,
                                   line=self.line, col=self.col)

        for name, member in self.members.items():
            duplicate.members[name] = member.clone()

        return duplicate
112
113
# MemberProvenance()
#
# The Provenance of a single dictionary member.
#
# When parent_dict is given, the node and its position are looked up
# from the parent; otherwise the explicit node, line and col are used.
#
class MemberProvenance(Provenance):
    def __init__(self, filename, parent_dict, member_name, toplevel,
                 node=None, line=None, col=None):

        if parent_dict is not None:
            node = parent_dict[member_name]
            row, col = parent_dict.lc.value(member_name)
            line = row + 1

        super().__init__(filename, node, toplevel, line=line, col=col)

        # Only used if member is a list
        self.elements = []

    # Returns a new MemberProvenance with every element provenance cloned
    def clone(self):
        duplicate = MemberProvenance(self.filename, None, None, self.toplevel,
                                     node=self.node, line=self.line, col=self.col)
        duplicate.elements.extend([element.clone() for element in self.elements])
        return duplicate
136
137
# ElementProvenance()
#
# The Provenance of a single list element.
#
# When parent_list is given, the node and its position are looked up
# from the parent list at the given index; otherwise the explicit
# node, line and col are used.
#
class ElementProvenance(Provenance):
    def __init__(self, filename, parent_list, index, toplevel,
                 node=None, line=None, col=None):

        if parent_list is not None:
            node = parent_list[index]
            line, col = parent_list.lc.item(index)
            line += 1

        super(ElementProvenance, self).__init__(
            filename, node, toplevel, line=line, col=col)

        # Only used if element is a list
        self.elements = []

    # Returns a new ElementProvenance with every nested element
    # provenance cloned
    def clone(self):
        provenance = ElementProvenance(self.filename, None, None, self.toplevel,
                                       node=self.node, line=self.line, col=self.col)

        # clone() has to be called here; storing the bound method would
        # leave the copy holding methods instead of provenances
        provenance.elements = [e.clone() for e in self.elements]
        return provenance
161
162
# The composite exceptions are meant to be caught entirely inside
# the BuildStream framework, which is why they are not part of the
# public exceptions.py
#
# Args:
#    path (str): The path of the key which could not be composited
#    message (str): The error message
#
class CompositeError(Exception):
    def __init__(self, path, message):
        super().__init__(message)
        self.path = path
170
171
# Raised when a value cannot be composited because of its type
#
# Args:
#    path (str): The path of the key which could not be composited
#    expected_type (type): The type which was expected
#    actual_type (type): The type which was received instead
#
class CompositeTypeError(CompositeError):
    def __init__(self, path, expected_type, actual_type):
        message = ("Error compositing dictionary key '{}', expected source type '{}' "
                   "but received type '{}'"
                   .format(path, expected_type.__name__, actual_type.__name__))
        super().__init__(path, message)
        self.expected_type = expected_type
        self.actual_type = actual_type
181
182
# load()
#
# Loads a dictionary from a YAML file
#
# Args:
#    filename (str): The YAML file to load
#    shortname (str): The filename in shorthand for error reporting (or None),
#                     the filename itself is used when this is not given
#    copy_tree (bool): Whether to make a copy, preserving the original toplevels
#                      for later serialization
#    project (Project): The project recorded in the file's provenance (or None)
#
# Returns (dict): A loaded copy of the YAML file with provenance information
#
# Raises: LoadError
#
def load(filename, shortname=None, copy_tree=False, *, project=None):
    file = ProvenanceFile(filename, shortname or filename, project)

    try:
        with open(filename) as stream:
            return load_data(stream, file, copy_tree=copy_tree)
    except FileNotFoundError as e:
        message = "Could not find file at {}".format(filename)
        raise LoadError(LoadErrorReason.MISSING_FILE, message) from e
    except IsADirectoryError as e:
        message = "{} is a directory. bst command expects a .bst file.".format(filename)
        raise LoadError(LoadErrorReason.LOADING_DIRECTORY, message) from e
211
212
# load_data()
#
# Like load(), but doesn't require the data to be in a file
#
# Args:
#    data (str or stream): The YAML data to parse
#    file (ProvenanceFile): The file recorded in the provenance (or None)
#    copy_tree (bool): Whether to make a copy, preserving the original toplevels
#                      for later serialization
#
# Returns (dict): The loaded YAML decorated with provenance information
#
# Raises: LoadError, if the YAML is malformed or its toplevel is not a dictionary
#
def load_data(data, file=None, copy_tree=False):

    try:
        contents = yaml.load(data, yaml.loader.RoundTripLoader, preserve_quotes=True)
    except (yaml.scanner.ScannerError, yaml.composer.ComposerError, yaml.parser.ParserError) as e:
        raise LoadError(LoadErrorReason.INVALID_YAML,
                        "Malformed YAML:\n\n{}\n\n{}\n".format(e.problem, e.problem_mark)) from e

    if contents is None:
        # Special case allowance for None, when the loaded data has only comments in it.
        contents = {}
    elif not isinstance(contents, dict):
        # The file is optional, do not trip over a missing one while
        # reporting the actual error
        origin = file.name if file is not None else '<data>'
        raise LoadError(LoadErrorReason.INVALID_YAML,
                        "YAML file has content of type '{}' instead of expected type 'dict': {}"
                        .format(type(contents).__name__, origin))

    return node_decorated_copy(file, contents, copy_tree=copy_tree)
233
234
# dump()
#
# Dumps a previously loaded YAML node to a file, or to stdout
#
# Args:
#    node (dict): A node previously loaded with _yaml.load() above
#    filename (str): The YAML file to write to, when not specified
#                    the node is written to stdout
#
def dump(node, filename=None):
    if not filename:
        yaml.round_trip_dump(node, sys.stdout)
        return

    # Imported here rather than at module level
    from . import utils
    with utils.save_file_atomic(filename, 'w') as f:
        yaml.round_trip_dump(node, f)
249
250
# node_decorated_copy()
#
# Decorate a loaded dict tree with Provenance information,
# used directly after loading yaml
#
# Args:
#    filename (ProvenanceFile): The file the tree was loaded from
#    toplevel (node): The toplevel dictionary node
#    copy_tree (bool): Whether to decorate a deep copy and preserve the
#                      original, otherwise the toplevel itself is decorated
#
# Returns: The toplevel, or a copy of it, decorated with Provenance
#
def node_decorated_copy(filename, toplevel, copy_tree=False):
    result = deepcopy(toplevel) if copy_tree else toplevel
    node_decorate_dict(filename, result, toplevel, toplevel)
    return result
272
273
# node_decorate_dict()
#
# Stores a DictProvenance in the target dictionary and records a
# MemberProvenance for every key of the source dictionary, recursing
# into nested dictionaries and lists.
#
# Args:
#    filename (ProvenanceFile): The file the source was loaded from
#    target (dict): The dictionary to decorate
#    source (dict): The originally loaded dictionary, providing positions
#    toplevel (dict): The toplevel of the loaded file
#
def node_decorate_dict(filename, target, source, toplevel):
    provenance = DictProvenance(filename, source, toplevel)
    target[PROVENANCE_KEY] = provenance

    for key, value in node_items(source):
        member = MemberProvenance(filename, source, key, toplevel)
        provenance.members[key] = member

        target_value = target.get(key)
        # The ABC lives in collections.abc, the alias in collections
        # is gone as of Python 3.10
        if isinstance(value, collections.abc.Mapping):
            node_decorate_dict(filename, target_value, value, toplevel)
        elif isinstance(value, list):
            member.elements = node_decorate_list(filename, target_value, value, toplevel)
287
288
# node_decorate_list()
#
# Creates an ElementProvenance for every element of the source list,
# recursing into nested dictionaries and lists.
#
# Args:
#    filename (ProvenanceFile): The file the source was loaded from
#    target (list): The list whose nested dictionaries get decorated
#    source (list): The originally loaded list, providing positions
#    toplevel (dict): The toplevel of the loaded file
#
# Returns:
#    (list): The ElementProvenance of each element, in list order
#
def node_decorate_list(filename, target, source, toplevel):

    elements = []

    # Use the real position of each item; looking it up with
    # source.index() returns the first equal item, which gives duplicate
    # entries the wrong provenance and never decorates the later ones.
    for idx, item in enumerate(source):
        target_item = target[idx]
        element = ElementProvenance(filename, source, idx, toplevel)

        if isinstance(item, collections.abc.Mapping):
            node_decorate_dict(filename, target_item, item, toplevel)
        elif isinstance(item, list):
            element.elements = node_decorate_list(filename, target_item, item, toplevel)

        elements.append(element)

    return elements
306
307
# node_get_provenance()
#
# Gets the provenance for a node
#
# Args:
#   node (dict): a dictionary
#   key (str): key in the dictionary
#   indices (list of indexes): Index path, in the case of list values
#
# Returns: The Provenance of the dict when no key is given, otherwise the
#          Provenance of the member or list element; None is returned if
#          the dict or the member has no provenance
#
def node_get_provenance(node, key=None, indices=None):

    dict_provenance = node.get(PROVENANCE_KEY)
    if not (dict_provenance and key):
        return dict_provenance

    found = dict_provenance.members.get(key)
    if not found or indices is None:
        return found

    # Descend through the element provenances along the index path
    for index in indices:
        found = found.elements[index]

    return found
329
330
# Helper giving access to the utils sentinel without importing utils
# unconditionally at module level, which causes issues for completion.
#
# Local private, but it has to be defined ahead of the functions which
# call it in their default kwarg values, as sphinx appears to break
# otherwise.
#
def _get_sentinel():
    from . import utils
    return utils._sentinel
341
342
# node_get()
#
# Fetches a value from a dictionary node and checks it against
# an expected type. Use default_value when parsing a value
# which is only optionally supplied.
#
# Args:
#    node (dict): The dictionary node
#    expected_type (type): The expected type for the value being searched
#    key (str): The key to get a value for in node
#    indices (list of ints): Optionally descend into lists of lists
#    default_value: The value to use when key is not found in node
#
# Returns:
#    The value if found in node, otherwise default_value is returned
#
# Raises:
#    LoadError, when the key is missing and no default_value was given,
#    or when the value found is not of the expected type
#
# Note:
#    Returned strings are stripped of leading and trailing whitespace
#
def node_get(node, expected_type, key, indices=None, default_value=_get_sentinel()):
    value = node.get(key, default_value)
    if value is _get_sentinel():
        raise LoadError(LoadErrorReason.INVALID_DATA,
                        "{}: Dictionary did not contain expected key '{}'"
                        .format(node_get_provenance(node), key))

    path = key
    if indices is not None:
        # Fetching the member as a list implies a type check of the member itself
        value = node_get(node, list, key)
        for index in indices:
            value = value[index]
            path += '[{:d}]'.format(index)

    # None is accepted as a valid value for any type
    if value is None:
        return None

    if not isinstance(value, expected_type):
        # Attempt basic conversions where possible. Typically numeric values
        # are specified and converted to strings, but dicts and lists are
        # never converted.
        try:
            if expected_type == bool and isinstance(value, str):
                # Only the literal spellings are accepted, coercing with
                # bool() would make the string "False" evaluate to True
                if value in ('true', 'True'):
                    value = True
                elif value in ('false', 'False'):
                    value = False
                else:
                    raise ValueError()
            elif (expected_type in (list, dict) or
                  isinstance(value, (list, dict))):
                raise ValueError()
            else:
                value = expected_type(value)
        except (ValueError, TypeError):
            provenance = node_get_provenance(node, key=key, indices=indices)
            raise LoadError(LoadErrorReason.INVALID_DATA,
                            "{}: Value of '{}' is not of the expected type '{}'"
                            .format(provenance, path, expected_type.__name__))

    # All strings loaded from yaml are stripped of surrounding whitespace
    return value.strip() if isinstance(value, str) else value
413
414
# node_get_project_path()
#
# Fetches a project path from a dictionary node and validates it
#
# Paths are asserted to never lead to a directory outside of the project
# directory. In addition, paths can not point to symbolic links, fifos,
# sockets and block/character devices.
#
# The `check_is_file` and `check_is_dir` parameters can be used to
# perform additional validations on the path. Note that an exception
# will always be raised if both parameters are set to ``True``.
#
# Args:
#    node (dict): A dictionary loaded from YAML
#    key (str): The key whose value contains a path to validate
#    project_dir (str): The project directory
#    check_is_file (bool): If ``True`` an error will also be raised
#                          if path does not point to a regular file.
#                          Defaults to ``False``
#    check_is_dir (bool): If ``True`` an error will be also raised
#                         if path does not point to a directory.
#                         Defaults to ``False``
# Returns:
#    (str): The project path
#
# Raises:
#    (LoadError): In case that the project path is not valid or does not
#                 exist
#
def node_get_project_path(node, key, project_dir, *,
                          check_is_file=False, check_is_dir=False):
    path_str = node_get(node, str, key)
    path = Path(path_str)
    project_dir_path = Path(project_dir)

    provenance = node_get_provenance(node, key=key)

    if (project_dir_path / path).is_symlink():
        raise LoadError(LoadErrorReason.PROJ_PATH_INVALID_KIND,
                        "{}: Specified path '{}' must not point to "
                        "symbolic links "
                        .format(provenance, path_str))

    if path.parts and path.parts[0] == '..':
        raise LoadError(LoadErrorReason.PROJ_PATH_INVALID,
                        "{}: Specified path '{}' first component must "
                        "not be '..'"
                        .format(provenance, path_str))

    try:
        # Path.resolve() only accepts the strict argument as of Python 3.6
        if sys.version_info < (3, 6):
            full_resolved_path = (project_dir_path / path).resolve()
        else:
            full_resolved_path = (project_dir_path / path).resolve(strict=True)
    except FileNotFoundError as e:
        raise LoadError(LoadErrorReason.MISSING_FILE,
                        "{}: Specified path '{}' does not exist"
                        .format(provenance, path_str)) from e

    # Both sides of the comparison need to be resolved, otherwise a
    # relative or symlinked project directory never equals the resolved
    # path and a path denoting the project directory itself is rejected.
    resolved_project_dir = project_dir_path.resolve()
    is_inside = (resolved_project_dir in full_resolved_path.parents or
                 full_resolved_path == resolved_project_dir)

    if path.is_absolute() or not is_inside:
        raise LoadError(LoadErrorReason.PROJ_PATH_INVALID,
                        "{}: Specified path '{}' must not lead outside of the "
                        "project directory"
                        .format(provenance, path_str))

    # Character devices are rejected along with the other special
    # files, as the description above states
    if (full_resolved_path.is_socket() or
            full_resolved_path.is_fifo() or
            full_resolved_path.is_block_device() or
            full_resolved_path.is_char_device()):
        raise LoadError(LoadErrorReason.PROJ_PATH_INVALID_KIND,
                        "{}: Specified path '{}' points to an unsupported "
                        "file kind"
                        .format(provenance, path_str))

    if check_is_file and not full_resolved_path.is_file():
        raise LoadError(LoadErrorReason.PROJ_PATH_INVALID_KIND,
                        "{}: Specified path '{}' is not a regular file"
                        .format(provenance, path_str))

    if check_is_dir and not full_resolved_path.is_dir():
        raise LoadError(LoadErrorReason.PROJ_PATH_INVALID_KIND,
                        "{}: Specified path '{}' is not a directory"
                        .format(provenance, path_str))

    return path_str
502
503
# node_items()
#
# A convenience generator for iterating over the key/value tuples
# of a dictionary loaded from project YAML, leaving out the
# provenance entry stored in the dictionary.
#
# Args:
#    node (dict): The dictionary node
#
# Yields:
#    (str): The key name
#    (anything): The value for the key
#
def node_items(node):
    yield from (
        (key, value)
        for key, value in node.items()
        if key != PROVENANCE_KEY
    )
521
522
# ensure_provenance()
#
# Makes sure the node carries a provenance, creating a dummy one when
# it has none, as is the case when compositing dictionaries where the
# target is an empty {}
#
# Args:
#    node (dict): The dictionary node
#
# Returns:
#    (DictProvenance): The provenance stored in node
#
def ensure_provenance(node):
    provenance = node.get(PROVENANCE_KEY) or \
        DictProvenance(ProvenanceFile('', '', None), node, node)
    node[PROVENANCE_KEY] = provenance

    return provenance
532
533
# is_ruamel_str():
#
# Args:
#    value: A value loaded from ruamel
#
# Tells whether the value is "stringish". Since ruamel uses some
# complex types to represent strings, this is needed to avoid
# compositing exceptions, allowing the various string types to be
# interchangeable and acceptable
#
# Returns:
#    (bool): True if value is a str or a ruamel ScalarString
#
def is_ruamel_str(value):
    return (isinstance(value, str) or
            isinstance(value, yaml.scalarstring.ScalarString))
552
553
# is_composite_list
#
# Checks if the given node is a Mapping with array composition
# directives.
#
# Args:
#    node (value): Any node
#
# Returns:
#    (bool): True if node was a Mapping containing only
#            list composition directives
#
# Raises:
#    (LoadError): If node was a mapping and contained a mix of
#                 list composition directives and other keys
#
def is_composite_list(node):

    # The ABC lives in collections.abc, the alias in collections
    # is gone as of Python 3.10
    if not isinstance(node, collections.abc.Mapping):
        return False

    has_directives = False
    has_keys = False

    for key, _ in node_items(node):
        if key in ('(>)', '(<)', '(=)'):  # pylint: disable=simplifiable-if-statement
            has_directives = True
        else:
            has_keys = True

        # Report the mix as soon as both kinds of keys were seen
        if has_keys and has_directives:
            provenance = node_get_provenance(node)
            raise LoadError(LoadErrorReason.INVALID_DATA,
                            "{}: Dictionary contains array composition directives and arbitrary keys"
                            .format(provenance))

    return has_directives
590
591
# composite_list_prepend
#
# Internal helper for list composition, puts a copy of the source
# list in front of the target list and does the same for the
# provenance of the elements
#
# Args:
#    target_node (dict): A simple dictionary
#    target_key (str): The key indicating a literal array to prepend to
#    source_node (dict): Another simple dictionary
#    source_key (str): The key indicating an array to prepend to the target
#
# Returns:
#    (bool): True if a source list was found and compositing occurred
#
def composite_list_prepend(target_node, target_key, source_node, source_key):

    source_list = node_get(source_node, list, source_key, default_value=[])
    if not source_list:
        return False

    target_provenance = node_get_provenance(target_node)
    source_provenance = node_get_provenance(source_node)

    # Create the target list if there is none yet
    if target_node.get(target_key) is None:
        target_node[target_key] = []

    prepended = list_chain_copy(source_list)
    target_list = target_node[target_key]

    # Inserting in reverse order at the front preserves the source order
    for element in reversed(prepended):
        target_list.insert(0, element)

    existing = target_provenance.members.get(target_key)
    incoming = source_provenance.members[source_key]
    if not existing:
        target_provenance.members[target_key] = incoming.clone()
    else:
        existing.elements[0:0] = [p.clone() for p in incoming.elements]

    return True
630
631
# composite_list_append
#
# Internal helper for list composition, adds a copy of the source
# list to the end of the target list and does the same for the
# provenance of the elements
#
# Args:
#    target_node (dict): A simple dictionary
#    target_key (str): The key indicating a literal array to append to
#    source_node (dict): Another simple dictionary
#    source_key (str): The key indicating an array to append to the target
#
# Returns:
#    (bool): True if a source list was found and compositing occurred
#
def composite_list_append(target_node, target_key, source_node, source_key):

    source_list = node_get(source_node, list, source_key, default_value=[])
    if not source_list:
        return False

    target_provenance = node_get_provenance(target_node)
    source_provenance = node_get_provenance(source_node)

    # Create the target list if there is none yet
    if target_node.get(target_key) is None:
        target_node[target_key] = []

    appended = list_chain_copy(source_list)
    target_node[target_key].extend(appended)

    existing = target_provenance.members.get(target_key)
    incoming = source_provenance.members[source_key]
    if not existing:
        target_provenance.members[target_key] = incoming.clone()
    else:
        existing.elements += [p.clone() for p in incoming.elements]

    return True
670
671
# composite_list_overwrite
#
# Internal helper for list composition, replaces the target list and
# its provenance with a copy of the source list and its provenance
#
# Args:
#    target_node (dict): A simple dictionary
#    target_key (str): The key indicating a literal array to overwrite
#    source_node (dict): Another simple dictionary
#    source_key (str): The key indicating an array to overwrite the target with
#
# Returns:
#    (bool): True if a source list was found and compositing occurred
#
def composite_list_overwrite(target_node, target_key, source_node, source_key):

    # We need to handle the legitimate case of overwriting a list with an empty
    # list, hence a private list object is used as the default value rather
    # than []. It is recognized by identity, so that a source list which
    # happens to be equal to the default is still composited.
    missing = [None]
    source_list = node_get(source_node, list, source_key, default_value=missing)
    if source_list is missing:
        return False

    target_provenance = node_get_provenance(target_node)
    source_provenance = node_get_provenance(source_node)

    target_node[target_key] = list_chain_copy(source_list)
    target_provenance.members[target_key] = source_provenance.members[source_key].clone()

    return True
700
701
# composite_list():
#
# Composite the source value onto the target value, if the source
# is a list, or a dictionary containing list compositing directives
#
# Args:
#    target_node (dict): A simple dictionary
#    source_node (dict): Another simple dictionary
#    key (str): The key to compose on
#
# Returns:
#    (bool): True if the source was a logical list and was composited,
#            False if the source value was not a logical list
#
# Raises:
#    (LoadError): If the source was a logical list and the target was not
#
def composite_list(target_node, source_node, key):
    target_value = target_node.get(key)
    source_value = source_node[key]

    # Only needed for error reporting
    target_key_provenance = node_get_provenance(target_node, key)
    source_key_provenance = node_get_provenance(source_node, key)

    source_is_literal = isinstance(source_value, list)
    if not source_is_literal and not is_composite_list(source_value):
        # Source value was not a logical list
        return False

    # A literal list in the source overwrites the target values
    # and provenance completely.
    #
    if source_is_literal:

        # Assert target type
        target_is_listish = (target_value is None or
                             isinstance(target_value, list) or
                             is_composite_list(target_value))
        if not target_is_listish:
            raise LoadError(LoadErrorReason.INVALID_DATA,
                            "{}: List cannot overwrite value at: {}"
                            .format(source_key_provenance, target_key_provenance))

        composite_list_overwrite(target_node, key, source_node, key)
        return True

    # From here on the source is a composite list, the outcome
    # depends on what the target holds...
    #
    if target_value is None:

        # There is nothing in the target, the composite list is copied
        # in its entirety as is, and preserved for later composition
        #
        source_provenance = node_get_provenance(source_node)
        target_provenance = node_get_provenance(target_node)

        target_node[key] = node_chain_copy(source_value)
        target_provenance.members[key] = source_provenance.members[key].clone()

    elif isinstance(target_value, list):

        # The target is a literal list, composition occurs directly
        # onto it, leaving the target as a literal list which
        # overwrites anything in later composition
        #
        composite_list_overwrite(target_node, key, source_value, '(=)')
        composite_list_prepend(target_node, key, source_value, '(<)')
        composite_list_append(target_node, key, source_value, '(>)')

    elif is_composite_list(target_value):

        # The target is a composite list too, composition occurs in
        # the target composite list, which is preserved in dictionary
        # form for further composition.
        #
        if composite_list_overwrite(target_value, '(=)', source_value, '(=)'):

            # When overwriting a target with composition directives, remove any
            # existing prepend/append directives in the target before adding our own
            target_provenance = node_get_provenance(target_value)

            for directive in ('(<)', '(>)'):
                try:
                    del target_value[directive]
                    del target_provenance.members[directive]
                except KeyError:
                    # Ignore errors from deletion of non-existing keys
                    pass

        # Prepend to the target prepend array, and append to the append array
        composite_list_prepend(target_value, '(<)', source_value, '(<)')
        composite_list_append(target_value, '(>)', source_value, '(>)')

    else:
        raise LoadError(LoadErrorReason.INVALID_DATA,
                        "{}: List cannot overwrite value at: {}"
                        .format(source_key_provenance, target_key_provenance))

    # We handled list composition in some way
    return True
804
805
# composite_dict():
#
# Composites values in target with values from source
#
# Args:
#    target (dict): A simple dictionary
#    source (dict): Another simple dictionary
#    path (str): The path of keys leading to these dictionaries, only
#                used for reporting errors (or None at the toplevel)
#
# Raises: CompositeError
#
# Unlike the dictionary update() method, nested values in source
# will not obsolete entire subdictionaries in target, instead both
# dictionaries will be recursed and a composition of both will result
#
# This is useful for overriding configuration files and element
# configurations.
#
def composite_dict(target, source, path=None):
    target_provenance = ensure_provenance(target)
    source_provenance = ensure_provenance(source)

    for key, source_value in node_items(source):

        # Track the full path of keys, only for raising CompositeError
        thispath = path + '.' + key if path else key

        # Handle list composition separately
        if composite_list(target, source, key):
            continue

        target_value = target.get(key)

        # The ABC lives in collections.abc, the alias in collections
        # is gone as of Python 3.10
        if isinstance(source_value, collections.abc.Mapping):

            # Handle creating new dicts on target side
            if target_value is None:
                target_value = {}
                target[key] = target_value

                # Give the new dict provenance
                value_provenance = source_value.get(PROVENANCE_KEY)
                if value_provenance:
                    target_value[PROVENANCE_KEY] = value_provenance.clone()

                # Add a new provenance member element to the containing dict,
                # cloned like everywhere else so that the target does not
                # share provenance objects with the source
                target_provenance.members[key] = source_provenance.members[key].clone()

            if not isinstance(target_value, collections.abc.Mapping):
                raise CompositeTypeError(thispath, type(target_value), type(source_value))

            # Recurse into matching dictionary
            composite_dict(target_value, source_value, path=thispath)

        else:

            if target_value is not None:

                # Exception here: depending on how strings were declared ruamel may
                # use a different type, but for our purposes, any stringish type will do.
                if not (is_ruamel_str(source_value) and is_ruamel_str(target_value)) \
                   and not isinstance(source_value, type(target_value)):
                    raise CompositeTypeError(thispath, type(target_value), type(source_value))

            # Overwrite simple values, lists and mappings have already been handled
            target_provenance.members[key] = source_provenance.members[key].clone()
            target[key] = source_value
875
876
# composite()
#
# Convenience wrapper around composite_dict() which reports type
# mismatches with an all purpose LoadError.
#
# Args:
#    target (dict): The dictionary to composite into
#    source (dict): The dictionary to composite from
#
# Raises:
#    LoadError: With LoadErrorReason.ILLEGAL_COMPOSITE, if composite_dict()
#               raised a CompositeTypeError
#
def composite(target, source):
    assert hasattr(source, 'get')

    # Looked up before compositing, this is only used to prefix the error
    source_provenance = node_get_provenance(source)

    try:
        composite_dict(target, source)
    except CompositeTypeError as e:
        error_prefix = "{}: ".format(source_provenance) if source_provenance else ""
        message = "{}Expected '{}' type for configuration '{}', instead received '{}'".format(
            error_prefix,
            e.expected_type.__name__,
            e.path,
            e.actual_type.__name__)

        raise LoadError(LoadErrorReason.ILLEGAL_COMPOSITE, message) from e
895
896
# SanitizedDict
#
# SanitizedDict is an OrderedDict that is dumped as unordered mapping.
# This provides deterministic output for unordered mappings.
#
# The class adds nothing to OrderedDict itself, it only exists as a
# distinct type so that a dedicated representer can be registered
# for it below. Instances are created by node_sanitize().
#
class SanitizedDict(collections.OrderedDict):
    pass


# Dump SanitizedDict instances using the plain dictionary representer
# of the SafeRepresenter when dumping with the RoundTripRepresenter.
RoundTripRepresenter.add_representer(SanitizedDict,
                                     SafeRepresenter.represent_dict)
906
907
# node_sanitize()
#
# Returns an alphabetically ordered recursive copy
# of the source node with internal provenance information stripped.
#
# Only dicts are ordered, list elements are left in order.
#
# Args:
#    node: A mapping, a list, or any other loaded value
#
# Returns:
#    A SanitizedDict for mappings, a new list for lists, and the
#    value itself for anything else
#
def node_sanitize(node):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    if isinstance(node, Mapping):

        result = SanitizedDict()

        # Only the keys reported by node_items() are carried over,
        # inserted in sorted order so that the result is deterministic
        for key in sorted(key for key, _ in node_items(node)):
            result[key] = node_sanitize(node[key])

        return result

    if isinstance(node, list):
        return [node_sanitize(elt) for elt in node]

    return node
931
932
# node_validate()
#
# Validate the node so as to ensure the user has not specified
# any keys which are unrecognized by buildstream (usually this
# means a typo which would otherwise not trigger an error).
#
# Args:
#    node (dict): A dictionary loaded from YAML
#    valid_keys (list): A list of valid keys for the specified node
#
# Raises:
#    LoadError: In the case that the specified node contained
#               one or more invalid keys
#
def node_validate(node, valid_keys):

    # The internal provenance key is always allowed
    valid_keys = set(valid_keys)
    valid_keys.add(PROVENANCE_KEY)

    # Report the first unrecognized key. The key is tested for membership
    # only, never for truthiness, so that falsy keys such as an empty
    # string are reported as well.
    for key in node:
        if key not in valid_keys:
            provenance = node_get_provenance(node, key=key)
            raise LoadError(LoadErrorReason.INVALID_DATA,
                            "{}: Unexpected key: {}".format(provenance, key))
958
959
# ChainMap
#
# This is a derivative of collections.ChainMap(), but supports
# explicit deletions of keys.
#
# The purpose of this is to create a virtual copy-on-write
# copy of a dictionary, so that mutating it in any way does
# not affect the underlying dictionaries.
#
# collections.ChainMap covers this already mostly, but fails
# to record internal state so as to hide keys which have been
# explicitly deleted.
#
# Note that iteration order is not defined, as the visible keys
# are computed as a set.
#
class ChainMap(collections.ChainMap):

    def __init__(self, *maps):
        super().__init__(*maps)

        # Keys which were explicitly deleted from this mapping, these are
        # hidden even if they still exist in one of the underlying maps
        self.__deletions = set()

    # Returns the set of keys found in any of the underlying
    # maps which have not been explicitly deleted
    def __live_keys(self):
        return set().union(*self.maps) - self.__deletions

    def __getitem__(self, key):

        # Honor deletion state of 'key'
        if key in self.__deletions:
            return self.__missing__(key)

        return super().__getitem__(key)

    def __len__(self):
        return len(self.__live_keys())

    def __iter__(self):
        return iter(self.__live_keys())

    def __contains__(self, key):
        if key in self.__deletions:
            return False
        return any(key in m for m in self.maps)

    def __bool__(self):
        # Attempt to preserve 'any' optimization, if all of the
        # underlying maps are empty there is nothing more to check
        if not any(self.maps):
            return False

        # Something existed, try again with deletions subtracted.
        #
        # This tests whether any key remains, not whether any remaining
        # key is truthy, so that keys like 0 or '' are counted too.
        return bool(self.__live_keys())

    def __setitem__(self, key, value):
        self.__deletions.discard(key)
        super().__setitem__(key, value)

    def __delitem__(self, key):
        if key in self.__deletions:
            raise KeyError('Key was already deleted from this mapping: {!r}'.format(key))

        # Ignore KeyError if it's not in the first map, just save the deletion state
        #
        # NOTE: This also means that deleting a key which exists in none
        #       of the maps does not raise, it is only recorded as deleted.
        try:
            super().__delitem__(key)
        except KeyError:
            pass

        # Store deleted state
        self.__deletions.add(key)

    # Removes an arbitrary visible key and returns it with its value
    # as a (key, value) tuple, as expected of a mutable mapping.
    #
    # Raises KeyError if no visible keys remain.
    def popitem(self):
        for key in self.__live_keys():
            return key, self.pop(key)

        raise KeyError('No keys found.')

    # Sentinel for telling apart "no default given" from a default of None
    __marker = object()

    def pop(self, key, default=__marker):
        # Reimplement MutableMapping's behavior here
        try:
            value = self[key]
        except KeyError:
            if default is self.__marker:
                raise
            return default
        else:
            del self[key]
            return value

    def clear(self):
        # Delete every visible key, the underlying maps other
        # than the first one are left untouched
        for key in self.__live_keys():
            del self[key]
1050
1051
# node_chain_copy()
#
# Creates a copy of a node by layering a ChainMap with a fresh,
# empty first map on top of the source, instead of copying every value.
#
# Nested mappings, lists and Provenance objects are replaced in the
# copy with copies of their own; any other value is not stored in the
# copy and is looked up in source through the ChainMap.
#
# Args:
#    source (Mapping): The node to copy
#
# Returns:
#    (ChainMap): The copy
#
def node_chain_copy(source):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    copy = ChainMap({}, source)
    for key, value in source.items():
        if isinstance(value, Mapping):
            copy[key] = node_chain_copy(value)
        elif isinstance(value, list):
            copy[key] = list_chain_copy(value)
        elif isinstance(value, Provenance):
            copy[key] = value.clone()

    return copy
1063
1064
# list_chain_copy()
#
# The list counterpart of node_chain_copy().
#
# Returns a new list in which mappings are copied with node_chain_copy(),
# nested lists are copied recursively, Provenance objects are cloned
# and any other item is carried over as is.
#
# Args:
#    source (list): The list to copy
#
# Returns:
#    (list): The copy
#
def list_chain_copy(source):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    copy = []
    for item in source:
        if isinstance(item, Mapping):
            copy.append(node_chain_copy(item))
        elif isinstance(item, list):
            copy.append(list_chain_copy(item))
        elif isinstance(item, Provenance):
            copy.append(item.clone())
        else:
            copy.append(item)

    return copy
1078
1079
# node_copy()
#
# Creates a recursive copy of a node as a plain dictionary.
#
# Nested mappings and lists are copied recursively, Provenance
# objects are cloned and any other value is carried over as is.
# ensure_provenance() is called on the copy before it is returned.
#
# Args:
#    source (Mapping): The node to copy
#
# Returns:
#    (dict): The copy
#
def node_copy(source):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    copy = {}
    for key, value in source.items():
        if isinstance(value, Mapping):
            copy[key] = node_copy(value)
        elif isinstance(value, list):
            copy[key] = list_copy(value)
        elif isinstance(value, Provenance):
            copy[key] = value.clone()
        else:
            copy[key] = value

    ensure_provenance(copy)

    return copy
1095
1096
# list_copy()
#
# The list counterpart of node_copy().
#
# Returns a new list in which mappings are copied with node_copy(),
# nested lists are copied recursively, Provenance objects are cloned
# and any other item is carried over as is.
#
# Args:
#    source (list): The list to copy
#
# Returns:
#    (list): The copy
#
def list_copy(source):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    copy = []
    for item in source:
        if isinstance(item, Mapping):
            copy.append(node_copy(item))
        elif isinstance(item, list):
            copy.append(list_copy(item))
        elif isinstance(item, Provenance):
            copy.append(item.clone())
        else:
            copy.append(item)

    return copy
1110
1111
# node_final_assertions()
#
# This must be called on a fully loaded and composited node,
# after all composition has completed.
#
# Nested mappings and lists are checked recursively.
#
# Args:
#    node (Mapping): The final composited node
#
# Raises:
#    (LoadError): If any assertions fail
#
def node_final_assertions(node):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    for key, value in node_items(node):

        # Assert that list composition directives dont remain, this
        # indicates that the user intended to override a list which
        # never existed in the underlying data
        #
        if key in ('(>)', '(<)', '(=)'):
            provenance = node_get_provenance(node, key)
            raise LoadError(LoadErrorReason.TRAILING_LIST_DIRECTIVE,
                            "{}: Attempt to override non-existing list".format(provenance))

        if isinstance(value, Mapping):
            node_final_assertions(value)
        elif isinstance(value, list):
            list_final_assertions(value)
1139
1140
# list_final_assertions()
#
# The list counterpart of node_final_assertions().
#
# Runs node_final_assertions() on every mapping found in the list,
# recursing into nested lists; other values are not checked.
#
# Args:
#    values (list): A list found in the final composited node
#
# Raises:
#    (LoadError): If any assertions fail
#
def list_final_assertions(values):
    # The ABC aliases such as `collections.Mapping` were removed in
    # Python 3.10, the abstract base classes live in `collections.abc`
    from collections.abc import Mapping

    for value in values:
        if isinstance(value, Mapping):
            node_final_assertions(value)
        elif isinstance(value, list):
            list_final_assertions(value)
1147
1148
# assert_symbol_name()
#
# Checks that a loaded string is usable as a symbol name, raising
# a consistent LoadError if it is not.
#
# A valid symbol name is not empty, consists only of ASCII letters,
# digits and underscores (and dashes, if allowed), and does not
# start with a digit.
#
# Args:
#    provenance (Provenance): The provenance of the loaded symbol, or None
#    symbol_name (str): The loaded symbol name
#    purpose (str): The purpose of the string, for an error message
#    allow_dashes (bool): Whether dashes are allowed for this symbol
#
# Raises:
#    LoadError: If the symbol_name is invalid
#
# Note that dashes are generally preferred for variable names and
# usage in YAML, but things such as option names which will be
# evaluated with jinja2 cannot use dashes.
#
def assert_symbol_name(provenance, symbol_name, purpose, *, allow_dashes=True):
    allowed = string.digits + string.ascii_letters + '_' + ('-' if allow_dashes else '')

    is_valid = (bool(symbol_name) and
                all(char in allowed for char in symbol_name) and
                symbol_name[0] not in string.digits)
    if is_valid:
        return

    # Explain the rules which apply to this particular symbol
    detail = ("Symbol names must contain only alphanumeric characters, "
              "may not start with a digit, and may contain underscores")
    if allow_dashes:
        detail = detail + " or dashes"

    # Prefix the message with the provenance only when we have one
    message = "Invalid symbol name for {}: '{}'".format(purpose, symbol_name)
    if provenance is not None:
        message = "{}: {}".format(provenance, message)

    raise LoadError(LoadErrorReason.INVALID_SYMBOL_NAME,
                    message, detail=detail)
1193