1# Copyright (c) 2017 Ansible Project
2# GNU General Public License v3.0+ (see COPYING or https://www.gnu.org/licenses/gpl-3.0.txt)
3from __future__ import (absolute_import, division, print_function)
4__metaclass__ = type
5
# YAML-formatted description of the 'ini' inventory plugin: what the INI
# sections mean and how key=value entries are typed depending on where they
# are declared.
# NOTE(review): presumably consumed by Ansible's documentation tooling rather
# than by the parser below -- confirm before changing keys or indentation.
DOCUMENTATION = '''
    inventory: ini
    version_added: "2.4"
    short_description: Uses an Ansible INI file as inventory source.
    description:
        - INI file based inventory, sections are groups or group related with special `:modifiers`.
        - Entries in sections C([group_1]) are hosts, members of the group.
        - Hosts can have variables defined inline as key/value pairs separated by C(=).
        - The C(children) modifier indicates that the section contains groups.
        - The C(vars) modifier indicates that the section contains variables assigned to members of the group.
        - Anything found outside a section is considered an 'ungrouped' host.
        - Values passed in the INI format using the ``key=value`` syntax are interpreted differently depending on where they are declared within your inventory.
        - When declared inline with the host, INI values are processed by Python's ast.literal_eval function
          (U(https://docs.python.org/2/library/ast.html#ast.literal_eval)) and interpreted as Python literal structures
          (strings, numbers, tuples, lists, dicts, booleans, None). Host lines accept multiple C(key=value) parameters per line.
          Therefore they need a way to indicate that a space is part of a value rather than a separator.
        - When declared in a C(:vars) section, INI values are interpreted as strings. For example C(var=FALSE) would create a string equal to C(FALSE).
          Unlike host lines, C(:vars) sections accept only a single entry per line, so everything after the C(=) must be the value for the entry.
        - Do not rely on types set during definition, always make sure you specify type with a filter when needed when consuming the variable.
        - See the Examples for proper quoting to prevent changes to variable type.
    notes:
        - Whitelisted in configuration by default.
        - Consider switching to YAML format for inventory sources to avoid confusion on the actual type of a variable.
          The YAML inventory plugin processes variable values consistently and correctly.
'''
31
# Two sample INI inventories, stored as YAML block scalars: example1 shows
# groups, :vars and :children sections; example2 shows ungrouped hosts and a
# host that belongs to two groups.
# NOTE(review): presumably displayed by Ansible's documentation tooling and
# not read by the parser below -- confirm.
EXAMPLES = '''
  example1: |
      # example cfg file
      [web]
      host1
      host2 ansible_port=222 # defined inline, interpreted as an integer

      [web:vars]
      http_port=8080 # all members of 'web' will inherit these
      myvar=23 # defined in a :vars section, interpreted as a string

      [web:children] # child groups will automatically add their hosts to parent group
      apache
      nginx

      [apache]
      tomcat1
      tomcat2 myvar=34 # host specific vars override group vars
      tomcat3 mysecret="'03#pa33w0rd'" # proper quoting to prevent value changes

      [nginx]
      jenkins1

      [nginx:vars]
      has_java = True # vars in child groups override same in parent

      [all:vars]
      has_java = False # 'all' is 'top' parent

  example2: |
      # other example config
      host1 # this is 'ungrouped'

      # both hosts have same IP but diff ports, also 'ungrouped'
      host2 ansible_host=127.0.0.1 ansible_port=44
      host3 ansible_host=127.0.0.1 ansible_port=45

      [g1]
      host4

      [g2]
      host4 # same host as above, but member of 2 groups, will inherit vars from both
            # inventory hostnames are unique
'''
76
77import ast
78import re
79
80from ansible.inventory.group import to_safe_group_name
81from ansible.plugins.inventory import BaseFileInventoryPlugin
82
83from ansible.errors import AnsibleError, AnsibleParserError
84from ansible.module_utils._text import to_bytes, to_text
85from ansible.utils.shlex import shlex_split
86
87
class InventoryModule(BaseFileInventoryPlugin):
    """
    Takes an INI-format inventory file and builds a list of groups and subgroups
    with their associated hosts and variable settings.
    """
    NAME = 'ini'
    # Characters that mark a comment line, as text and as bytes. The bytes
    # variant is used by parse() when the file cannot be decoded in one go.
    _COMMENT_MARKERS = frozenset((u';', u'#'))
    b_COMMENT_MARKERS = frozenset((b';', b'#'))

    def __init__(self):

        super(InventoryModule, self).__init__()

        # Compiled regular expressions, filled in by _compile_patterns().
        self.patterns = {}
        # Path of the file being parsed; used as a prefix in error messages.
        self._filename = None
        # Number of the line being parsed; maintained by _parse() and read by
        # _raise_error(), so give it a defined value from the start.
        self.lineno = 0

    def parse(self, inventory, loader, path, cache=True):
        '''
        Reads the INI inventory file at ``path`` and feeds its contents to
        _parse(). The ``cache`` argument is accepted but not used here.

        Any exception raised while reading or parsing the file is re-raised
        as an AnsibleParserError.
        '''

        super(InventoryModule, self).parse(inventory, loader, path)

        self._filename = path

        try:
            # Read in the hosts, groups, and variables defined in the inventory file.
            if self.loader:
                (b_data, private) = self.loader._get_file_contents(path)
            else:
                b_path = to_bytes(path, errors='surrogate_or_strict')
                with open(b_path, 'rb') as fh:
                    b_data = fh.read()

            try:
                # Faster to do to_text once on a long string than many
                # times on smaller strings
                data = to_text(b_data, errors='surrogate_or_strict').splitlines()
            except UnicodeError:
                # Handle non-utf8 in comment lines: https://github.com/ansible/ansible/issues/17593
                data = []
                for b_line in b_data.splitlines():
                    # Slice instead of indexing: indexing bytes yields an int on
                    # Python 3, which would never match the bytes markers.
                    # Leading whitespace is dropped first because _parse() also
                    # strips a line before deciding whether it is a comment.
                    if b_line.lstrip()[:1] in self.b_COMMENT_MARKERS:
                        # The comment text itself is not needed; an empty line
                        # keeps line numbers in error messages accurate.
                        data.append(u'')
                    else:
                        # Non-comment lines still have to be valid utf-8
                        data.append(to_text(b_line, errors='surrogate_or_strict'))

            self._parse(path, data)
        except Exception as e:
            raise AnsibleParserError(e)

    def _raise_error(self, message):
        '''
        Raises an AnsibleError whose text is ``message`` prefixed with the
        current file name and line number.
        '''
        raise AnsibleError("%s:%d: " % (self._filename, self.lineno) + message)

    def _parse(self, path, lines):
        '''
        Populates self.inventory from the given array of lines. Raises an
        error on any parse failure.
        '''

        self._compile_patterns()

        # We behave as though the first line of the inventory is '[ungrouped]',
        # and begin to look for host definitions. We make a single pass through
        # each line of the inventory, adding groups, hosts and subgroups to
        # self.inventory and setting variables as we go.

        pending_declarations = {}
        groupname = 'ungrouped'
        state = 'hosts'
        self.lineno = 0
        for line in lines:
            self.lineno += 1

            line = line.strip()
            # Skip empty lines and comments
            if not line or line[0] in self._COMMENT_MARKERS:
                continue

            # Is this a [section] header? That tells us what group we're parsing
            # definitions for, and what kind of definitions to expect.

            m = self.patterns['section'].match(line)
            if m:
                (groupname, state) = m.groups()

                groupname = to_safe_group_name(groupname)

                state = state or 'hosts'
                if state not in ('hosts', 'children', 'vars'):
                    title = ":".join(m.groups())
                    self._raise_error("Section [%s] has unknown type: %s" % (title, state))

                # If we haven't seen this group before, we add a new Group.
                if groupname not in self.inventory.groups:
                    # Either [groupname] or [groupname:children] is sufficient to declare a group,
                    # but [groupname:vars] is allowed only if the group is declared elsewhere.
                    # We add the group anyway, but make a note in pending_declarations to check at the end.
                    #
                    # It's possible that a group is previously pending due to being defined as a child
                    # group, in that case we simply pass so that the logic below to process pending
                    # declarations will take the appropriate action for a pending child group instead of
                    # incorrectly handling it as a var state pending declaration
                    if state == 'vars' and groupname not in pending_declarations:
                        pending_declarations[groupname] = dict(line=self.lineno, state=state, name=groupname)

                    self.inventory.add_group(groupname)

                # When we see a declaration that we've been waiting for, we process and delete.
                if groupname in pending_declarations and state != 'vars':
                    if pending_declarations[groupname]['state'] == 'children':
                        self._add_pending_children(groupname, pending_declarations)
                    elif pending_declarations[groupname]['state'] == 'vars':
                        del pending_declarations[groupname]

                continue
            elif line.startswith('[') and line.endswith(']'):
                # The trailing space before the closing quote of the first
                # literal keeps "spaces" and "in" apart in the final message.
                self._raise_error("Invalid section entry: '%s'. Please make sure that there are no spaces "
                                  "in the section entry, and that there are no other invalid characters" % line)

            # It's not a section, so the current state tells us what kind of
            # definition it must be. The individual parsers will raise an
            # error if we feed them something they can't digest.

            # [groupname] contains host definitions that must be added to
            # the current group.
            if state == 'hosts':
                hosts, port, variables = self._parse_host_definition(line)
                self._populate_host_vars(hosts, variables, groupname, port)

            # [groupname:vars] contains variable definitions that must be
            # applied to the current group.
            elif state == 'vars':
                (k, v) = self._parse_variable_definition(line)
                self.inventory.set_variable(groupname, k, v)

            # [groupname:children] contains subgroup names that must be
            # added as children of the current group. The subgroup names
            # must themselves be declared as groups, but as before, they
            # may only be declared later.
            elif state == 'children':
                # NOTE(review): unlike section names, the child name is not
                # passed through to_safe_group_name -- confirm this is intended.
                child = self._parse_group_name(line)
                if child not in self.inventory.groups:
                    if child not in pending_declarations:
                        pending_declarations[child] = dict(line=self.lineno, state=state, name=child, parents=[groupname])
                    else:
                        pending_declarations[child]['parents'].append(groupname)
                else:
                    self.inventory.add_child(groupname, child)
            else:
                # This can happen only if the state checker accepts a state that isn't handled above.
                self._raise_error("Entered unhandled state: %s" % (state))

        # Any entries in pending_declarations not removed by a group declaration above mean that there was an unresolved reference.
        # We report only the first such error here.
        for decl in pending_declarations.values():
            if decl['state'] == 'vars':
                raise AnsibleError("%s:%d: Section [%s:vars] not valid for undefined group: %s" % (path, decl['line'], decl['name'], decl['name']))
            elif decl['state'] == 'children':
                # Report the most recently recorded parent of the undefined group.
                raise AnsibleError("%s:%d: Section [%s:children] includes undefined group: %s" % (path, decl['line'], decl['parents'][-1], decl['name']))

    def _add_pending_children(self, group, pending):
        '''
        Adds ``group`` as a child of every parent recorded for it in
        ``pending`` and then removes its pending entry. A parent that is
        itself still pending as a child is resolved recursively first.
        '''
        for parent in pending[group]['parents']:
            self.inventory.add_child(parent, group)
            if parent in pending and pending[parent]['state'] == 'children':
                self._add_pending_children(parent, pending)
        del pending[group]

    def _parse_group_name(self, line):
        '''
        Takes a single line and tries to parse it as a group name. Returns the
        group name if successful, or raises an error.
        '''

        m = self.patterns['groupname'].match(line)
        if m:
            return m.group(1)

        self._raise_error("Expected group name, got: %s" % (line))

    def _parse_variable_definition(self, line):
        '''
        Takes a string and tries to parse it as a variable definition. Returns
        the key and value if successful, or raises an error.
        '''

        # TODO: We parse variable assignments as a key (anything to the left of
        # an '='), an '=', and a value (anything left) and leave the value to
        # _parse_value to sort out. We should be more systematic here about
        # defining what is acceptable, how quotes work, and so on.

        if '=' in line:
            (k, v) = [e.strip() for e in line.split("=", 1)]
            return (k, self._parse_value(v))

        self._raise_error("Expected key=value, got: %s" % (line))

    def _parse_host_definition(self, line):
        '''
        Takes a single line and tries to parse it as a host definition. Returns
        a (hostnames, port, variables) tuple if successful, or raises an error.
        '''

        # A host definition comprises (1) a non-whitespace hostname or range,
        # optionally followed by (2) a series of key="some value" assignments.
        # We ignore any trailing whitespace and/or comments. For example, here
        # are a series of host definitions in a group:
        #
        # [groupname]
        # alpha
        # beta:2345 user=admin      # we'll tell shlex
        # gamma sudo=True user=root # to ignore comments

        try:
            tokens = shlex_split(line, comments=True)
        except ValueError as e:
            self._raise_error("Error parsing host definition '%s': %s" % (line, e))

        (hostnames, port) = self._expand_hostpattern(tokens[0])

        # Try to process anything remaining as a series of key=value pairs.
        variables = {}
        for t in tokens[1:]:
            if '=' not in t:
                self._raise_error("Expected key=value host variable assignment, got: %s" % (t))
            (k, v) = t.split('=', 1)
            variables[k] = self._parse_value(v)

        return hostnames, port, variables

    def _expand_hostpattern(self, hostpattern):
        '''
        Expands ``hostpattern`` with the base class implementation and then
        applies extra checks: a pattern ending in ':' without a port and a
        hostname of '---' are both rejected with an AnsibleParserError.
        Returns the (hostnames, port) tuple from the base class.
        '''

        hostnames, port = super(InventoryModule, self)._expand_hostpattern(hostpattern)

        if hostpattern.strip().endswith(':') and port is None:
            raise AnsibleParserError("Invalid host pattern '%s' supplied, ending in ':' is not allowed, this character is reserved to provide a port." %
                                     hostpattern)
        for pattern in hostnames:
            # some YAML parsing prevention checks
            if pattern.strip() == '---':
                raise AnsibleParserError("Invalid host pattern '%s' supplied, '---' is normally a sign this is a YAML file." % hostpattern)

        return (hostnames, port)

    @staticmethod
    def _parse_value(v):
        '''
        Attempt to transform the string value from an ini file into a basic python object
        (int, dict, list, unicode string, etc). A value that cannot be evaluated
        as a Python literal is kept as the original string.
        '''
        try:
            v = ast.literal_eval(v)
        except (ValueError, SyntaxError, TypeError):
            # Not a Python literal, so keep the raw string:
            #   ValueError  - well-formed expression that is not a literal
            #   SyntaxError - not a valid expression at all
            #   TypeError   - literal that cannot be built, e.g. a dict or
            #                 set with an unhashable member such as {[1]: 2}
            pass
        return to_text(v, nonstring='passthru', errors='surrogate_or_strict')

    def _compile_patterns(self):
        '''
        Compiles the regular expressions required to parse the inventory and
        stores them in self.patterns.
        '''

        # Section names are square-bracketed expressions at the beginning of a
        # line, comprising (1) a group name optionally followed by (2) a tag
        # that specifies the contents of the section. We ignore any trailing
        # whitespace and/or comments. For example:
        #
        # [groupname]
        # [somegroup:vars]
        # [naughty:children] # only get coal in their stockings

        self.patterns['section'] = re.compile(
            to_text(r'''^\[
                    ([^:\]\s]+)             # group name (see groupname below)
                    (?::(\w+))?             # optional : and tag name
                \]
                \s*                         # ignore trailing whitespace
                (?:\#.*)?                   # and/or a comment till the
                $                           # end of the line
            ''', errors='surrogate_or_strict'), re.X
        )

        # FIXME: What are the real restrictions on group names, or rather, what
        # should they be? At the moment, they must be non-empty sequences of non
        # whitespace characters excluding ':' and ']', but we should define more
        # precise rules in order to support better diagnostics.

        self.patterns['groupname'] = re.compile(
            to_text(r'''^
                ([^:\]\s]+)
                \s*                         # ignore trailing whitespace
                (?:\#.*)?                   # and/or a comment till the
                $                           # end of the line
            ''', errors='surrogate_or_strict'), re.X
        )
395