1# -*- coding: utf-8 -*-
2# Copyright (C) 2016 Adrien Vergé
3#
4# This program is free software: you can redistribute it and/or modify
5# it under the terms of the GNU General Public License as published by
6# the Free Software Foundation, either version 3 of the License, or
7# (at your option) any later version.
8#
9# This program is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12# GNU General Public License for more details.
13#
14# You should have received a copy of the GNU General Public License
15# along with this program.  If not, see <http://www.gnu.org/licenses/>.
16
17"""
18Use this rule to control the indentation.
19
20.. rubric:: Options
21
22* ``spaces`` defines the indentation width, in spaces. Set either to an integer
23  (e.g. ``2`` or ``4``, representing the number of spaces in an indentation
24  level) or to ``consistent`` to allow any number, as long as it remains the
25  same within the file.
26* ``indent-sequences`` defines whether block sequences should be indented or
27  not (when in a mapping, this indentation is not mandatory -- some people
28  perceive the ``-`` as part of the indentation). Possible values: ``true``,
29  ``false``, ``whatever`` and ``consistent``. ``consistent`` requires either
30  all block sequences to be indented, or none to be. ``whatever`` means either
31  indenting or not indenting individual block sequences is OK.
32* ``check-multi-line-strings`` defines whether to lint indentation in
33  multi-line strings. Set to ``true`` to enable, ``false`` to disable.
34
35.. rubric:: Examples
36
37#. With ``indentation: {spaces: 1}``
38
39   the following code snippet would **PASS**:
40   ::
41
42    history:
43     - name: Unix
44       date: 1969
45     - name: Linux
46       date: 1991
47    nest:
48     recurse:
49      - haystack:
50         needle
51
52#. With ``indentation: {spaces: 4}``
53
54   the following code snippet would **PASS**:
55   ::
56
57    history:
58        - name: Unix
59          date: 1969
60        - name: Linux
61          date: 1991
62    nest:
63        recurse:
64            - haystack:
65                  needle
66
67   the following code snippet would **FAIL**:
68   ::
69
70    history:
71      - name: Unix
72        date: 1969
73      - name: Linux
74        date: 1991
75    nest:
76      recurse:
77        - haystack:
78            needle
79
80#. With ``indentation: {spaces: consistent}``
81
82   the following code snippet would **PASS**:
83   ::
84
85    history:
86       - name: Unix
87         date: 1969
88       - name: Linux
89         date: 1991
90    nest:
91       recurse:
92          - haystack:
93               needle
94
95   the following code snippet would **FAIL**:
96   ::
97
98    some:
99      Russian:
100          dolls
101
102#. With ``indentation: {spaces: 2, indent-sequences: false}``
103
104   the following code snippet would **PASS**:
105   ::
106
107    list:
108    - flying
109    - spaghetti
110    - monster
111
112   the following code snippet would **FAIL**:
113   ::
114
115    list:
116      - flying
117      - spaghetti
118      - monster
119
120#. With ``indentation: {spaces: 2, indent-sequences: whatever}``
121
122   the following code snippet would **PASS**:
123   ::
124
125    list:
126    - flying:
127      - spaghetti
128      - monster
129    - not flying:
130        - spaghetti
131        - sauce
132
133#. With ``indentation: {spaces: 2, indent-sequences: consistent}``
134
135   the following code snippet would **PASS**:
136   ::
137
138    - flying:
139      - spaghetti
140      - monster
141    - not flying:
142      - spaghetti
143      - sauce
144
145   the following code snippet would **FAIL**:
146   ::
147
148    - flying:
149        - spaghetti
150        - monster
151    - not flying:
152      - spaghetti
153      - sauce
154
155#. With ``indentation: {spaces: 4, check-multi-line-strings: true}``
156
157   the following code snippet would **PASS**:
158   ::
159
160    Blaise Pascal:
161        Je vous écris une longue lettre parce que
162        je n'ai pas le temps d'en écrire une courte.
163
164   the following code snippet would **PASS**:
165   ::
166
167    Blaise Pascal: Je vous écris une longue lettre parce que
168                   je n'ai pas le temps d'en écrire une courte.
169
170   the following code snippet would **FAIL**:
171   ::
172
173    Blaise Pascal: Je vous écris une longue lettre parce que
174      je n'ai pas le temps d'en écrire une courte.
175
176   the following code snippet would **FAIL**:
177   ::
178
179    C code:
180        void main() {
181            printf("foo");
182        }
183
184   the following code snippet would **PASS**:
185   ::
186
187    C code:
188        void main() {
189        printf("bar");
190        }
191"""
192
193import yaml
194
195from yamllint.linter import LintProblem
196from yamllint.rules.common import get_real_end_line, is_explicit_key
197
198
# Rule metadata (presumably read by yamllint's rule machinery -- confirm
# against the loader).
ID = 'indentation'
TYPE = 'token'

# Accepted type(s)/literal(s) for each option of this rule.
CONF = {
    'spaces': (int, 'consistent'),
    'indent-sequences': (bool, 'whatever', 'consistent'),
    'check-multi-line-strings': bool,
}

# Value of each option when none is given.
DEFAULT = {
    'spaces': 'consistent',
    'indent-sequences': True,
    'check-multi-line-strings': False,
}

# Kinds of entries pushed on the indentation stack.  Each constant is the
# index of its own name in ``labels`` (used by ``Parent.__repr__``).
labels = ('ROOT', 'B_MAP', 'F_MAP', 'B_SEQ', 'F_SEQ', 'B_ENT', 'KEY', 'VAL')
ROOT, B_MAP, F_MAP, B_SEQ, F_SEQ, B_ENT, KEY, VAL = range(len(labels))
210
211
class Parent(object):
    """One entry of the indentation stack.

    Attributes:
        type: kind of construct, one of the ``ROOT`` .. ``VAL`` constants
            (an index into ``labels``).
        indent: indentation column recorded for this entry.
        line_indent: optional second indentation value; ``None`` unless
            supplied by the caller.
        explicit_key: flag, initially ``False``; set by the code that
            pushes the entry.
        implicit_block_seq: flag, initially ``False``; set by the code that
            pushes the entry.
    """

    def __init__(self, type, indent, line_indent=None):
        # Flags start cleared; callers flip them after construction.
        self.explicit_key = False
        self.implicit_block_seq = False

        self.type = type
        self.indent = indent
        self.line_indent = line_indent

    def __repr__(self):
        """Return ``'<LABEL>:<indent>'``, e.g. ``'B_MAP:2'``."""
        label = labels[self.type]
        return '%s:%d' % (label, self.indent)
222
223
def check_scalar_indentation(conf, token, context):
    """Yield a problem for each badly indented line of a multi-line scalar.

    Scalars that start and end on the same line yield nothing.  Otherwise
    every continuation line that is not blank has its leading spaces
    counted and compared with the expected indentation, which is computed
    once, from the first such line.

    :param conf: rule configuration (not read by this function).
    :param token: scalar token; its ``start_mark``/``end_mark``, ``plain``
        and ``style`` attributes are used.
    :param context: linting state; ``context['stack']`` and
        ``context['cur_line']`` are read for block scalars, and
        ``context['spaces']`` is set here when it is not yet an integer.
    :returns: generator of ``LintProblem``.
    """
    start, end = token.start_mark, token.end_mark
    if start.line == end.line:
        return

    buffer = start.buffer

    def expected_for(found_indent):
        def relative_to(base_indent):
            # When the indentation width is still unknown, deduce it from
            # the first continuation line.
            if not isinstance(context['spaces'], int):
                context['spaces'] = found_indent - base_indent
            return base_indent + context['spaces']

        if token.plain:
            return start.column
        if token.style in ('"', "'"):
            # Align with the first character after the opening quote.
            return start.column + 1
        if token.style not in ('>', '|'):
            return None

        parent = context['stack'][-1]

        if parent.type == B_ENT:
            # - >
            #     multi
            #     line
            return relative_to(start.column)

        if parent.type == KEY:
            assert parent.explicit_key
            # - ? >
            #       multi-line
            #       key
            #   : >
            #       multi-line
            #       value
            return relative_to(start.column)

        if parent.type != VAL:
            return relative_to(parent.indent)

        if start.line + 1 > context['cur_line']:
            # - key:
            #     >
            #       multi
            #       line
            return relative_to(parent.indent)

        grandparent = context['stack'][-2]
        if grandparent.explicit_key:
            # - ? key
            #   : >
            #       multi-line
            #       value
            return relative_to(start.column)

        # - key: >
        #     multi
        #     line
        return relative_to(grandparent.indent)

    wanted = None
    line_no = start.line + 1
    cursor = start.pointer
    search_end = end.pointer - 1

    while True:
        newline_at = buffer.find('\n', cursor, search_end)
        if newline_at == -1:
            break
        cursor = newline_at + 1
        line_no += 1

        indent = 0
        while buffer[cursor + indent] == ' ':
            indent += 1

        # Blank lines carry no indentation information.
        if buffer[cursor + indent] == '\n':
            continue

        if wanted is None:
            wanted = expected_for(indent)

        if indent != wanted:
            yield LintProblem(line_no, indent + 1,
                              'wrong indentation: expected %d but found %d' %
                              (wanted, indent))
299
300
def _check(conf, token, prev, next, nextnext, context):
    """Lint the indentation of one token and update the indentation stack.

    The function works in two steps: it first compares the column of the
    token (when it is the first visible token on its line) with the
    indentation recorded on top of ``context['stack']``, then it pushes
    and/or pops ``Parent`` entries on that stack according to the token
    type, so that the following tokens can be checked.

    :param conf: rule configuration; ``'spaces'``, ``'indent-sequences'``
        and ``'check-multi-line-strings'`` are read.
    :param token: the token being examined.
    :param prev: the token before ``token``.
    :param next: the token after ``token``.
    :param nextnext: the token after ``next``.
    :param context: dict holding the state (``'stack'``, ``'cur_line'``,
        ``'cur_line_indent'``, ``'spaces'``, ``'indent-sequences'``); it is
        initialised here when it has no ``'stack'`` key -- presumably the
        caller passes the same dict for every token of a document.
    :returns: generator of ``LintProblem``.
    :raises AssertionError: when the token sequence does not match the
        assumptions asserted below.
    """
    # First call with this context: set up the state.
    if 'stack' not in context:
        context['stack'] = [Parent(ROOT, 0)]
        context['cur_line'] = -1
        context['spaces'] = conf['spaces']
        context['indent-sequences'] = conf['indent-sequences']

    # Step 1: Lint

    # Stream start/end, block end and empty scalar tokens are not taken
    # into account when tracking lines.
    is_visible = (
        not isinstance(token, (yaml.StreamStartToken, yaml.StreamEndToken)) and
        not isinstance(token, yaml.BlockEndToken) and
        not (isinstance(token, yaml.ScalarToken) and token.value == ''))
    first_in_line = (is_visible and
                     token.start_mark.line + 1 > context['cur_line'])

    def detect_indent(base_indent, next):
        # If the indentation width is not an integer yet (`consistent`),
        # deduce it from the column of `next` and remember it.
        if not isinstance(context['spaces'], int):
            context['spaces'] = next.start_mark.column - base_indent
        return base_indent + context['spaces']

    if first_in_line:
        found_indentation = token.start_mark.column
        expected = context['stack'][-1].indent

        if isinstance(token, (yaml.FlowMappingEndToken,
                              yaml.FlowSequenceEndToken)):
            # A closing bracket is compared with the `line_indent` stored
            # when the flow collection was opened.
            expected = context['stack'][-1].line_indent
        elif (context['stack'][-1].type == KEY and
                context['stack'][-1].explicit_key and
                not isinstance(token, yaml.ValueToken)):
            # Content of an explicit key that starts on its own line is
            # expected one level deeper than the key.
            expected = detect_indent(expected, token)

        if found_indentation != expected:
            yield LintProblem(token.start_mark.line + 1, found_indentation + 1,
                              'wrong indentation: expected %d but found %d' %
                              (expected, found_indentation))

    if (isinstance(token, yaml.ScalarToken) and
            conf['check-multi-line-strings']):
        for problem in check_scalar_indentation(conf, token, context):
            yield problem

    # Step 2.a: Remember the current line and its indentation

    if is_visible:
        context['cur_line'] = get_real_end_line(token)
        if first_in_line:
            context['cur_line_indent'] = found_indentation

    # Step 2.b: Update state

    if isinstance(token, yaml.BlockMappingStartToken):
        #   - a: 1
        # or
        #   - ? a
        #     : 1
        # or
        #   - ?
        #       a
        #     : 1
        assert isinstance(next, yaml.KeyToken)
        assert next.start_mark.line == token.start_mark.line

        indent = token.start_mark.column

        context['stack'].append(Parent(B_MAP, indent))

    elif isinstance(token, yaml.FlowMappingStartToken):
        if next.start_mark.line == token.start_mark.line:
            #   - {a: 1, b: 2}
            indent = next.start_mark.column
        else:
            #   - {
            #     a: 1, b: 2
            #   }
            indent = detect_indent(context['cur_line_indent'], next)

        context['stack'].append(Parent(F_MAP, indent,
                                       line_indent=context['cur_line_indent']))

    elif isinstance(token, yaml.BlockSequenceStartToken):
        #   - - a
        #     - b
        assert isinstance(next, yaml.BlockEntryToken)
        assert next.start_mark.line == token.start_mark.line

        indent = token.start_mark.column

        context['stack'].append(Parent(B_SEQ, indent))

    elif (isinstance(token, yaml.BlockEntryToken) and
            # in case of an empty entry
            not isinstance(next, (yaml.BlockEntryToken, yaml.BlockEndToken))):
        # It looks like pyyaml doesn't issue BlockSequenceStartTokens when the
        # list is not indented. We need to compensate that.
        if context['stack'][-1].type != B_SEQ:
            context['stack'].append(Parent(B_SEQ, token.start_mark.column))
            context['stack'][-1].implicit_block_seq = True

        if next.start_mark.line == token.end_mark.line:
            #   - item 1
            #   - item 2
            indent = next.start_mark.column
        elif next.start_mark.column == token.start_mark.column:
            #   -
            #   key: value
            indent = next.start_mark.column
        else:
            #   -
            #     item 1
            #   -
            #     key:
            #       value
            indent = detect_indent(token.start_mark.column, next)

        context['stack'].append(Parent(B_ENT, indent))

    elif isinstance(token, yaml.FlowSequenceStartToken):
        if next.start_mark.line == token.start_mark.line:
            #   - [a, b]
            indent = next.start_mark.column
        else:
            #   - [
            #     a, b
            #   ]
            indent = detect_indent(context['cur_line_indent'], next)

        context['stack'].append(Parent(F_SEQ, indent,
                                       line_indent=context['cur_line_indent']))

    elif isinstance(token, yaml.KeyToken):
        # A key inherits the indentation of its enclosing entry.
        indent = context['stack'][-1].indent

        context['stack'].append(Parent(KEY, indent))

        context['stack'][-1].explicit_key = is_explicit_key(token)

    elif isinstance(token, yaml.ValueToken):
        assert context['stack'][-1].type == KEY

        # Special cases:
        #     key: &anchor
        #       value
        # and:
        #     key: !!tag
        #       value
        # The anchor/tag is skipped so that the value itself is used below.
        if isinstance(next, (yaml.AnchorToken, yaml.TagToken)):
            if (next.start_mark.line == prev.start_mark.line and
                    next.start_mark.line < nextnext.start_mark.line):
                next = nextnext

        # Only if value is not empty
        if not isinstance(next, (yaml.BlockEndToken,
                                 yaml.FlowMappingEndToken,
                                 yaml.FlowSequenceEndToken,
                                 yaml.KeyToken)):
            if context['stack'][-1].explicit_key:
                #   ? k
                #   : value
                # or
                #   ? k
                #   :
                #     value
                indent = detect_indent(context['stack'][-1].indent, next)
            elif next.start_mark.line == prev.start_mark.line:
                #   k: value
                indent = next.start_mark.column
            elif isinstance(next, (yaml.BlockSequenceStartToken,
                                   yaml.BlockEntryToken)):
                # NOTE: We add BlockEntryToken in the test above because
                # sometimes BlockSequenceStartToken are not issued. Try
                # yaml.scan()ning this:
                #     '- lib:\n'
                #     '  - var\n'
                if context['indent-sequences'] is False:
                    indent = context['stack'][-1].indent
                elif context['indent-sequences'] is True:
                    if (context['spaces'] == 'consistent' and
                            next.start_mark.column -
                            context['stack'][-1].indent == 0):
                        # In this case, the block sequence item is not indented
                        # (while it should be), but we don't know yet the
                        # indentation it should have (because `spaces` is
                        # `consistent` and its value has not been computed yet
                        # -- this is probably the beginning of the document).
                        # So we choose an arbitrary value (2).
                        indent = 2
                    else:
                        indent = detect_indent(context['stack'][-1].indent,
                                               next)
                else:  # 'whatever' or 'consistent'
                    if next.start_mark.column == context['stack'][-1].indent:
                        #   key:
                        #   - e1
                        #   - e2
                        # With `consistent`, the first sequence met decides
                        # the style for the following ones.
                        if context['indent-sequences'] == 'consistent':
                            context['indent-sequences'] = False
                        indent = context['stack'][-1].indent
                    else:
                        if context['indent-sequences'] == 'consistent':
                            context['indent-sequences'] = True
                        #   key:
                        #     - e1
                        #     - e2
                        indent = detect_indent(context['stack'][-1].indent,
                                               next)
            else:
                #   k:
                #     value
                indent = detect_indent(context['stack'][-1].indent, next)

            context['stack'].append(Parent(VAL, indent))

    # Pop every stack entry that the current token closes. The flag makes
    # sure a single end token closes at most one flow/block collection.
    consumed_current_token = False
    while True:
        if (context['stack'][-1].type == F_SEQ and
                isinstance(token, yaml.FlowSequenceEndToken) and
                not consumed_current_token):
            context['stack'].pop()
            consumed_current_token = True

        elif (context['stack'][-1].type == F_MAP and
                isinstance(token, yaml.FlowMappingEndToken) and
                not consumed_current_token):
            context['stack'].pop()
            consumed_current_token = True

        elif (context['stack'][-1].type in (B_MAP, B_SEQ) and
                isinstance(token, yaml.BlockEndToken) and
                not context['stack'][-1].implicit_block_seq and
                not consumed_current_token):
            context['stack'].pop()
            consumed_current_token = True

        elif (context['stack'][-1].type == B_ENT and
                not isinstance(token, yaml.BlockEntryToken) and
                context['stack'][-2].implicit_block_seq and
                not isinstance(token, (yaml.AnchorToken, yaml.TagToken)) and
                not isinstance(next, yaml.BlockEntryToken)):
            # Last entry of a sequence that was pushed without a
            # BlockSequenceStartToken: drop both the entry and the sequence.
            context['stack'].pop()
            context['stack'].pop()

        elif (context['stack'][-1].type == B_ENT and
                isinstance(next, (yaml.BlockEntryToken, yaml.BlockEndToken))):
            context['stack'].pop()

        elif (context['stack'][-1].type == VAL and
                not isinstance(token, yaml.ValueToken) and
                not isinstance(token, (yaml.AnchorToken, yaml.TagToken))):
            # The value is complete: drop it together with its key.
            assert context['stack'][-2].type == KEY
            context['stack'].pop()
            context['stack'].pop()

        elif (context['stack'][-1].type == KEY and
                isinstance(next, (yaml.BlockEndToken,
                                  yaml.FlowMappingEndToken,
                                  yaml.FlowSequenceEndToken,
                                  yaml.KeyToken))):
            # A key without a value: it's part of a set. Let's drop this key
            # and leave room for the next one.
            context['stack'].pop()

        else:
            break
566
567
def check(conf, token, prev, next, nextnext, context):
    """Yield the indentation problems found by ``_check`` for ``token``.

    All arguments are forwarded unchanged to ``_check``.  If an assertion
    fails while ``_check`` runs, the ``AssertionError`` is not propagated:
    one extra problem, located at the start of ``token``, is yielded
    instead.

    :returns: generator of ``LintProblem``.
    """
    # Calling a generator function runs none of its body, so the generator
    # can be created outside the ``try``.
    problems = _check(conf, token, prev, next, nextnext, context)
    try:
        for found in problems:
            yield found
    except AssertionError:
        where = token.start_mark
        yield LintProblem(where.line + 1, where.column + 1,
                          'cannot infer indentation: unexpected token')
576