1#!/usr/bin/env python
2"""
3Usage: python vtk_reindent_code.py [--test] <file1> [<file2> ...]
4
5This script takes old-style "Whitesmiths" indented VTK source files as
6input, and re-indents the braces according to the new VTK style.
7Only the brace indentation is modified.
8
9If called with the --test option, then it will print an error message
10for each file that it would modify, but it will not actually modify the
11files.
12
13Written by David Gobbi on Sep 30, 2015.
14"""
15
16import sys
17import os
18import re
19
def _strip_literals_and_comments(lines):
    """Return a parse-friendly copy of 'lines', one entry per input line.

    String literals are reduced to "", character literals to ' ', and
    comments are removed, so that braces hiding inside them cannot be
    mistaken for code.  Trailing whitespace is stripped but a trailing
    line-continuation backslash is kept.  A line that lies entirely
    inside a multi-line comment becomes "", and a line that lies entirely
    inside a continued literal becomes a lone backslash.
    """
    # look for ', ", /*, and //
    keychar = re.compile(r"""[/"']""")
    # comments of the form /* */
    c_comment = re.compile(r"\/\*(\*(?!\/)|[^*])*\*\/")
    c_comment_start = re.compile(r"\/\*(\*(?!\/)|[^*])*$")
    c_comment_end = re.compile(r"^(\*(?!\/)|[^*])*\*\/")
    # comments of the form //
    cpp_comment = re.compile(r"\/\/.*")
    # string literals ""
    string_literal = re.compile(r'"([^\\"]|\\.)*"')
    string_literal_start = re.compile(r'"([^\\"]|\\.)*\\$')
    string_literal_end = re.compile(r'^([^\\"]|\\.)*"')
    # character literals ''
    char_literal = re.compile(r"'([^\\']|\\.)*'")
    char_literal_start = re.compile(r"'([^\\']|\\.)*\\$")
    char_literal_end = re.compile(r"^([^\\']|\\.)*'")

    simplified = []
    cont = None    # set if e.g. we found /* and we are looking for */

    for text in lines:
        line = text.rstrip()

        if cont is not None:
            # look for closing ' or " or */
            match = cont.match(line)
            if match:
                # found it, keep only what follows
                line = line[match.end():]
                cont = None
            elif cont is c_comment_end:
                # still inside a comment, clear the whole line
                simplified.append("")
                continue
            else:
                # still inside a literal, reduce the line to a backslash
                simplified.append('\\')
                continue

        # start at column 0 and search for ', ", /*, or //
        pos = 0
        while True:
            match = keychar.search(line, pos)
            if match is None:
                break
            pos = match.start()
            # was the match /* ... */ ?
            match = c_comment.match(line, pos)
            if match:
                line = line[0:pos] + " " + line[match.end():]
                pos += 1
                continue
            # does the line have /* ... without the */ ?
            match = c_comment_start.match(line, pos)
            if match:
                if line[-1] == '\\':
                    line = line[0:pos] + ' \\'
                else:
                    line = line[0:pos]
                cont = c_comment_end
                break
            # does the line have // ?
            match = cpp_comment.match(line, pos)
            if match:
                if line[-1] == '\\':
                    line = line[0:pos] + ' \\'
                else:
                    line = line[0:pos]
                break
            # did we find "..." ?
            match = string_literal.match(line, pos)
            if match:
                line = line[0:pos] + "\"\"" + line[match.end():]
                pos += 2
                continue
            # did we find "... without the final " ?
            match = string_literal_start.match(line, pos)
            if match:
                line = line[0:pos] + "\"\"\\"
                cont = string_literal_end
                break
            # did we find '...' ?
            match = char_literal.match(line, pos)
            if match:
                line = line[0:pos] + "\' \'" + line[match.end():]
                pos += 3
                continue
            # did we find '... without the final ' ?
            match = char_literal_start.match(line, pos)
            if match:
                line = line[0:pos] + "\' \'\\"
                cont = char_literal_end
                break
            # if we got to here, we found / that wasn't /* or //
            pos += 1

        # strip any trailing whitespace!
        simplified.append(line.rstrip())

    return simplified


def reindent(filename, dry_run=False):
    """Reindent a file from Whitesmiths style to Allman style.

    Only lines holding a brace that opens or closes a block are moved;
    braces of initializer lists are left alone.  Problems found while
    parsing (mismatched delimiters, unbalanced #if blocks) are reported
    on stderr with 1-based line numbers.

    Arguments:
      filename -- path of the source file to process
      dry_run  -- if true, do not rewrite the file; instead print a
                  warning on stderr that lists the lines needing a change

    Returns True if at least one line needs (or received) a change, and
    False otherwise.  Exits the process with status 1 if the file cannot
    be read.
    """

    # read the file
    try:
        with open(filename) as f:
            lines = f.readlines()
    except (IOError, OSError, ValueError) as e:
        # ValueError also covers undecodable file contents
        sys.stderr.write(filename + ": ")
        sys.stderr.write(str(e) + "\n")
        sys.exit(1)

    n = len(lines) # 'lines' is the input

    # The first part of the work clears all strings and comments where
    # non-grammatical braces might be hiding.  These changes will not be
    # saved back to the file, they just simplify the parsing.
    newlines = _strip_literals_and_comments(lines)

    # The second part looks for braces in the simplified code.

    # Whenever we encounter an opening brace, we push its position onto a
    # stack.  Whenever we encounter the matching closing brace, we indent
    # the braces as a pair.

    # For #if directives, we check whether there are mismatched braces
    # within the conditional block, and if so, we print a warning and reset
    # the stack to the depth that it had at the start of the block.

    # For #define directives, we save the stack and then restart counting
    # braces until the end of the #define.  Then we restore the stack.

    # all changes go through this function
    lines_changed = {} # keeps track of each line that was changed
    def changeline(i, newtext):
        if newtext != lines[i]:
            lines[i] = newtext
            lines_changed[i] = newtext

    # we push a tuple (delim, row, col, newcol) onto this stack whenever
    # we find a {, (, or [ delimiter, this keeps track of where we found
    # the delimiter and what column we want to move it to; col is -1 for
    # the brace of an initializer list, which must not be moved
    stack = []

    # this is a superstack that allows us to save the entire stack when we
    # enter into an #if conditional block
    dstack = []

    # these are syntactic elements we need to look for
    directive = re.compile(r"\s*#\s*(..)")
    label = re.compile(r"""(case(?!\w)([^:]|::)+|\w+\s*(::\s*)*\s*:(?!:))""")
    cflow = re.compile(r"(if|else|for|do|while|switch)(?!\w)")
    delims = re.compile(r"[{}()\[\];]")
    spaces = re.compile(r"\s*")
    other = re.compile(r"(\w+|[^{}()\[\];\w\s]+)\s*")
    cplusplus = re.compile(r"\s*#\s*ifdef\s+__cplusplus")
    partner = {'}':'{', ')':'(', ']':'['}

    indentation = 0        # current indentation column
    continuation = False   # true if line continues an unfinished statement
    new_context = True     # also set when we enter a #define statement
    in_else = False        # set if in an #else
    in_define = False      # set if in #define
    in_assign = False      # set to deal with "= {" or #define x {"
    leaving_define = False # set if at the end of a #define
    save_stack = None      # save stack when entering a #define

    for i in range(n):
        line = newlines[i]
        lineno = str(i + 1) # diagnostics use 1-based line numbers

        # restore stack when leaving #define
        if leaving_define:
            stack, indentation, continuation = save_stack
            save_stack = None
            in_define = False
            leaving_define = False

        # handle #if conditionals
        is_directive = False
        in_else = False
        match = directive.match(line)
        if match:
            is_directive = True
            # first two letters of the directive name
            key = match.groups()[0]
            if key == 'if':
                dstack.append((list(stack), indentation, continuation,
                               line))
            elif key in ('en', 'el'):
                if dstack:
                    oldstack, oldindent, oldcont, dline = dstack.pop()
                    if (len(stack) > len(oldstack) and
                          not cplusplus.match(dline)):
                        sys.stderr.write(filename + ":" + lineno + ": ")
                        sys.stderr.write("mismatched delimiter in \"" +
                                         dline + "\" block\n")
                else:
                    # #else, #elif or #endif with no open #if: report it
                    # and carry on from the current state
                    sys.stderr.write(filename + ":" + lineno + ": ")
                    sys.stderr.write("mismatched #if conditional.\n")
                    oldstack = list(stack)
                    oldindent = indentation
                    oldcont = continuation
                if key == 'el':
                    in_else = True
                    indentation = oldindent
                    continuation = oldcont
                    stack = oldstack
                    dstack.append((list(stack), indentation, continuation,
                                  line))
            elif key == 'de':
                in_define = True
                leaving_define = False
                save_stack = (stack, indentation, continuation)
                stack = []
                new_context = True

        # remove backslash at end of line, if present
        if len(line) > 0 and line[-1] == '\\':
            line = line[0:-1].rstrip()
        elif in_define:
            leaving_define = True

        if not is_directive and len(line) > 0 and not continuation:
            # what is the indentation of the current line?
            match = spaces.match(line)
            if not line[match.end()] == '{':
                indentation = match.end()
                continuation = True

        # new_context marks beginning of a file or a macro
        if new_context:
            continuation = False
            indentation = 0
            new_context = False

        # skip initial whitespace
        if is_directive:
            # stripping the backslash can leave too little text for the
            # directive pattern, in which case nothing is left to parse
            match = directive.match(line)
            if match:
                pos = match.end()
            else:
                pos = len(line)
        else:
            pos = spaces.match(line).end()

        # check for a label e.g. case; labels only count when every open
        # delimiter is a brace, i.e. we are not inside ( ) or [ ]
        match = label.match(line, pos)
        if match and all(item[0] == '{' for item in stack):
            word = re.match(r"\w*", match.group()).group()
            if word in ("case", "default"):
                indentation = pos
            continuation = False
            # check for multiple labels on the same line
            while match:
                pos = spaces.match(line, match.end()).end()
                match = label.match(line, pos)

        # parse the line
        while pos != len(line):
            # check for if, else, for, while, do, switch
            match = cflow.match(line, pos)
            if match:
                # if we are at the beginning of the line
                if spaces.match(line).end() == pos:
                    indentation = pos
                pos = spaces.match(line, match.end()).end()
                continue

            # check for a delimiter {} () [] or ;
            match = delims.match(line, pos)
            if not match:
                # check for any other identifiers, operators
                match = other.match(line, pos)
                if match:
                    pos = match.end()
                    continue
                else:
                    break

            # found a delimiter
            delim = line[pos]

            if delim in ('(', '['):
                # save delim, row, col, and current indentation
                stack.append((delim, i, pos, indentation))
            elif delim == '{':
                if in_assign or line[0:pos].rstrip()[-1:] == "=":
                    # do not adjust braces for initializer lists
                    stack.append((delim, i, -1, indentation))
                else:
                    if ((in_else or in_define) and
                          spaces.sub("", line) == "{"):
                        # for opening braces that might have no match
                        indent = " "*indentation
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                    # save delim, row, col, and previous indentation
                    stack.append((delim, i, pos, indentation))
                if spaces.sub("", newlines[i][0:pos]) == "":
                    indentation += 2
                continuation = False
            elif delim == ';':
                # ';' marks end of statement unless inside for (;;)
                if len(stack) == 0 or stack[-1][0] == '{':
                    continuation = False
            else:
                # found a ')', ']', or '}' delimiter, so pop its partner
                try:
                    ldelim, j, k, indentation = stack.pop()
                    in_assign = (k < 0)
                except IndexError:
                    ldelim = ""
                if ldelim != partner[delim]:
                    sys.stderr.write(filename + ":" + lineno + ": ")
                    sys.stderr.write("mismatched \'" + delim + "\'\n")
                # adjust the indentation of matching '{', '}'
                if (ldelim == '{' and delim == '}' and not in_assign and
                      spaces.sub("", line[0:pos]) == ""):
                    if spaces.sub("", newlines[j][0:k]) == "":
                        # both braces start their line, move them together
                        indent = " "*indentation
                        changeline(j, spaces.sub(indent, lines[j], count=1))
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                    elif i != j:
                        # '{' ends a line of code, only '}' can be moved
                        indent = " "*indentation
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                if delim == '}':
                    continuation = False

            # eat whitespace and continue
            pos = spaces.match(line, match.end()).end()

        # check for " = " and #define assignments for the sake of
        # the { initializer list } that might be on the following line
        if len(line) > 0:
            if (line[-1] == '=' or
                (is_directive and in_define and not leaving_define)):
                in_assign = True
            elif not is_directive:
                in_assign = False

    if dstack:
        sys.stderr.write(filename + ": ")
        sys.stderr.write("mismatched #if conditional.\n")

    if stack:
        sys.stderr.write(filename + ":" + str(stack[0][1] + 1) + ": ")
        sys.stderr.write("no match for " + stack[0][0] +
                         " before end of file.\n")

    if not lines_changed:
        return False

    if dry_run:
        errcount = len(lines_changed)
        # list at most ten line numbers, then an ellipsis
        line_numbers = [str(l + 1) for l in sorted(lines_changed)[0:10]]
        if errcount > len(line_numbers):
            line_numbers.append("...")
        sys.stderr.write("Warning: " + filename +
                         ": incorrect brace indentation on " +
                         str(errcount) +
                         (" lines: ", " line: ")[errcount == 1] +
                         ", ".join(line_numbers) + "\n")
    else:
        # remove any trailing spaces, then rewrite the file; blank lines
        # at the end of the file are written back unchanged
        trailing = re.compile(r" *$")
        output = [trailing.sub("", text) for text in lines]
        with open(filename, 'w') as ofile:
            ofile.writelines(output)

    return True
391
392
if __name__ == "__main__":

    # generated sources are never reindented
    skipped_names = ("lex.yy.c", "vtkParse.tab.c")

    targets = []
    test_only = False     # becomes true when --test is given
    options_done = False  # becomes true after "--", nothing is an option then

    for argument in sys.argv[1:]:
        if options_done or not argument.startswith('-'):
            # a file name, kept unless it names a generated file
            if os.path.split(argument)[-1] not in skipped_names:
                targets.append(argument)
        elif argument == '--':
            options_done = True
        elif argument == '--test':
            test_only = True
        else:
            sys.stderr.write("%s: unrecognized option %s\n" %
                             (os.path.split(sys.argv[0])[-1], argument))
            sys.exit(1)

    # with --test, a file that needs modification is only reported, and
    # the remaining files are still checked before exiting with an error
    needs_changes = False

    for path in targets:
        if test_only:
            # one pass is enough to know whether the file is clean
            if reindent(path, dry_run=True):
                needs_changes = True
        else:
            # repeat until no further changes occur
            while reindent(path, dry_run=False):
                pass

    if needs_changes:
        sys.exit(1)
428