#!/usr/bin/env python
"""
Usage: python vtk_reindent_code.py [--test] <file1> [<file2> ...]

This script takes old-style "Whitesmiths" indented VTK source files as
input, and re-indents the braces according to the new VTK style.
Only the brace indentation is modified.

If called with the --test option, then it will print an error message
for each file that it would modify, but it will not actually modify the
files.

Written by David Gobbi on Sep 30, 2015.
"""

import sys
import os
import re


def reindent(filename, dry_run=False):
    """Reindent a file from Whitesmiths style to Allman style.

    Only lines that begin with a brace are ever re-indented.  When at
    least one line is changed and the file is rewritten, trailing spaces
    are also stripped from every line.

    Parameters:
      filename -- path of the C/C++ source file to process
      dry_run  -- if true, report the lines that would change on stderr
                  and leave the file untouched

    Returns True if any brace line needed re-indenting, else False.

    Diagnostics (mismatched delimiters, mismatched #if conditionals) are
    written to stderr using 1-based line numbers.  If the file cannot be
    read, a message is written to stderr and sys.exit(1) is called.
    """

    # The first part of this function clears all strings and comments
    # where non-grammatical braces might be hiding.  These changes will
    # not be saved back to the file, they just simplify the parsing.

    # look for ', ", /*, and //
    keychar = re.compile(r"""[/"']""")
    # comments of the form /* */
    c_comment = re.compile(r"\/\*(\*(?!\/)|[^*])*\*\/")
    c_comment_start = re.compile(r"\/\*(\*(?!\/)|[^*])*$")
    c_comment_end = re.compile(r"^(\*(?!\/)|[^*])*\*\/")
    # comments of the form //
    cpp_comment = re.compile(r"\/\/.*")
    # string literals ""
    string_literal = re.compile(r'"([^\\"]|\\.)*"')
    string_literal_start = re.compile(r'"([^\\"]|\\.)*\\$')
    string_literal_end = re.compile(r'^([^\\"]|\\.)*"')
    # character literals ''
    char_literal = re.compile(r"'([^\\']|\\.)*'")
    char_literal_start = re.compile(r"'([^\\']|\\.)*\\$")
    char_literal_end = re.compile(r"^([^\\']|\\.)*'")

    # read the file
    try:
        with open(filename) as f:
            lines = f.readlines()
    except (IOError, OSError, UnicodeError) as err:
        sys.stderr.write(filename + ": ")
        sys.stderr.write(str(err) + "\n")
        sys.exit(1)

    # convert strings to "", char constants to '', and remove comments
    newlines = []  # simplified copy of 'lines', one entry per input line

    cont = None  # set if e.g. we found /* and we are looking for */

    for rawline in lines:
        line = rawline.rstrip()

        if cont is not None:
            # look for closing ' or " or */
            match = cont.match(line)
            if match:
                # found closing ' or " or */
                line = line[match.end():]
                cont = None
            elif cont is c_comment_end:
                # still looking for */, clear the whole line
                newlines.append("")
                continue
            else:
                # still looking for ' or ", set line to backslash
                newlines.append('\\')
                continue

        # start at column 0 and search for ', ", /*, or //
        pos = 0
        while True:
            match = keychar.search(line, pos)
            if match is None:
                break
            pos = match.start()
            # was the match /* ... */ ?
            match = c_comment.match(line, pos)
            if match:
                line = line[0:pos] + " " + line[match.end():]
                pos += 1
                continue
            # does the line have /* ... without the */ ?
            match = c_comment_start.match(line, pos)
            if match:
                # keep a trailing backslash so that macro continuation
                # is still visible to the second pass
                if line[-1] == '\\':
                    line = line[0:pos] + ' \\'
                else:
                    line = line[0:pos]
                cont = c_comment_end
                break
            # does the line have // ?
            match = cpp_comment.match(line, pos)
            if match:
                if line[-1] == '\\':
                    line = line[0:pos] + ' \\'
                else:
                    line = line[0:pos]
                break
            # did we find "..." ?
            match = string_literal.match(line, pos)
            if match:
                line = line[0:pos] + "\"\"" + line[match.end():]
                pos += 2
                continue
            # did we find "... without the final " ?
            match = string_literal_start.match(line, pos)
            if match:
                line = line[0:pos] + "\"\"\\"
                cont = string_literal_end
                break
            # did we find '...' ?
            match = char_literal.match(line, pos)
            if match:
                line = line[0:pos] + "' '" + line[match.end():]
                pos += 3
                continue
            # did we find '... without the final ' ?
            match = char_literal_start.match(line, pos)
            if match:
                line = line[0:pos] + "' '\\"
                cont = char_literal_end
                break
            # if we got to here, we found / that wasn't /* or //
            pos += 1

        # strip any trailing whitespace!
        newlines.append(line.rstrip())

    # The second part of this function looks for braces in the simplified
    # code that we wrote to "newlines" after removing the contents of all
    # string literals, character literals, and comments.

    # Whenever we encounter an opening brace, we push its position onto a
    # stack.  Whenever we encounter the matching closing brace, we indent
    # the braces as a pair.

    # For #if directives, we check whether there are mismatched braces
    # within the conditional block, and if so, we print a warning.  At
    # #else/#elif the stack is reset to what it was at the start of the
    # conditional block.

    # For #define directives, we save the stack and then restart counting
    # braces until the end of the #define.  Then we restore the stack.

    # all changes go through this function
    lines_changed = {}  # keeps track of each line that was changed

    def changeline(i, newtext):
        if newtext != lines[i]:
            lines[i] = newtext
            lines_changed[i] = newtext

    # we push a tuple (delim, row, col, newcol) onto this stack whenever
    # we find a {, (, or [ delimiter, this keeps track of where we found
    # the delimiter and what column we want to move it to
    stack = []

    # this is a superstack that allows us to save the entire stack when we
    # enter into an #if conditional block
    dstack = []

    # these are syntactic elements we need to look for
    directive = re.compile(r"\s*#\s*(..)")
    label = re.compile(r"""(case(?!\w)([^:]|::)+|\w+\s*(::\s*)*\s*:(?!:))""")
    cflow = re.compile(r"(if|else|for|do|while|switch)(?!\w)")
    delims = re.compile(r"[{}()\[\];]")
    spaces = re.compile(r"\s*")
    other = re.compile(r"(\w+|[^{}()\[\];\w\s]+)\s*")
    cplusplus = re.compile(r"\s*#\s*ifdef\s+__cplusplus")
    partner = {'}': '{', ')': '(', ']': '['}

    indentation = 0         # current indentation column
    continuation = False    # true if line continues an unfinished statement
    new_context = True      # also set when we enter a #define statement
    in_else = False         # set if in an #else
    in_define = False       # set if in #define
    in_assign = False       # set to deal with "= {" or "#define x {"
    leaving_define = False  # set if at the end of a #define
    save_stack = None       # save stack when entering a #define

    for i, line in enumerate(newlines):
        # restore stack when leaving #define
        if leaving_define:
            stack, indentation, continuation = save_stack
            save_stack = None
            in_define = False
            leaving_define = False

        # handle #if conditionals
        is_directive = False
        in_else = False
        directive_end = 0
        match = directive.match(line)
        if match:
            is_directive = True
            directive_end = match.end()
            # only the first two letters of the directive are examined:
            # 'if' (#if/#ifdef/#ifndef), 'en' (#endif), 'el' (#else/#elif),
            # 'de' (#define)
            keyword = match.group(1)
            if keyword == 'if':
                dstack.append((list(stack), indentation, continuation,
                               line))
            elif keyword in ('en', 'el'):
                if dstack:
                    oldstack, oldindent, oldcont, dline = dstack.pop()
                    if (len(stack) > len(oldstack) and
                            not cplusplus.match(dline)):
                        sys.stderr.write(filename + ":" + str(i + 1) + ": ")
                        sys.stderr.write("mismatched delimiter in \"" +
                                         dline + "\" block\n")
                    if keyword == 'el':
                        in_else = True
                        indentation = oldindent
                        continuation = oldcont
                        stack = oldstack
                        dstack.append((list(stack), indentation,
                                       continuation, line))
                else:
                    # stray #else/#elif/#endif with no open conditional:
                    # report it instead of crashing on an empty stack
                    sys.stderr.write(filename + ":" + str(i + 1) + ": ")
                    sys.stderr.write("mismatched #if conditional.\n")
            elif keyword == 'de':
                in_define = True
                leaving_define = False
                save_stack = (stack, indentation, continuation)
                stack = []
                new_context = True

        # remove backslash at end of line, if present
        if len(line) > 0 and line[-1] == '\\':
            line = line[0:-1].rstrip()
        elif in_define:
            leaving_define = True

        if not is_directive and len(line) > 0 and not continuation:
            # what is the indentation of the current line?
            match = spaces.match(line)
            if not line[match.end()] == '{':
                indentation = match.end()
            continuation = True

        # new_context marks beginning of a file or a macro
        if new_context:
            continuation = False
            indentation = 0
            new_context = False

        # skip initial whitespace
        if is_directive:
            # reuse the offset found above; the line may have become
            # shorter when its trailing backslash was removed
            pos = min(directive_end, len(line))
        else:
            pos = spaces.match(line).end()

        # check for a label e.g. case
        match = label.match(line, pos)
        if match:
            # a label only counts if we are not inside ( ) or [ ]
            base = True
            for item in stack:
                if item[0] != '{':
                    base = False
            if base:
                # compare the matched text, not the match object itself
                word = re.match(r"\w*", match.group()).group()
                if word in ("case", "default"):
                    indentation = pos
                continuation = False
                # check for multiple labels on the same line
                while match:
                    pos = spaces.match(line, match.end()).end()
                    match = label.match(line, pos)

        # parse the line
        while pos != len(line):
            # check for if, else, for, while, do, switch
            match = cflow.match(line, pos)
            if match:
                # if we are at the beginning of the line
                if spaces.match(line).end() == pos:
                    indentation = pos
                pos = spaces.match(line, match.end()).end()
                continue

            # check for a delimiter {} () [] or ;
            match = delims.match(line, pos)
            if not match:
                # check for any other identifiers, operators
                match = other.match(line, pos)
                if match:
                    pos = match.end()
                    continue
                else:
                    break

            # found a delimiter
            delim = line[pos]

            if delim in ('(', '['):
                # save delim, row, col, and current indentation
                stack.append((delim, i, pos, indentation))
            elif delim == '{':
                # look at everything before the brace (up to, but not
                # including, column 'pos') for a trailing '='
                if in_assign or line[:pos].rstrip()[-1:] == "=":
                    # do not adjust braces for initializer lists
                    stack.append((delim, i, -1, indentation))
                elif ((in_else or in_define) and
                      spaces.sub("", line) == "{"):
                    # for opening braces that might have no match
                    indent = " "*indentation
                    changeline(i, spaces.sub(indent, lines[i], count=1))
                    stack.append((delim, i, pos, indentation))
                else:
                    # save delim, row, col, and previous indentation
                    stack.append((delim, i, pos, indentation))
                if spaces.sub("", newlines[i][0:pos]) == "":
                    indentation += 2
                continuation = False
            elif delim == ';':
                # ';' marks end of statement unless inside for (;;)
                if len(stack) == 0 or stack[-1][0] == '{':
                    continuation = False
            else:
                # found a ')', ']', or '}' delimiter, so pop its partner
                try:
                    ldelim, j, k, indentation = stack.pop()
                    in_assign = (k < 0)
                except IndexError:
                    ldelim = ""
                if ldelim != partner[delim]:
                    sys.stderr.write(filename + ":" + str(i + 1) + ": ")
                    sys.stderr.write("mismatched '" + delim + "'\n")
                # adjust the indentation of matching '{', '}'
                if (ldelim == '{' and delim == '}' and not in_assign and
                        spaces.sub("", line[0:pos]) == ""):
                    if spaces.sub("", newlines[j][0:k]) == "":
                        # both braces start their lines: move them together
                        indent = " "*indentation
                        changeline(j, spaces.sub(indent, lines[j], count=1))
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                    elif i != j:
                        # opening brace trails other code: move only the
                        # closing brace
                        indent = " "*indentation
                        changeline(i, spaces.sub(indent, lines[i], count=1))
                if delim == '}':
                    continuation = False

            # eat whitespace and continue
            pos = spaces.match(line, match.end()).end()

        # check for " = " and #define assignments for the sake of
        # the { initializer list } that might be on the following line
        if len(line) > 0:
            if (line[-1] == '=' or
                    (is_directive and in_define and not leaving_define)):
                in_assign = True
            elif not is_directive:
                in_assign = False

    if len(dstack) != 0:
        sys.stderr.write(filename + ": ")
        sys.stderr.write("mismatched #if conditional.\n")

    if len(stack) != 0:
        sys.stderr.write(filename + ":" + str(stack[0][1] + 1) + ": ")
        sys.stderr.write("no match for " + stack[0][0] +
                         " before end of file.\n")

    if not lines_changed:
        return False

    if dry_run:
        errcount = len(lines_changed)
        # list at most the first ten offending lines (1-based)
        line_numbers = [str(l + 1) for l in sorted(lines_changed)[:10]]
        if errcount > len(line_numbers):
            line_numbers.append("...")
        noun = " line: " if errcount == 1 else " lines: "
        sys.stderr.write("Warning: " + filename +
                         ": incorrect brace indentation on " +
                         str(errcount) + noun +
                         ", ".join(line_numbers) + "\n")
    else:
        # remove any trailing spaces, then rewrite the file
        # NOTE: blank lines at the end of the file are written back as-is
        trailing = re.compile(r" *$")
        output = [trailing.sub("", text) for text in lines]
        with open(filename, 'w') as ofile:
            ofile.writelines(output)

    return True


def main(argv=None):
    """Command-line driver: reindent each named file.

    argv defaults to sys.argv.  Returns the process exit status: 1 if an
    unrecognized option was given or if --test found a file that needs
    re-indenting, otherwise 0.
    """
    if argv is None:
        argv = sys.argv

    # ignore generated files
    ignorefiles = ["lex.yy.c", "vtkParse.tab.c"]

    files = []
    opt_ignore = False  # ignore all further options
    opt_test = False    # the --test option

    for arg in argv[1:]:
        if arg[0:1] == '-' and not opt_ignore:
            if arg == '--':
                opt_ignore = True
            elif arg == '--test':
                opt_test = True
            else:
                sys.stderr.write("%s: unrecognized option %s\n" %
                                 (os.path.split(argv[0])[-1], arg))
                return 1
        elif os.path.split(arg)[-1] not in ignorefiles:
            files.append(arg)

    # if --test was set, whenever a file needs modification, we set
    # "failed" and continue checking the rest of the files
    failed = False

    for filename in files:
        # repeat until no further changes occur
        while reindent(filename, dry_run=opt_test):
            if opt_test:
                failed = True
                break

    if failed:
        return 1
    return 0


if __name__ == "__main__":
    sys.exit(main())