#!/usr/bin/env python

# fixcc -- indent and space lily's c++ code

# This file is part of LilyPond, the GNU music typesetter.
#
# Copyright (C) 2005--2021 Jan Nieuwenhuizen <janneke@gnu.org>
#
# LilyPond is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# LilyPond is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with LilyPond. If not, see <http://www.gnu.org/licenses/>.


# Performs string substitution on files, then applies astyle
# (http://astyle.sourceforge.net)
# TODO
# Remove prefiltering as the equivalent formatting becomes available in
# astyle, or as the prefiltering is deemed un-necessary.
# Soon, this script might be replaced by a simple invocation of astyle

import __main__
import getopt
import os
import re
import sys
import time
import subprocess

COMMENT = 'COMMENT'
RAW_STRING = 'RAW_STRING'
STRING = 'STRING'
GLOBAL_CXX = 'GC++'
CXX = 'C++'
verbose_p = 0
indent_p = 1
PREFERRED_ASTYLE_VERSION = "Artistic Style Version 3.1"


# Substitution rules, keyed by the kind of text they apply to.  Each rule
# is a (regex, replacement) pair handed to re.sub(); rules of one kind are
# applied in list order, so later rules see the output of earlier ones.
rules = {
    GLOBAL_CXX:
    [
        # delete trailing whitespace
        ('[ \t]*\n', '\n'),
    ],
    CXX:
    [
        # space before parenthesis open; astyle -xd does this except for foo().
        (r'([\w\)\]>])\(', '\\1 ('),
        # delete inline double spaces
        (r'(\S) +', '\\1 '),
        # delete space before parenthesis close
        (r' *\)', ')'),
        # delete spaces after prefix
        (r'(--|\+\+) *([\w\(])', '\\1\\2'),
        # delete spaces before postfix
        (r'([\w\)\]]) *(--|\+\+)', '\\1\\2'),

        # delete space around operator
        (r'([\w\(\)\]]) *(\.|->) *([\w\(\)])', '\\1\\2\\3'),
        # delete space after operator
        (r'(::) *([\w\(\)])', '\\1\\2'),

        # delete superfluous space around operator
        (r'([\w\(\)\]]) +(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*) +([\w\(\)])', '\\1 \\2 \\3'),

        # trailing operator, but don't un-trail close angle-braces > nor pointer *, and not before a preprocessor line
        (r'(?<!\s) (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|\+|-|=|/|:|&XXX|\||\*XXX) *\n( *)([^\s#])', '\n\\2\\1 \\3'),
        # space after `operator'
        (r'(\Woperator) *([^\w\s])', '\\1 \\2'),
        # trailing parenthesis open
        (r'\( *\n *', '('),
        # dangling parenthesis close: Disabled to leave ADD_TRANSLATOR format in place
        #('\n *\)', ')'),
        # dangling comma
        ('\n( *),', ',\n\\1'),
        # delete space after case, label
        (r'(\W(case|label) [\w]+) :', '\\1:'),
        # delete space before comma
        (' +,', ','),
        # delete space before semicolon
        ('([^;]) +;', '\\1;'),
        # dangling newline
        ('\n\n+', '\n\n'),

        # delete backslash before empty line (emacs' indent region is broken)
        ('\\\\\n\n', '\n\n'),
    ],

    COMMENT:
    [
        # delete empty first lines
        (r'(/\*\n)\n*', '\\1'),
        # delete empty last lines
        (r'\n*(\n\*/)', '\\1'),
        # delete newline after start?
        #('/(\*)\n', '\\1'),
        # delete newline before end?
        #('\n(\*/)', '\\1'),
    ],

    RAW_STRING:
    [
    ],
}

# Recognize special sequences in the input.
#
# (?P<name>regex) -- Assign result of REGEX to NAME.
# *? -- Match non-greedily.
# (?m) -- Multiline regex: Make ^ and $ match at each line.
# (?s) -- Make the dot match all characters including newline.
# (?x) -- Ignore whitespace in patterns.
no_match = 'a\ba'
snippet_res = {
    CXX: {
        'define':
        r'''(?x)
    (?P<match>
    (?P<code>
    \#[ \t]*define[ \t]+([^\n]*\\\n)*[^\n]*))''',

        'multiline_comment':
        r'''(?sx)
    (?P<match>
    (?P<code>
    [ \t]*/\*.*?\*/))''',

        'raw_string':
        r'''(?sx)
    (?P<match>
    (?P<code>
    R"(?P<delim>[^\\() ]{0,16})\(.*\)(?P=delim)"))''',

        'singleline_comment':
        r'''(?mx)
    ^.*?    # leave leading spaces for the comment snippet
    (?P<match>
    (?P<code>
    [ \t]*//[^\n]*\n))''',

        'string':
        r'''(?x)
    "      # leave the leading " character visible to CXX rules
    (?P<match>
    (?P<code>
    ([^"\n]|\\")*"))''',

        'char':
        r'''(?x)
    (?P<match>
    (?P<code>
    '([^']+|\')))''',

        'include':
        r'''(?x)
    (?P<match>
    (?P<code>
    \#[ \t]*include[ \t]*<[^>]*>))''',
    },
}


def _apply_rules(kind, label, text, announce_done=False):
    """Run every substitution rule registered under KIND over TEXT.

    LABEL is the name shown in the verbose progress output on stderr
    ('<LABEL> Rules', then one dot per rule).  When ANNOUNCE_DONE is
    true, 'done' and a newline are written after the last rule.
    Returns the rewritten text.
    """
    if verbose_p:
        sys.stderr.write('%s Rules' % label)
    for pattern, replacement in rules[kind]:
        if verbose_p:
            sys.stderr.write('.')
        text = re.sub(pattern, replacement, text)
    if announce_done and verbose_p:
        sys.stderr.write('done\n')
    return text


class Chunk:
    """A piece of the dissected input that can render its own output text."""

    def replacement_text(self):
        """Return the text that replaces this chunk; empty in the base class."""
        return ''

    def filter_text(self):
        """Return the text emitted for this chunk in the rewritten file."""
        return self.replacement_text()


class Substring (Chunk):
    """The slice source[start:end]; it is rewritten with the CXX rules."""

    def __init__(self, source, start, end):
        self.source = source
        self.start = start
        self.end = end

    def replacement_text(self):
        """Return the slice with every rules[CXX] substitution applied."""
        return _apply_rules(CXX, 'CXX',
                            self.source[self.start:self.end],
                            announce_done=True)


class Snippet (Chunk):
    """A regex-matched region that is emitted unchanged by default.

    TYPE is the snippet_res key that produced MATCH (a match object
    exposing a 'match' group); FORMAT is the language key.
    """

    def __init__(self, type, match, format):
        self.type = type
        self.match = match
        self.hash = 0
        self.options = []
        self.format = format

    def replacement_text(self):
        """Return the matched text verbatim."""
        return self.match.group('match')

    def substring(self, s):
        """Return the named group S of the underlying match."""
        return self.match.group(s)

    def __repr__(self):
        return repr(self.__class__) + ' type = ' + self.type


class Multiline_comment (Snippet):
    """A /* ... */ comment; it is rewritten with the COMMENT rules."""

    def __init__(self, source, match, format):
        # The first argument is the snippet type name, despite the
        # parameter being called `source`: find_toplevel_snippets passes
        # it positionally.  Delegating stores it in self.type instead of
        # the builtin `type`, which made __repr__ raise TypeError.
        Snippet.__init__(self, source, match, format)

    def replacement_text(self):
        """Return the comment with every rules[COMMENT] substitution applied."""
        return _apply_rules(COMMENT, 'COMMENT', self.match.group('match'))


class Raw_string (Snippet):
    """A C++ raw string literal; it is rewritten with the RAW_STRING rules."""

    def __init__(self, source, match, format):
        # See Multiline_comment.__init__: `source` carries the type name.
        Snippet.__init__(self, source, match, format)

    def replacement_text(self):
        """Return the literal with every rules[RAW_STRING] substitution applied."""
        return _apply_rules(RAW_STRING, 'RAW_STRING',
                            self.match.group('match'))


# Snippet types that need a dedicated class; all others use plain Snippet.
snippet_type_to_class = {
    'multiline_comment': Multiline_comment,
    'raw_string': Raw_string,
    # 'string': Multiline_comment,
    # 'include': Include_snippet,
}


def find_toplevel_snippets(s, types):
    """Split S into an ordered list of Substring and Snippet chunks.

    TYPES is a sequence of keys of snippet_res[format].  At each step the
    earliest match among all TYPES becomes a snippet chunk; the text
    between consecutive snippets (and after the last one) becomes a
    Substring chunk.
    """
    if verbose_p:
        sys.stderr.write('Dissecting')

    res = {t: re.compile(snippet_res[format][t]) for t in types}

    snippets = []
    index = 0
    # Per type: (absolute start, snippet) of the pending match, or None.
    found = dict.fromkeys(types)

    # We want to search for multiple regexes, without searching
    # the string multiple times for one regex.
    # Hence, we use earlier results to limit the string portion
    # where we search.
    # Since every part of the string is traversed at most once for
    # every type of snippet, this is linear.

    while True:
        if verbose_p:
            sys.stderr.write('.')
        first = None
        endex = 1 << 30
        for kind in types:
            if not found[kind] or found[kind][0] < index:
                # No pending match, or it starts inside text that was
                # already consumed: search again from the current index.
                found[kind] = None
                m = res[kind].search(s[index:endex])
                if not m:
                    continue

                cl = snippet_type_to_class.get(kind, Snippet)
                snip = cl(kind, m, format)
                start = index + m.start('match')
                found[kind] = (start, snip)

            if found[kind] \
               and (not first
                    or found[kind][0] < found[first][0]):
                first = kind

                # FIXME.

                # Limiting the search space is a cute
                # idea, but this *requires* to search
                # for possible containing blocks
                # first, at least as long as we do not
                # search for the start of blocks, but
                # always/directly for the entire
                # @block ... @end block.

                endex = found[first][0]

        if not first:
            snippets.append(Substring(s, index, len(s)))
            break

        (start, snip) = found[first]
        snippets.append(Substring(s, index, start))
        snippets.append(snip)
        found[first] = None
        index = start + len(snip.match.group('match'))

    return snippets


def nitpick_file(outdir, file):
    """Apply the substitution rules to FILE, then run astyle on the result.

    If the text changes, the result is written to OUTDIR/<basename> when
    OUTDIR is set; otherwise FILE is first renamed to FILE~ and then
    rewritten in place.  astyle runs when the text changed or when the
    global indent_p is set.
    """
    with open(file, encoding='utf8') as f:
        s = f.read()

    t = s.expandtabs(8)
    for pattern, replacement in rules[GLOBAL_CXX]:
        t = re.sub(pattern, replacement, t)

    # FIXME: Containing blocks must be first, see
    # find_toplevel_snippets.
    # We leave simple strings be part of the code
    snippet_types = (
        'define',
        'multiline_comment',
        'raw_string',
        'singleline_comment',
        'string',
        # 'char',
        'include',
    )

    chunks = find_toplevel_snippets(t, snippet_types)

    t = ''.join(x.filter_text() for x in chunks)
    fixt = file
    if s != t:
        if not outdir:
            # Keep a backup next to the original.  os.replace needs no
            # shell, so names with spaces or metacharacters are safe.
            os.replace(file, file + '~')
        else:
            fixt = os.path.join(outdir,
                                os.path.basename(file))
        with open(fixt, "w", encoding="utf8") as h:
            h.write(t)
    # NOTE(review): with OUTDIR set and no textual change, astyle runs on
    # the original FILE in place -- confirm that this is intended.
    if s != t or indent_p:
        indent_file(fixt)


def indent_file(file):
    """Run astyle on FILE in place with LilyPond's formatting options.

    A failure to start astyle is reported on stderr and otherwise
    ignored, so the remaining files are still processed.
    """
    # Astyle aborts on unrecognized options,
    # so wait until everyone has 2.04 before considering:
    # --attach-namespaces --indent-namespaces \
    # --max-code-length=80 --pad-first-paren-out \
    astyle = [
        'astyle',
        '--options=none', '--quiet', '-n',
        '--style=gnu', '--indent=spaces=2',
        '--max-instatement-indent=60',
        '--indent-cases',
        '--align-pointer=name', '--pad-oper',
        '--keep-one-line-blocks',
        file,
    ]
    if verbose_p:
        sys.stderr.write(' '.join(astyle))
        sys.stderr.write('\n')
    try:
        # An argument list (no shell) keeps unusual file names intact.
        subprocess.call(astyle)
    except OSError as err:
        sys.stderr.write('fixcc: cannot run astyle: %s\n' % err)


def usage():
    """Write the command-line help text to stdout."""
    sys.stdout.write(r'''
Usage:
fixcc [OPTION]... FILE...

Options:
  --help
  --lazy         skip astyle, if no changes
  --outdir=DIR   write changed files to DIR instead of in place
  --sloppy       accept any astyle version
  --verbose
  --test

Typical use with LilyPond:

  scripts/auxiliar/fixcc.py $(git ls-files '*.cc' '*.hh')

''')


def do_options():
    """Parse sys.argv, update the option globals, and return the FILE list.

    Exits with status 0 after --help or --test, and with status 2 when
    the options cannot be parsed or no FILE is given.
    """
    global indent_p, outdir, verbose_p, PREFERRED_ASTYLE_VERSION
    try:
        (options, files) = getopt.getopt(sys.argv[1:], '',
                                         ['help', 'lazy', 'outdir=', 'sloppy',
                                          'test', 'verbose'])
    except getopt.GetoptError as err:
        sys.stderr.write('fixcc: %s\n' % err)
        usage()
        sys.exit(2)
    for (o, a) in options:
        if o == '--help':
            usage()
            sys.exit(0)
        elif o == '--lazy':
            indent_p = 0
        elif o == '--outdir':
            outdir = a
        elif o == '--sloppy':
            PREFERRED_ASTYLE_VERSION = "Artistic Style"
        elif o == '--verbose':
            verbose_p = 1
        elif o == '--test':
            # Runs immediately, using the options seen so far.
            test()
            sys.exit(0)
        else:
            # getopt only yields options listed above, so this guards
            # against the list and the chain drifting apart.
            raise NotImplementedError('unimplemented option: %s' % o)
    if not files:
        usage()
        sys.exit(2)
    return files


def check_astyle_version():
    """Return whether `astyle --version` mentions PREFERRED_ASTYLE_VERSION.

    Both stdout and stderr are inspected.  Returns False when astyle
    cannot be started.
    """
    try:
        process = subprocess.Popen(['astyle', '--version'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
    except OSError:
        return False
    stdout, stderr = process.communicate()
    return (PREFERRED_ASTYLE_VERSION in stderr.decode(errors='replace')) \
        or (PREFERRED_ASTYLE_VERSION in stdout.decode(errors='replace'))


outdir = 0
format = CXX
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid()


def main():
    """Command-line entry point: check astyle, then process every FILE."""
    files = do_options()
    if not check_astyle_version():
        print("Warning: try to use %s." % PREFERRED_ASTYLE_VERSION)
        print("Please limit use of this version to files with changed code.")
        if len(files) > 4:
            print("Too many files with this version. See `astyle --help`")
            sys.exit(1)
    if outdir and not os.path.isdir(outdir):
        os.makedirs(outdir)
    for i in files:
        sys.stderr.write('%s...\n' % i)
        nitpick_file(outdir, i)


# TODO: make this compilable and check with g++
TEST = '''
#include <libio.h>
#include <map>
class
ostream ;

class Foo {
public: static char* foo ();
std::map<char*,int>* bar (char, char) { return 0; }
};
typedef struct
{
  Foo **bar;
} String;

ostream &
operator << (ostream & os, String d);

typedef struct _t_ligature
{
  char *succ, *lig;
  struct _t_ligature * next;
} AFM_Ligature;

typedef std::map < AFM_Ligature const *, int > Bar;

  /**
  Copyright (C) 1997--2021 Han-Wen Nienhuys <hanwen@cs.uu.nl>
  */

/* ||
* vv
* !OK OK
*/
/* ||
  vv
  !OK OK
*/
char *
Foo:: foo ()
{
int
i
;
  char* a= &++ i ;
  a [*++ a] = (char*) foe (*i, &bar) *
  2;
  int operator double ();
  std::map<char*,int> y =*bar(-*a ,*b);
  Interval_t<T> & operator*= (T r);
  Foo<T>*c;
  int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
  delete *p;
  if (abs (f)*2 > abs (d) *FUDGE)
  ;
  while (0);
  for (; i<x foo(); foo>bar);
  for (; *p && > y;
  foo > bar)
;
  do {
  ;;;
  }
  while (foe);

  squiggle. extent;
  1 && * Moment::unsmob (lf);
  line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
(): SCM_EOL);
  case foo: k;

  if (0) {a=b;} else {
  c=d;
  }

  cookie_io_functions_t Memory_out_stream::functions_ = {
  Memory_out_stream::reader,
  ...
  };

  int compare (Array < Pitch> *, Array < Pitch> *);
  original_ = (Grob *) & s;
  Drul_array< Link_array<Grob> > o;
}

  header_.char_info_pos = (6 + header_length) * 4;
  return ly_bool2scm (*ma < * mb);

  1 *::sign(2);

  (shift) *-d;

  a = 0 ? *x : *y;

a = "foo() 2,2,4";
{
  if (!span_)
  {
  span_ = make_spanner ("StaffSymbol", SCM_EOL);
  }
}
{
  if (!span_)
  {
  span_ = make_spanner (StaffSymbol, SCM_EOL);
  }
}

void casts()
{
  auto a=const_cast<A>(foo);
  auto a=dynamic_cast<A>(foo);
  auto a=reinterpret_cast<A>(foo);
  auto a=static_cast<A>(foo);
}

auto raw_string=R"_(foo -> bar)_" ;

auto raw_string = R"(
first line of multi+line-raw+string()
second line of ! (multi -> line -> raw . string)
)";

auto raw_string = R"_foo_(
if (true) { _foo_(); } else { return R"_foo_("; }
second line of ! (multi -> line -> raw . string < T > )
)_foo_";

'''


def test():
    """Write TEST to fixcc.cc, run nitpick_file on it, and print the result."""
    test_file = 'fixcc.cc'
    # Close the file before nitpick_file reads it back.
    with open(test_file, 'w', encoding='utf8') as f:
        f.write(TEST)
    nitpick_file(outdir, test_file)
    with open(test_file, encoding='utf8') as f:
        sys.stdout.write(f.read())


if __name__ == '__main__':
    main()