1#!/usr/bin/env python
2
3# fixcc -- indent and space lily's c++ code
4
5# This file is part of LilyPond, the GNU music typesetter.
6#
7# Copyright (C) 2005--2021 Jan Nieuwenhuizen <janneke@gnu.org>
8#
9# LilyPond is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# LilyPond is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with LilyPond.  If not, see <http://www.gnu.org/licenses/>.
21
22
#  Performs string substitution on files, then applies astyle
#  (http://astyle.sourceforge.net).
# TODO
#  Remove prefiltering as the equivalent formatting becomes available in
#  astyle, or as the prefiltering is deemed unnecessary.
#  Soon, this script might be replaced by a simple invocation of astyle.
29
30import __main__
31import getopt
32import os
33import re
34import sys
35import time
36import subprocess
37
# Names of the text categories.  COMMENT, RAW_STRING, GLOBAL_CXX and CXX are
# keys of the `rules` table below; CXX is also the key of `snippet_res`.
# STRING has no entry in either table in this file.
COMMENT = 'COMMENT'
RAW_STRING = 'RAW_STRING'
STRING = 'STRING'
GLOBAL_CXX = 'GC++'
CXX = 'C++'
# Nonzero enables progress output on stderr (set by --verbose).
verbose_p = 0
# Nonzero runs astyle on every file; zero (--lazy) only on files whose text
# was changed by the regex rules.
indent_p = 1
# Substring that must occur in the output of `astyle --version`;
# --sloppy relaxes it to plain "Artistic Style".
PREFERRED_ASTYLE_VERSION = "Artistic Style Version 3.1"
46
47
# Regex rewrite rules, keyed by the kind of text they apply to.  Every rule
# is a (pattern, replacement) pair that is handed to re.sub; the rules of one
# list are applied in list order, each to the result of the previous one.
rules = {
    # Applied by nitpick_file to the whole file, before it is dissected.
    GLOBAL_CXX:
    [
        # delete trailing whitespace
        ('[ \t]*\n', '\n'),
    ],
    # Applied by Substring to the code between the recognized snippets.
    CXX:
    [
        # space before parenthesis open; astyle -xd does this except for foo().
        (r'([\w\)\]>])\(', '\\1 ('),
        # delete inline double spaces
        (r'(\S)  +', '\\1 '),
        # delete space before parenthesis close
        (r' *\)', ')'),
        # delete spaces after prefix
        (r'(--|\+\+) *([\w\(])', '\\1\\2'),
        # delete spaces before postfix
        (r'([\w\)\]]) *(--|\+\+)', '\\1\\2'),

        # delete space around operator
        (r'([\w\(\)\]]) *(\.|->) *([\w\(\)])', '\\1\\2\\3'),
        # delete space after operator
        (r'(::) *([\w\(\)])', '\\1\\2'),

        # delete superfluous space around operator
        (r'([\w\(\)\]]) +(&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|>|\+|-|=|/|:|&|\||\*) +([\w\(\)])', '\\1 \\2 \\3'),

        # trailing operator, but don't un-trail close angle-braces > nor pointer *, and not before a preprocessor line
        # (the alternatives `&XXX' and `\*XXX' only match a literal "&XXX" / "*XXX";
        # presumably this is how plain & and * were switched off -- TODO confirm)
        (r'(?<!\s) (::|&&|\|\||<=|>=|!=|\|=|==|\+=|-=|\*=|/=|\?|<|\+|-|=|/|:|&XXX|\||\*XXX) *\n( *)([^\s#])', '\n\\2\\1 \\3'),
        # space after `operator'
        (r'(\Woperator) *([^\w\s])', '\\1 \\2'),
        # trailing parenthesis open
        (r'\( *\n *', '('),
        # dangling parenthesis close: Disabled to leave ADD_TRANSLATOR format in place
        #('\n *\)', ')'),
        # dangling comma
        ('\n( *),', ',\n\\1'),
        # delete space after case, label
        (r'(\W(case|label) [\w]+) :', '\\1:'),
        # delete space before comma
        (' +,', ','),
        # delete space before semicolon
        ('([^;]) +;', '\\1;'),
        # dangling newline: collapse runs of empty lines into a single one
        ('\n\n+', '\n\n'),

        # delete backslash before empty line (emacs' indent region is broken)
        ('\\\\\n\n', '\n\n'),
    ],

    # Applied by Multiline_comment to each /* ... */ comment.
    COMMENT:
    [
        # delete empty first lines
        (r'(/\*\n)\n*', '\\1'),
        # delete empty last lines
        (r'\n*(\n\*/)', '\\1'),
        # delete newline after start?
        #('/(\*)\n', '\\1'),
        # delete newline before end?
        #('\n(\*/)', '\\1'),
    ],

    # Applied by Raw_string; the list is empty, so raw strings pass through
    # unchanged.
    RAW_STRING:
    [
    ],
}
114
# Recognize special sequences in the input.
#
#   (?P<name>regex) -- Assign result of REGEX to NAME.
#   *? -- Match non-greedily.
#   (?m) -- Multiline regex: Make ^ and $ match at each line.
#   (?s) -- Make the dot match all characters including newline.
#   (?x) -- Ignore whitespace in patterns.
#
# Every pattern defines a group named `match'; find_toplevel_snippets uses
# its start and length to cut the snippet out of the surrounding code.

# Not referenced anywhere else in the visible code.
no_match = 'a\ba'
# Snippet patterns, indexed first by format (only CXX) and then by snippet
# type name.
snippet_res = {
    CXX: {
        # A #define line, including any backslash-continued lines.
        'define':
        r'''(?x)
    (?P<match>
    (?P<code>
    \#[ \t]*define[ \t]+([^\n]*\\\n)*[^\n]*))''',

        # A /* ... */ comment (shortest match) plus the blanks in front of it.
        'multiline_comment':
        r'''(?sx)
    (?P<match>
    (?P<code>
    [ \t]*/\*.*?\*/))''',

        # A raw string literal R"delim( ... )delim" with a delimiter of at
        # most 16 characters; the body may span lines.
        'raw_string':
        r'''(?sx)
    (?P<match>
    (?P<code>
    R"(?P<delim>[^\\() ]{0,16})\(.*\)(?P=delim)"))''',

        # A // comment up to and including its newline, plus the blanks
        # directly in front of it.
        'singleline_comment':
        r'''(?mx)
    ^.*?    # leave leading spaces for the comment snippet
    (?P<match>
    (?P<code>
    [ \t]*//[^\n]*\n))''',

        # The inside of a "..." string and its closing quote; the opening
        # quote stays outside the `match' group.
        'string':
        r'''(?x)
    "      # leave the leading " character visible to CXX rules
    (?P<match>
    (?P<code>
    ([^"\n]|\\")*"))''',

        # A single-quoted literal; nitpick_file currently does not ask for
        # this type.
        'char':
        r'''(?x)
    (?P<match>
    (?P<code>
    '([^']+|\')))''',

        # An #include <...> line.
        'include':
        r'''(?x)
    (?P<match>
    (?P<code>
    \#[ \t]*include[ \t]*<[^>]*>))''',
    },
}
170
171
class Chunk:
    """Base class for one piece of the dissected source text."""

    def replacement_text(self):
        """Return the text that stands in for this chunk; empty by default."""
        return str()

    def filter_text(self):
        """Return the chunk's output text, which is its replacement text."""
        text = self.replacement_text()
        return text
178
179
class Substring (Chunk):
    """A stretch of ordinary code, kept as offsets into the source string."""

    def __init__(self, source, start, end):
        self.source, self.start, self.end = source, start, end

    def replacement_text(self):
        """Return source[start:end] with all CXX rules applied in order."""
        def progress(message):
            # Progress output goes to stderr, and only in verbose mode.
            if verbose_p:
                sys.stderr.write(message)

        text = self.source[self.start:self.end]
        progress('CXX Rules')
        for pattern, replacement in rules[CXX]:
            progress('.')
            text = re.sub(pattern, replacement, text)
        progress('done\n')
        return text
203
204
class Snippet (Chunk):
    """A region found by one of the snippet regexes; emitted unchanged."""

    def __init__(self, type, match, format):
        # type:   snippet type name (a key of snippet_res[format])
        # match:  the regex match object; must have a group named 'match'
        # format: the format key the regex was taken from
        self.format = format
        self.options = []
        self.hash = 0
        self.match = match
        self.type = type

    def substring(self, s):
        """Return the text of the match group named (or numbered) s."""
        return self.match.group(s)

    def replacement_text(self):
        """Return the matched text as it stands."""
        return self.match.group('match')

    def __repr__(self):
        return ' type = '.join((repr(self.__class__), self.type))
221
222
class Multiline_comment (Snippet):
    """A /* ... */ comment; its text is cleaned up with the COMMENT rules."""

    def __init__(self, source, match, format):
        # find_toplevel_snippets passes the snippet type name as the first
        # positional argument.  It used to be dropped in favour of the
        # builtin `type`, which made __repr__ raise a TypeError.
        Snippet.__init__(self, source, match, format)

    def replacement_text(self):
        """Return the matched comment with all COMMENT rules applied in order."""
        s = self.match.group('match')
        if verbose_p:
            sys.stderr.write('COMMENT Rules')
        for pattern, replacement in rules[COMMENT]:
            if verbose_p:
                sys.stderr.write('.')
            s = re.sub(pattern, replacement, s)
        return s
240
class Raw_string (Snippet):
    """A C++ raw string literal; its text is run through the RAW_STRING rules."""

    def __init__(self, source, match, format):
        # find_toplevel_snippets passes the snippet type name as the first
        # positional argument.  It used to be dropped in favour of the
        # builtin `type`, which made __repr__ raise a TypeError.
        Snippet.__init__(self, source, match, format)

    def replacement_text(self):
        """Return the matched literal with all RAW_STRING rules applied in order."""
        s = self.match.group('match')
        if verbose_p:
            sys.stderr.write('RAW_STRING Rules')
        for pattern, replacement in rules[RAW_STRING]:
            if verbose_p:
                sys.stderr.write('.')
            s = re.sub(pattern, replacement, s)
        return s
258
# Snippet types that get a dedicated Snippet subclass; find_toplevel_snippets
# wraps every type not listed here in a plain Snippet.
snippet_type_to_class = {
    'multiline_comment': Multiline_comment,
    'raw_string': Raw_string,
    #        'string': Multiline_comment,
    #        'include': Include_snippet,
}
265
266
def find_toplevel_snippets(s, types):
    """Split S into plain code and recognized snippets.

    TYPES is a sequence of snippet type names; each must be a key of
    snippet_res[format].  The result is a list of chunks in source order:
    a Substring for every stretch of code before, between and after the
    snippets (possibly empty), and a Snippet (or the subclass registered in
    snippet_type_to_class) for every match.  The list always ends with a
    Substring.
    """
    if verbose_p:
        sys.stderr.write('Dissecting')

    compiled = {name: re.compile(snippet_res[format][name])
                for name in types}

    snippets = []
    index = 0
    # Per type: the next known match as (absolute start, snippet), or None
    # if that type has to be searched for again.
    found = dict.fromkeys(types)

    # We want to search for multiple regexes, without searching
    # the string multiple times for one regex.
    # Hence, we use earlier results to limit the string portion
    # where we search.
    # Since every part of the string is traversed at most once for
    # every type of snippet, this is linear.

    while True:
        if verbose_p:
            sys.stderr.write('.')
        first = None
        endex = 1 << 30
        for snippet_type in types:
            cached = found[snippet_type]
            if not cached or cached[0] < index:
                # Nothing cached, or the cached match starts before the
                # current position: search again from INDEX.
                found[snippet_type] = None
                m = compiled[snippet_type].search(s[index:endex])
                if not m:
                    continue

                cl = snippet_type_to_class.get(snippet_type, Snippet)
                snip = cl(snippet_type, m, format)
                # The match offsets are relative to the slice.
                start = index + m.start('match')
                found[snippet_type] = (start, snip)

            if found[snippet_type] \
                and (not first
                     or found[snippet_type][0] < found[first][0]):
                first = snippet_type

                # FIXME.

                # Limiting the search space is a cute
                # idea, but this *requires* to search
                # for possible containing blocks
                # first, at least as long as we do not
                # search for the start of blocks, but
                # always/directly for the entire
                # @block ... @end block.

                endex = found[first][0]

        if not first:
            # No snippet left: the rest of the string is plain code.
            snippets.append(Substring(s, index, len(s)))
            break

        (start, snip) = found[first]
        snippets.append(Substring(s, index, start))
        snippets.append(snip)
        found[first] = None
        index = start + len(snip.match.group('match'))

    return snippets
338
339
def nitpick_file(outdir, file):
    """Apply the regex rules to FILE and then run astyle on the result.

    FILE is read as UTF-8, tabs are expanded to 8 columns, the GLOBAL_CXX
    rules are applied, and the text is dissected into chunks whose filtered
    texts are joined again.  If that changed the text, the result is written
    to OUTDIR/<basename of FILE> when OUTDIR is set; otherwise FILE is first
    renamed to FILE~ and then rewritten.  astyle is run on the written file
    if the text changed, and on FILE itself if it did not but indent_p is
    set.

    Raises OSError if FILE cannot be read, backed up or written.
    """
    with open(file, encoding='utf8') as f:
        s = f.read()

    t = s.expandtabs(8)
    for pattern, replacement in rules[GLOBAL_CXX]:
        t = re.sub(pattern, replacement, t)

    # FIXME: Containing blocks must be first, see
    #        find_toplevel_snippets.
    #        We leave simple strings be part of the code
    # ('char' is deliberately not in this list.)
    snippet_types = (
        'define',
        'multiline_comment',
        'raw_string',
        'singleline_comment',
        'string',
        'include',
    )

    chunks = find_toplevel_snippets(t, snippet_types)

    t = ''.join([x.filter_text() for x in chunks])
    fixt = file
    if s != t:
        if not outdir:
            # Keep the original as FILE~.  Renaming in-process copes with
            # any file name and fails loudly before FILE is overwritten.
            os.replace(file, file + '~')
        else:
            fixt = os.path.join(outdir,
                                os.path.basename(file))
        with open(fixt, "w", encoding="utf8") as h:
            h.write(t)
    if s != t or indent_p:
        indent_file(fixt)
377
378
def indent_file(file):
    """Run astyle on FILE with the project's formatting options.

    The exit status of astyle is ignored.  If astyle cannot be started at
    all, a message is written to stderr and the file is left as it is.
    """
    # Astyle aborts on unrecognized options,
    # so wait until everyone has 2.04 before considering:
    # --attach-namespaces --indent-namespaces \
    # --max-code-length=80 --pad-first-paren-out \
    command = [
        'astyle',
        '--options=none', '--quiet', '-n',
        '--style=gnu', '--indent=spaces=2',
        '--max-instatement-indent=60',
        '--indent-cases',
        '--align-pointer=name', '--pad-oper',
        '--keep-one-line-blocks',
        file,
    ]
    if verbose_p:
        sys.stderr.write(' '.join(command))
        sys.stderr.write('\n')
    try:
        # An argument list (no shell) keeps file names with spaces or shell
        # metacharacters intact.
        subprocess.call(command)
    except OSError as err:
        sys.stderr.write('fixcc: cannot run astyle: %s\n' % err)
397
398
def usage():
    """Write the command-line help text to standard output."""
    sys.stdout.write(r'''
Usage:
fixcc [OPTION]... FILE...

Options:
 --help
 --lazy   skip astyle, if no changes
 --outdir=DIR  write changed files to DIR
 --sloppy accept any astyle version
 --verbose
 --test

Typical use with LilyPond:

 scripts/auxiliar/fixcc.py $(git ls-files '*.cc' '*.hh')

''')
416
417
def do_options():
    """Parse sys.argv, set the option globals and return the file arguments.

    Options take effect in command-line order.  --help prints the usage
    text and exits with status 0.  --test runs test() with the options seen
    so far and exits with status 0.  An unrecognized option, or an empty
    file list, prints the usage text and exits with status 2.
    """
    global indent_p, outdir, verbose_p, PREFERRED_ASTYLE_VERSION
    try:
        (options, files) = getopt.getopt(sys.argv[1:], '',
                                         ['help', 'lazy', 'outdir=', 'sloppy',
                                          'test', 'verbose'])
    except getopt.GetoptError as err:
        # Report a bad command line as such instead of with a traceback.
        sys.stderr.write('fixcc: %s\n' % err)
        usage()
        sys.exit(2)
    for (o, a) in options:
        if o == '--help':
            usage()
            sys.exit(0)
        elif o == '--lazy':
            indent_p = 0
        elif o == '--outdir':
            outdir = a
        elif o == '--sloppy':
            PREFERRED_ASTYLE_VERSION = "Artistic Style"
        elif o == '--verbose':
            verbose_p = 1
        elif o == '--test':
            test()
            sys.exit(0)
        else:
            # Only reachable when an option is added to the getopt list
            # above without a branch here.
            raise NotImplementedError('unhandled option: %s' % o)
    if not files:
        usage()
        sys.exit(2)
    return files
444
445
def check_astyle_version():
    """Return whether `astyle --version` reports PREFERRED_ASTYLE_VERSION.

    Both stdout and stderr of astyle are inspected.  Returns False when
    astyle cannot be started.
    """
    try:
        # No shell is needed for a fixed argument list.
        process = subprocess.Popen(['astyle', '--version'],
                                   stdout=subprocess.PIPE,
                                   stderr=subprocess.PIPE)
    except OSError:
        return False
    stdout, stderr = process.communicate()
    # Undecodable bytes must not abort the check; replace them.
    return (PREFERRED_ASTYLE_VERSION in stderr.decode(errors='replace')) \
        or (PREFERRED_ASTYLE_VERSION in stdout.decode(errors='replace'))
453
454
# Output directory set by --outdir; 0 (false) means files are rewritten in
# place.
outdir = 0
# Key into snippet_res selecting the regex table to use.
# NOTE: this module-level name shadows the builtin format().
format = CXX
# The two socket names are not referenced anywhere else in the visible code.
socketdir = '/tmp/fixcc'
socketname = 'fixcc%d' % os.getpid()
459
460
def main():
    """Entry point: parse the options, check astyle, then fix every file.

    With an astyle that does not report the preferred version, a warning is
    printed and the run is refused (exit status 1) for more than four files.
    """
    files = do_options()

    version_ok = check_astyle_version()
    if not version_ok:
        warning = (
            "Warning: try to use %s." % PREFERRED_ASTYLE_VERSION,
            "Please limit use of this version to files with changed code.",
        )
        for line in warning:
            print(line)
        if len(files) > 4:
            print("Too many files with this version.  See `astyle --help`")
            sys.exit(1)

    # Create the output directory on demand.
    if outdir:
        if not os.path.isdir(outdir):
            os.makedirs(outdir)

    for path in files:
        sys.stderr.write('%s...\n' % path)
        nitpick_file(outdir, path)
474
475
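# Badly formatted C++ sample that test() writes to a file and runs through
# nitpick_file.
# TODO: make this compilable and check the result with g++.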
477TEST = '''
478#include <libio.h>
479#include <map>
480class
481ostream ;
482
483class Foo {
484public: static char* foo ();
485std::map<char*,int>* bar (char, char) { return 0; }
486};
487typedef struct
488{
489 Foo **bar;
490} String;
491
492ostream &
493operator << (ostream & os, String d);
494
495typedef struct _t_ligature
496{
497 char *succ, *lig;
498 struct _t_ligature * next;
499}  AFM_Ligature;
500
501typedef std::map < AFM_Ligature const *, int > Bar;
502
503 /**
504 Copyright (C) 1997--2021 Han-Wen Nienhuys <hanwen@cs.uu.nl>
505 */
506
507/*      ||
508*      vv
509* !OK  OK
510*/
511/*     ||
512   vv
513 !OK  OK
514*/
515char *
516Foo:: foo ()
517{
518int
519i
520;
521 char* a= &++ i ;
522 a [*++ a] = (char*) foe (*i, &bar) *
523 2;
524 int operator double ();
525 std::map<char*,int> y =*bar(-*a ,*b);
526 Interval_t<T> & operator*= (T r);
527 Foo<T>*c;
528 int compare (Pqueue_ent < K, T > const& e1, Pqueue_ent < K,T> *e2);
529 delete *p;
530 if (abs (f)*2 > abs (d) *FUDGE)
531  ;
532 while (0);
533 for (; i<x foo(); foo>bar);
534 for (; *p && > y;
535   foo > bar)
536;
537 do {
538 ;;;
539 }
540 while (foe);
541
542 squiggle. extent;
543 1 && * Moment::unsmob (lf);
544 line_spanner_ = make_spanner ("DynamicLineSpanner", rq ? rq->*self_scm
545(): SCM_EOL);
546 case foo: k;
547
548 if (0) {a=b;} else {
549 c=d;
550 }
551
552 cookie_io_functions_t Memory_out_stream::functions_ = {
553  Memory_out_stream::reader,
554  ...
555 };
556
557 int compare (Array < Pitch> *, Array < Pitch> *);
558 original_ = (Grob *) & s;
559 Drul_array< Link_array<Grob> > o;
560}
561
562 header_.char_info_pos = (6 + header_length) * 4;
563 return ly_bool2scm (*ma < * mb);
564
565 1 *::sign(2);
566
567 (shift) *-d;
568
569 a = 0 ? *x : *y;
570
571a = "foo() 2,2,4";
572{
573 if (!span_)
574  {
575   span_ = make_spanner ("StaffSymbol", SCM_EOL);
576  }
577}
578{
579 if (!span_)
580  {
581   span_ = make_spanner (StaffSymbol, SCM_EOL);
582  }
583}
584
585void casts()
586{
587  auto a=const_cast<A>(foo);
588  auto a=dynamic_cast<A>(foo);
589  auto a=reinterpret_cast<A>(foo);
590  auto a=static_cast<A>(foo);
591}
592
593auto      raw_string=R"_(foo -> bar)_"    ;
594
595auto raw_string = R"(
596first line of multi+line-raw+string()
597second line of ! (multi -> line -> raw . string)
598)";
599
600auto raw_string = R"_foo_(
601if (true) { _foo_(); } else { return R"_foo_("; }
602second line of ! (multi -> line -> raw . string < T > )
603)_foo_";
604
605'''
606
607
def test():
    """Write TEST to fixcc.cc in the current directory, fix it, and print it.

    The file is run through nitpick_file with the current outdir setting
    and its contents are then written to standard output.
    """
    test_file = 'fixcc.cc'
    # Close the file before nitpick_file reads it, so that the sample is
    # guaranteed to be flushed to disk.
    with open(test_file, 'w', encoding='utf8') as f:
        f.write(TEST)
    nitpick_file(outdir, test_file)
    with open(test_file, encoding='utf8') as f:
        sys.stdout.write(f.read())
613
614
# Run main() only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
617