1#! /usr/bin/env python3
2
3# Released to the public domain, by Tim Peters, 03 October 2000.
4
5"""reindent [-d][-r][-v] [ path ... ]
6
7-d (--dryrun)   Dry run.   Analyze, but don't make any changes to, files.
8-r (--recurse)  Recurse.   Search for all .py files in subdirectories too.
9-n (--nobackup) No backup. Does not make a ".bak" file before reindenting.
10-v (--verbose)  Verbose.   Print informative msgs; else no output.
11   (--newline)  Newline.   Specify the newline character to use (CRLF, LF).
12                           Default is the same as the original file.
13-h (--help)     Help.      Print this usage information and exit.
14
15Change Python (.py) files to use 4-space indents and no hard tab characters.
16Also trim excess spaces and tabs from ends of lines, and remove empty lines
17at the end of files.  Also ensure the last line ends with a newline.
18
If no paths are given on the command line, reindent operates as a filter,
reading a single source file from standard input and writing the transformed
source to standard output.  In this case, the -d, -r, -n, -v and --newline
options are ignored.
23
You can pass one or more file and/or directory paths.  For a directory
path, all .py files within the directory will be examined, and, if the -r
option is given, likewise recursively for subdirectories.
27
28If output is not to standard output, reindent overwrites files in place,
29renaming the originals with a .bak extension.  If it finds nothing to
30change, the file is left alone.  If reindent does change a file, the changed
31file is a fixed-point for future runs (i.e., running reindent on the
32resulting .py file won't change it again).
33
34The hard part of reindenting is figuring out what to do with comment
35lines.  So long as the input files get a clean bill of health from
36tabnanny.py, reindent should do a good job.
37
The backup file is a copy of the one that is being reindented. The ".bak"
file is generated with shutil.copyfile(), but some corner cases regarding
user/group and permissions could leave the backup file more readable than
you'd prefer. You can always use the --nobackup option to prevent this.
42"""
43
44__version__ = "1"
45
46import tokenize
47import os
48import shutil
49import sys
50
# Option state; main() rebinds these from the parsed command line.
verbose = recurse = dryrun = False   # set by -v / -r / -d respectively
makebackup = True                    # cleared by -n (--nobackup)
# A specified newline to be used in the output (set by --newline option);
# None means "use whatever the original file used".
spec_newline = None
57
58
def usage(msg=None):
    """Print *msg* on standard error; with no message, print the module doc."""
    text = __doc__ if msg is None else msg
    print(text, file=sys.stderr)
63
64
def errprint(*args):
    """Write the str() of each argument to stderr, space-separated, then a newline."""
    text = " ".join(map(str, args))
    sys.stderr.write(text)
    sys.stderr.write("\n")
68
def main():
    """Parse the command line, then reindent the named paths or filter stdin.

    Option flags are stored in the module-level globals.  An unknown option,
    a bad --newline value, or -h prints usage and returns without
    processing anything.
    """
    import getopt
    global verbose, recurse, dryrun, makebackup, spec_newline

    short_opts = "drnvh"
    long_opts = ["dryrun", "recurse", "nobackup", "verbose", "newline=", "help"]
    try:
        opts, args = getopt.getopt(sys.argv[1:], short_opts, long_opts)
    except getopt.error as msg:
        usage(msg)
        return

    newline_by_name = {'CRLF': '\r\n', 'LF': '\n'}
    for flag, value in opts:
        if flag in ('-h', '--help'):
            usage()
            return
        if flag == '--newline':
            key = value.upper()
            if key not in newline_by_name:
                usage()
                return
            spec_newline = newline_by_name[key]
        elif flag in ('-d', '--dryrun'):
            dryrun = True
        elif flag in ('-r', '--recurse'):
            recurse = True
        elif flag in ('-n', '--nobackup'):
            makebackup = False
        elif flag in ('-v', '--verbose'):
            verbose = True

    if args:
        for path in args:
            check(path)
        return

    # No paths: act as a stdin -> stdout filter.
    filt = Reindenter(sys.stdin)
    filt.run()
    filt.write(sys.stdout)
102
103
def check(file):
    """Reindent *file*, or every candidate inside it when it is a directory.

    For a directory that is not a symlink, every entry whose name ends in
    ".py" is checked, and when the global ``recurse`` is set every
    subdirectory that is neither a symlink nor dot-prefixed is descended
    into.

    For anything else, the path is opened as a source file and reindented.
    Unless ``dryrun`` is set, a changed file is rewritten in place, after
    copying the original to ``file + ".bak"`` when ``makebackup`` is set.

    Problems with a single file (it cannot be opened or read, its encoding
    declaration is invalid, or it mixes newline styles and no --newline was
    given) are reported through errprint() and the function returns
    without touching the file.

    Returns True if the file needed changes, False if it did not, and None
    for directories and for files that were skipped because of an error.
    """
    if os.path.isdir(file) and not os.path.islink(file):
        if verbose:
            print("listing directory", file)
        for name in os.listdir(file):
            fullname = os.path.join(file, name)
            if ((recurse and os.path.isdir(fullname) and
                 not os.path.islink(fullname) and
                 not os.path.split(fullname)[1].startswith("."))
                    or name.lower().endswith(".py")):
                check(fullname)
        return

    if verbose:
        print("checking", file, "...", end=' ')
    try:
        # The binary open used for encoding detection is guarded as well, so
        # a missing or unreadable path is reported like any other I/O error
        # instead of aborting the whole run with a traceback.
        with open(file, 'rb') as f:
            encoding, _ = tokenize.detect_encoding(f.readline)
        with open(file, encoding=encoding) as f:
            r = Reindenter(f)
    except SyntaxError as se:
        errprint("%s: SyntaxError: %s" % (file, str(se)))
        return
    except OSError as msg:
        errprint("%s: I/O Error: %s" % (file, str(msg)))
        return

    newline = spec_newline if spec_newline else r.newlines
    if isinstance(newline, tuple):
        # The file object saw more than one newline style while reading.
        errprint("%s: mixed newlines detected; cannot continue without --newline" % file)
        return

    if not r.run():
        if verbose:
            print("unchanged.")
        return False

    if verbose:
        print("changed.")
        if dryrun:
            print("But this is a dry run, so leaving it alone.")
    if not dryrun:
        bak = file + ".bak"
        if makebackup:
            shutil.copyfile(file, bak)
            if verbose:
                print("backed up", file, "to", bak)
        with open(file, "w", encoding=encoding, newline=newline) as f:
            r.write(f)
        if verbose:
            print("wrote new", file)
    return True
158
159
160def _rstrip(line, JUNK='\n \t'):
161    """Return line stripped of trailing spaces, tabs, newlines.
162
163    Note that line.rstrip() instead also strips sundry control characters,
164    but at least one known Emacs user expects to keep junk like that, not
165    mentioning Barry by name or anything <wink>.
166    """
167
168    i = len(line)
169    while i > 0 and line[i - 1] in JUNK:
170        i -= 1
171    return line[:i]
172
173
class Reindenter:
    """Compute a 4-space-indented version of one Python source file.

    Typical use: construct with an open text file, call run() once to build
    the transformed program in ``self.after``, then call write() to emit it.
    """

    def __init__(self, f):
        """Read all lines from *f* and set up the tokenizer bookkeeping.

        *f* must provide readlines() and a ``newlines`` attribute.
        """
        self.find_stmt = 1  # next token begins a fresh stmt?
        self.level = 0      # current indent level

        # Raw file lines.
        self.raw = f.readlines()

        # File lines, rstripped & tab-expanded.  Dummy at start is so
        # that we can use tokenize's 1-based line numbering easily.
        # Note that a line is all-blank iff it's "\n".
        self.lines = [_rstrip(line).expandtabs() + "\n"
                      for line in self.raw]
        self.lines.insert(0, None)
        self.index = 1  # index into self.lines of next line

        # List of (lineno, indentlevel) pairs, one for each stmt and
        # comment line.  indentlevel is -1 for comment lines, as a
        # signal that tokenize doesn't know what to do about them;
        # indeed, they're our headache!
        self.stats = []

        # Save the newlines found in the file so they can be used to
        #  create output without mutating the newlines.  Read only after
        #  readlines() above has consumed the file.
        self.newlines = f.newlines

    def run(self):
        """Build the reindented program in ``self.after``.

        Returns True if the result differs from the raw input lines.

        This consumes the line cursor (``self.index``), drops trailing
        blank lines from ``self.lines`` and appends a sentinel entry to
        ``self.stats``.
        """
        for tok in tokenize.generate_tokens(self.getline):
            self.tokeneater(*tok)

        # Remove trailing empty lines.
        lines = self.lines
        while lines and lines[-1] == "\n":
            lines.pop()

        # Sentinel, so every real entry has a successor marking where its
        # span of lines ends.
        stats = self.stats
        stats.append((len(lines), 0))

        # Map count of leading spaces to # we want.
        have2want = {}
        # Program after transformation.
        after = self.after = []
        # Copy over initial empty lines -- there's nothing to do until
        # we see a line with *something* on it.
        after.extend(lines[1:stats[0][0]])

        for i, (thisstmt, thislevel) in enumerate(stats[:-1]):
            nextstmt = stats[i + 1][0]
            have = getlspace(lines[thisstmt])
            if thislevel >= 0:
                # A real statement: tokenize told us its nesting level.
                want = thislevel * 4
            elif have:
                # An indented comment line.
                want = self._comment_indent(i, have, have2want)
            else:
                # A comment in column 0 stays there.
                want = 0
            assert want >= 0
            have2want[have] = want
            after.extend(self._shift(lines[thisstmt:nextstmt], have, want))
        return self.raw != self.after

    def _comment_indent(self, i, have, have2want):
        """Return the indentation wanted for the indented comment at stats[i]."""
        # If we saw the same indentation before, reuse what it most
        # recently mapped to.
        want = have2want.get(have, -1)
        if want >= 0:
            return want

        stats = self.stats
        lines = self.lines

        # Then it probably belongs to the next real stmt (the final entry
        # of stats is the sentinel, so it is excluded).
        for jline, jlevel in stats[i + 1:-1]:
            if jlevel >= 0:
                if have == getlspace(lines[jline]):
                    return jlevel * 4
                break

        # Maybe it's a hanging comment, in which case we should shift it
        # like its base line (the closest preceding real stmt) got shifted.
        for jline, jlevel in reversed(stats[:i]):
            if jlevel >= 0:
                want = have + (getlspace(self.after[jline - 1]) -
                               getlspace(lines[jline]))
                break

        # Still no luck (nothing found, or the shift would go negative)
        # -- leave it alone.
        return want if want >= 0 else have

    @staticmethod
    def _shift(block, have, want):
        """Return the lines of *block* moved from indent *have* to *want*."""
        diff = want - have
        if diff == 0 or have == 0:
            return block
        if diff > 0:
            pad = " " * diff
            # Blank lines stay blank rather than gaining trailing spaces.
            return [line if line == "\n" else pad + line for line in block]
        # Dedent, but never strip more than a line's own leading spaces.
        return [line[min(getlspace(line), -diff):] for line in block]

    def write(self, f):
        """Write the transformed program computed by run() to *f*."""
        f.writelines(self.after)

    # Line-getter for tokenize.
    def getline(self):
        """Return the next cleaned-up line, or "" once all lines are consumed."""
        if self.index >= len(self.lines):
            line = ""
        else:
            line = self.lines[self.index]
            self.index += 1
        return line

    # Line-eater for tokenize.
    def tokeneater(self, type, token, slinecol, end, line,
                   INDENT=tokenize.INDENT,
                   DEDENT=tokenize.DEDENT,
                   NEWLINE=tokenize.NEWLINE,
                   COMMENT=tokenize.COMMENT,
                   NL=tokenize.NL):
        """Consume one token, tracking indent level and statement starts.

        Appends (lineno, level) to ``self.stats`` for the first token of
        each statement, and (lineno, -1) for each comment seen while a new
        statement is still being looked for.
        """

        if type == NEWLINE:
            # A program statement, or ENDMARKER, will eventually follow,
            # after some (possibly empty) run of tokens of the form
            #     (NL | COMMENT)* (INDENT | DEDENT+)?
            self.find_stmt = 1

        elif type == INDENT:
            self.find_stmt = 1
            self.level += 1

        elif type == DEDENT:
            self.find_stmt = 1
            self.level -= 1

        elif type == COMMENT:
            if self.find_stmt:
                self.stats.append((slinecol[0], -1))
                # but we're still looking for a new stmt, so leave
                # find_stmt alone

        elif type == NL:
            pass

        elif self.find_stmt:
            # This is the first "real token" following a NEWLINE, so it
            # must be the first token of the next program statement, or an
            # ENDMARKER.
            self.find_stmt = 0
            if line:   # not endmarker
                self.stats.append((slinecol[0], self.level))
322
323
def getlspace(line):
    """Return the number of leading space characters in *line*.

    Only " " counts; tabs and other whitespace end the run.
    """
    return len(line) - len(line.lstrip(" "))
330
331
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
334