1#!/usr/local/bin/python3.8
2# encoding: utf-8
3#
4# wmlscope -- generate reports on WML macro and resource usage
5#
6# By Eric S. Raymond, April 2007.
7#
8# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
# It also checks actual macro arguments against types implied by the formals.
12#
13# (Most of the work is done by a cross-referencer class that is also
14# used in other tools.)
15#
16# It takes a list of directories as arguments; if none is given, it
17# behaves as though the current directory had been specified as a
18# single argument.  Each directory is treated as a separate domain for
19# macro and resource visibility purposes.
20#
21# There are two kinds of namespace, exporting and non-exporting.
22# Exporting namespaces make all their resources and macro names
23# globally visible.  You can make a namespace exporting by embedding
24# a comment like this in it:
25#
26#     # wmlscope: export=yes
27#
28# Wesnoth core data is an exporting namespace.  Campaigns are non-exporting;
29# they should contain the declaration
30#
31#     # wmlscope: export=no
32#
33# somewhere.  wmlscope will complain when it sees a namespace with no export
34# property, then treat it as non-exporting.
35#
36# You can tell wmlscope to ignore stretches of config files
37# with the following magic comments:
38#
39#     # wmlscope: start ignoring
40#     # wmlscope: stop ignoring
41#
42# Similarly, you can tell wmlscope to ignore multiple or duplicate macro
43# definitions in a range of lines with the following magic comments:
44#
45#     # wmlscope: start conditionals
46#     # wmlscope: stop conditionals
47#
48# The following magic comment:
49#
50#     # prune FOOBAR
51#
52# will cause wmlscope to forget about all but one of the definitions of FOOBAR
53# it has seen.  This will be useful mainly for symbols that have different
54# definitions enabled by an #ifdef.
55#
56#
57# This tool does catch one kind of implicit reference: if an attack name
58# is specified but no icon is given, the attack icon will default to
59# a name generated from the attack name.  This behavior can be suppressed
60# by adding a magic comment containing the string "no-icon" to the name= line.
61#
# The checking done by this tool has a few flaws:
63#
64# (1) It doesn't actually evaluate file inclusions.  Instead, any
65# macro definition satisfies any macro call made under the same
66# directory.  Exception: when an #undef is detected, the macro is
67# tagged local and not visible outside the span of lines where it's
68# defined.
69#
70# (2) It doesn't read [binary_path] tags, as this would require
71# implementing a WML parser.  Instead, it assumes that a resource-file
72# reference can be satisfied by any matching image file from anywhere
73# in the same directory it came from.  The resources under the *first*
74# directory argument (only) are visible everywhere.
75#
76# (3) A reference with embedded {}s in a macro will have the macro's
77# formal args substituted in at WML evaluation time.  Instead, this
78# tool treats each {} as a .* wildcard and considers the reference to
79# match *every* resource filename that matches that pattern.  Under
80# appropriate circumstances this might report a resource filename
81# statically matching the pattern as having been referenced even
82# though none of the actual macro calls would actually generate it.
83#
84# Problems (1) and (2) imply that this tool might conceivably report
85# that a reference has been satisfied when under actual
86# WML-interpreter rules it has not.
87#
88# The reporting format is compatible with GNU Emacs compile mode.
89#
90# For debugging purposes, an in-line comment of the form
91#
92#     # wmlscope: warnlevel NNN
93#
94# sets the warning level.
95
96import sys, os, time, re, argparse, hashlib, glob, codecs
97from wesnoth.wmltools3 import *
98
def interpret(lines, css):
    """Render comment lines as HTML, honoring the '!' convention.

    Each line is stripped of surrounding whitespace.  Consecutive lines
    whose first character is '!' are emitted (minus the '!') inside a
    <pre class='listing'> element; all other text is emitted inside <p>
    elements, with an empty line starting a new paragraph.  An empty
    line inside a listing does not end the listing.

    lines -- iterable of strings to render.
    css   -- CSS class applied to the opening <p> element.

    Returns the generated HTML as a single string.
    """
    inlisting = False
    outstr = '<p class="%s">' % css
    for line in lines:
        line = line.strip()
        if inlisting:
            # A non-empty line that lacks the '!' marker ends the listing.
            if line and line[0] != '!':
                outstr += "</pre>\n<p>"
                inlisting = False
        else:
            if not line:
                outstr += "</p><p>"
                continue
            if line[0] == '!':
                outstr += "</p>\n<pre class='listing'>"
                inlisting = True
        # '&' must be escaped first; escaping it last would mangle the
        # entities just produced for '<' and '>' (e.g. "&lt;" -> "&amp;lt;").
        line = line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        if inlisting:
            # Drop the leading '!' marker (a no-op on an empty line).
            outstr += line[1:] + "\n"
        else:
            outstr += line + "\n"
    if not inlisting:
        outstr += "</p>\n"
    else:
        outstr += "</pre>\n"
    # Tidy up empty paragraphs and a doubled newline before </pre>.
    outstr = outstr.replace("<p></p>", "")
    outstr = outstr.replace("\n\n</pre>", "\n</pre>")
    return outstr
129
class CrossRefLister(CrossRef):
    """Cross-reference generator with reporting functions.

    Adds report-printing methods on top of CrossRef.  The methods read
    the attributes xref, fileref, unresolved, missing, unit_ids and
    filelist, none of which is assigned here (presumably they are
    populated by CrossRef -- confirm against wmltools3).
    """

    def xrefdump(self, pred=None):
        """Report resolved macro and resource references.

        pred -- optional callable pred(name, definition); definitions for
                which it returns a false value are skipped.
        """
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                if pred and not pred(name, defn):
                    continue
                # Macros that are #undef'd are reported as local.
                type_ = "local" if defn.undef else "global"
                nrefs = len(defn.references)
                if nrefs == 0:
                    print("%s: %s macro %s is unused" % (defn, type_, name))
                else:
                    print("%s: %s macro %s is used in %d files:" % (defn, type_, name, nrefs))
                defn.dump_references()
        for name in sorted(self.fileref.keys()):
            defloc = self.fileref[name]
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("Resource %s is unused" % defloc)
            else:
                print("Resource %s is used in %d files:" % (defloc, nrefs))
            defloc.dump_references()

    def unresdump(self):
        """Report unresolved references and argument type mismatches."""
        # First the unresolved references
        if len(self.unresolved) == 0 and len(self.missing) == 0:
            print("# No unresolved references")
        else:
            for (name, reference) in self.unresolved + self.missing:
                print("%s: Unresolved reference -> %s" % (reference, name))
        mismatched = []
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                m = defn.mismatches()
                if m.references:
                    mismatched.append((name, m))
        # Then the type mismatches
        if mismatched:
            print("# Mismatched references:")
            for (n, m) in mismatched:
                formals = ", ".join(["{}={}".format(x, formaltype(x)) for x in m.args])
                print("%s: macro %s(%s) has mismatches:" % (m, n, formals))
                for (file, refs) in m.references.items():
                    for (ln, args) in refs:
                        signature = ", ".join(["{}={}".format(f, actualtype(a)) for f, a in zip(m.args, args)])
                        print('"%s", line %d: %s(%s) with signature (%s)' % (file, ln, n, ", ".join(args), signature))

    def incorrectlysized(self):
        """Report images whose size is unsafe for their intended purpose.

        Only files ending in ".png" that live under an "images" path
        component are checked, and the intended purpose is guessed from
        the other path components (mainline directory conventions).
        Unreadable or invalid PNG files are reported on standard error.
        """
        # All valid PNG files start with the same 16 bytes: the 8-byte
        # magic number (89 50 4E 47 0D 0A 1A 0A, where '50 4E 47' is 'PNG'
        # in ASCII), then the 4-byte length and 4-byte type of the first
        # chunk.  That chunk is always IHDR, whose length is always 13.
        png_prefix = b"\x89PNG\r\n\x1a\n\x00\x00\x00\x0dIHDR"
        # Use this instance's own file list rather than a module-level
        # global, so the method works for any CrossRefLister.
        for (_namespace, filename) in self.filelist.generator():
            if not filename.endswith(".png"):
                continue
            fn_list = filename.split(os.sep)
            try:
                with open(filename, mode="rb") as image:
                    png_header = image.read(16)
                    # The first 8 bytes of IHDR data are the width and the
                    # height; the remaining 5 bytes are not needed here.
                    w = image.read(4)
                    h = image.read(4)
            except IOError:
                print("%s: unable to read file" % filename, file=sys.stderr)
                continue
            # A truncated file may have a good header but missing dimensions.
            if png_header != png_prefix or len(w) != 4 or len(h) != 4:
                print("%s is not a valid PNG file" % filename, file=sys.stderr)
                continue
            # Width and height are stored in big-endian order.
            x = int.from_bytes(w, "big")
            y = int.from_bytes(h, "big")
            # These checks rely on add-ons placing files according to
            # mainline conventions, which may not always be the case.
            if "images" not in fn_list:
                continue
            if "attacks" in fn_list or "icons" in fn_list:
                # images used in attack dialogs should be 60x60
                expected_size = (60, 60)
            elif "flags" in fn_list:
                # flags should be 72x72, but their icons should be 24 x 16
                if "icon" in os.path.split(filename)[1]:
                    expected_size = (24, 16)
                else:
                    expected_size = (72, 72)
            elif "items" in fn_list:
                # items should be 72x72
                expected_size = (72, 72)
            else:
                continue
            if (x, y) != expected_size:
                print("%s: image is %d x %d, expected %d x %d" % (filename, x, y, expected_size[0], expected_size[1]))

    def duplicates(self, exportonly):
        """Dump unit IDs that occur more than once.

        exportonly -- if true, skip IDs none of whose occurrences lies in
                      a namespace for which self.exports() is true.
        """
        duplicate_latch = False
        for (key, value) in self.unit_ids.items():
            if len(value) > 1:
                if exportonly and not [x for x in value if self.exports(x.namespace)]:
                    continue
                # Print the section header once, before the first hit.
                if not duplicate_latch:
                    print("# Duplicate IDs")
                    duplicate_latch = True
                print("%s: occurs %d times as unit ID" % (key, len(value)))
                for ref in value:
                    print("%s: exported=%s" % (ref, self.exports(ref.namespace)))

    def typelist(self, branch):
        """Dump actual and formal arguments for macro calls in a file.

        branch -- filename suffix; calls are listed for every referencing
                  file whose name ends with it.
        """
        already_seen = set()
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                for (filename, refs) in defn.references.items():
                    if not filename.endswith(branch):
                        continue
                    # Print the macro's formal signature once per name.
                    if name not in already_seen:
                        already_seen.add(name)
                        formals = ", ".join(["{}={}".format(x, formaltype(x)) for x in defn.args])
                        print("%s: macro %s(%s):" % (defn, name, formals))
                    for (ln, args) in refs:
                        signature = ", ".join(["{}={}".format(f, actualtype(a)) for f, a in zip(defn.args, args)])
                        print('"%s", line %d: %s(%s) with signature (%s)' % (filename, ln, name, ", ".join(args), signature))

    def deflist(self, pred=None):
        """List all macro, resource and unit ID definitions.

        pred -- optional callable pred(name, definition) used to filter
                macros and resources; unit IDs are always listed.
        """
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                if not pred or pred(name, defn):
                    print("macro", name, " ".join(["{}={}".format(x, formaltype(x)) for x in defn.args]))
        for name in sorted(self.fileref.keys()):
            defloc = self.fileref[name]
            if not pred or pred(name, defloc):
                print("resource", name)
        for uid in sorted(self.unit_ids.keys()):
            print("unit", uid)

    def unchecked(self, fp):
        """List all macro definitions with untyped formals.

        fp -- unused; the report is printed to standard output.

        Side effect: every formal for which formaltype() returns None
        gets a "?" appended in the definition's argument list.
        """
        untyped = []
        defcount = 0
        callcount = 0
        unresolvedcount = 0
        for name in self.xref.keys():
            for defn in self.xref[name]:
                defcount += 1
                callcount += len(defn.references)
                has_untyped = False
                for (i, d) in enumerate(defn.args):
                    if formaltype(d) is None:
                        defn.args[i] += "?"
                        has_untyped = True
                if has_untyped:
                    untyped.append((name, defn))
                    unresolvedcount += len(defn.references)
        if untyped:
            def percent(part, whole):
                # Avoid ZeroDivisionError when nothing was counted,
                # e.g. untyped macros exist but no macro is ever called.
                return (100 * part) / whole if whole else 0.0
            print("# %d of %d (%.02f%%) macro definitions and %d of %d calls (%.02f%%) have untyped formals:" \
                  % (len(untyped),
                     defcount,
                     percent(len(untyped), defcount),
                     unresolvedcount,
                     callcount,
                     percent(unresolvedcount, callcount)))
            # sort by checking the 2nd element in the tuple
            untyped.sort(key=lambda element: element[1])
            for (name, defn) in untyped:
                print("%s: %s(%s)" % (defn, name, ", ".join(defn.args)))

    def extracthelp(self, pref, fp):
        """Deliver all macro help comments in HTML form.

        pref -- path prefix stripped from filenames for display.
        fp   -- writable text stream receiving the HTML: first a list of
                links to the documented files, then the per-file
                sections.
        """
        # Bug: finds only the first definition of each macro in scope.
        doclist = [x for x in self.xref.keys() if self.xref[x][0].docstring.count("\n") > 1]
        doclist.sort(key=lambda element: self.xref[element][0])
        outstr = ""
        filename = None
        filenamelist = []
        counted = 0
        for name in doclist:
            entry = self.xref[name][0]
            if entry.filename != filename:
                # Entering a new file: close the previous file's list and
                # emit the heading for this one.
                if counted:
                    outstr += "</dl>\n"
                counted += 1
                filename = entry.filename
                if filename.startswith(pref):
                    displayname = filename[len(pref):]
                else:
                    displayname = filename
                outstr += "<p class='toplink'>[ <a href='#content'>top</a> ]</p>\n"
                outstr += "<h2 id='file:" + displayname + "' class='file_header'>From file: "
                outstr += "<code class='noframe'>" + displayname + "</code></h2>\n"
                filenamelist.append(displayname)
                # Collect the file's leading comment block as its
                # explanation, skipping #textdomain lines.
                hdr = []
                with codecs.open(filename, "r", "utf8") as dfp:
                    for line in dfp:
                        line = line.lstrip()
                        if line and line.startswith("#textdomain"):
                            continue
                        if line and line[0] == '#':
                            hdr.append(line[1:])
                        else:
                            break
                if hdr:
                    outstr += interpret(hdr, "file_explanation")
                outstr += "<dl>\n"
            if entry.docstring:
                lines = entry.docstring.split("\n")
                # The first docstring line holds the macro name and formals.
                header = lines.pop(0).split()
                if lines and not lines[-1]: # Ignore trailing blank lines
                    lines.pop()
                if not lines: # Ignore definitions without a docstring
                    continue
                outstr += "\n<dt id='" + header[0] + "'>\n<code class='noframe'>"
                outstr += "<span class='macro-name'>" + header[0] + "</span>"
                if header[1:]:
                    outstr += " <var class='macro-formals'>"+" ".join(header[1:])+"</var>"
                outstr += "\n</code></dt>\n"
                outstr += "<dd>\n"
                outstr += interpret(lines, "macro-explanation")
                outstr += "</dd>\n"
        # Only close a <dl> if one was actually opened.
        if counted:
            outstr += "</dl>\n"
        outstr += "<p class='toplink'>[ <a href='#content'>top</a> ]</p>\n"
        linkheaderstr = "<p class='macro-ref-toc'>Documented files:</p><div class='filelist'><ul>"
        for filename in filenamelist:
            linkheaderstr += "<li><a href='#file:" + filename + "'>"
            linkheaderstr += "<code class='noframe'>" + filename + "</code></a></li>"
        linkheaderstr += "</ul></div>\n"
        fp.write(linkheaderstr)
        fp.write(outstr)
367
if __name__ == "__main__":
    # Command-line driver: parse the options, build the cross-reference,
    # then print whichever reports were requested.
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--crossreference", action="store_true",
                        help="Report resolved macro references (implies -w 1)")
    parser.add_argument("-C", "--collisions", action="store_true",
                        help="Report duplicate resource files")
    parser.add_argument("-d", "--definitions", action="store_true",
                        help="Make definition list")
    parser.add_argument("-e", "--exclude", action="append", default = [],
                        help="Ignore files matching the specified regular expression")
    parser.add_argument("-f", "--from", action="store", dest="from_", metavar="FROM", # from is a keyword
                        help="Report only on things defined in files matching regexp")
    parser.add_argument("-l", "--listfiles", action="store_true",
                        help="List files that will be processed")
    parser.add_argument("-r", "--refcount", action="store", type=int, # convert to int, defaults to None
                        help="Report only on macros w/references in ddd files")
    parser.add_argument("-t", "--typelist", action="store",
                        help="List actual & formal argtypes for calls in fname")
    parser.add_argument("-u", "--unresolved", action="store_true",
                        help="Report unresolved macro references")
    parser.add_argument("-w", "--warnlevel", action="store", type=int, default=0,
                        help="Set to 1 to warn of duplicate macro definitions")
    # this option was never listed before...
    parser.add_argument("-p", "--progress", action="store_true",
                        help="Show progress") # TODO: improve description
    # no short options for these
    parser.add_argument("--force-used", action="append", dest="forceused", default = [],
                        help="Ignore refcount 0 on names matching regexp")
    parser.add_argument("--extracthelp", action="store_true",
                        help="Extract help from macro definition comments.")
    parser.add_argument("--unchecked", action="store_true",
                        help="Report all macros with untyped formals.")
    parser.add_argument("directories", action="store", nargs="*",
                        help="""Any number of directories to check. If no
directories are given, all files under the current directory are checked.""")
    namespace = parser.parse_args()

    try:
        # Process options
        crossreference = namespace.crossreference
        collisions = namespace.collisions
        definitions = namespace.definitions
        exclude = namespace.exclude
        from_restrict = namespace.from_
        extracthelp = namespace.extracthelp
        listfiles = namespace.listfiles
        refcount_restrict = namespace.refcount
        typelist = namespace.typelist
        unresolved = namespace.unresolved
        # -c implies warning level 1
        warnlevel = 1 if crossreference else namespace.warnlevel
        forceused = namespace.forceused
        unchecked = namespace.unchecked
        progress = namespace.progress
        arguments = namespace.directories # a remnant of getopt...

        # in certain situations, Windows' command prompt appends a double quote
        # to the command line parameters. This block takes care of this issue.
        for i, arg in enumerate(arguments):
            if arg.endswith('"'):
                arguments[i] = arg[:-1]

        # Several --force-used patterns are combined into one alternation.
        forceused = "|".join(forceused)
        if arguments:
            # Expand shell-style wildcards ourselves.
            dirpath = []
            for arg in arguments:
                dirpath.extend(glob.glob(arg))
        else:
            dirpath = ['.']
        # The banner is suppressed for --extracthelp so that standard
        # output carries nothing but the generated HTML.
        if not extracthelp:
            print("# Wmlscope reporting on %s" % time.ctime())
            print("# Invocation: %s" % " ".join(sys.argv))
            print("# Working directory: %s" % os.getcwd())
            starttime = time.time()
        xref = CrossRefLister(dirpath, "|".join(exclude), warnlevel, progress)
        if not extracthelp:
            print("#Cross-reference time: %d seconds" % (time.time()-starttime))
        if extracthelp:
            xref.extracthelp(dirpath[0], sys.stdout)
        elif unchecked:
            xref.unchecked(sys.stdout)
        elif listfiles:
            # The first tuple member is not needed here; a distinct name
            # avoids clobbering the argparse result in 'namespace'.
            for (_dirname, filename) in xref.filelist.generator():
                print(filename)
        if collisions:
            digests = []
            for (_dirname, filename) in xref.filelist.generator():
                with open(filename, "rb") as ifp: # this one may be an image or a sound, so don't assume UTF8 encoding
                    m = hashlib.md5()
                    while True:
                        chunk = ifp.read(1024) # read 1 KiB each time to avoid using too much memory
                        if not chunk:
                            break
                        m.update(chunk)
                    digests.append(m.hexdigest()) # hexdigest can be easily printed, unlike digest
            # Group filenames by digest; this pairs flatten() with the
            # digests positionally, so it relies on flatten() and
            # generator() yielding files in the same order.
            hashes = {}
            # hash in Py3 is a builtin function, hence the underscore after the variable name
            for (filename, hash_) in zip(xref.filelist.flatten(), digests):
                hashes.setdefault(hash_, []).append(filename)
            for (hash_, files) in hashes.items(): # items in Py3 is equivalent to iteritems in Py2
                if len(files) > 1:
                    print("%%\nPossible duplicated files with MD5 hash", hash_)
                    for fn in files:
                        print("->", fn)
            xref.duplicates(exportonly=False)
        elif typelist:
            xref.typelist(typelist)
        elif crossreference or definitions or listfiles or unresolved:
            def predicate(name, defloc):
                "Decide whether a definition passes the -f, -r and --force-used filters."
                if from_restrict and not re.search(from_restrict, defloc.filename):
                    return False
                if refcount_restrict is not None \
                        and len(defloc.references) != refcount_restrict:
                    return False
                # When listing unused names (-r 0), hide those the user
                # has declared to be used via --force-used.
                if refcount_restrict == 0 and forceused and re.search(forceused, name):
                    return False
                return True
            if crossreference:
                if xref.noxref:
                    print("wmlscope: can't make cross-reference, input included a definitions file.", file=sys.stderr)
                else:
                    xref.xrefdump(predicate)
            if definitions:
                xref.deflist(predicate)
            if unresolved:
                xref.incorrectlysized()
                xref.unresdump()
                xref.duplicates(exportonly=True)
    except KeyboardInterrupt:
        print("wmlscope: aborted.", file=sys.stderr)
502# wmlscope ends here
503