#!/usr/local/bin/python3.8
# encoding: utf-8
#
# wmlscope -- generate reports on WML macro and resource usage
#
# By Eric S. Raymond, April 2007.
#
# This tool cross-references macro definitions with macro calls, and
# resource (sound or image) files with uses of the resources in WML,
# and generates various useful reports from such cross-references.
# It also checks actual macro arguments against types implied by the formals.
#
# (Most of the work is done by a cross-referencer class that is also
# used in other tools.)
#
# It takes a list of directories as arguments; if none is given, it
# behaves as though the current directory had been specified as a
# single argument.  Each directory is treated as a separate domain for
# macro and resource visibility purposes.
#
# There are two kinds of namespace, exporting and non-exporting.
# Exporting namespaces make all their resources and macro names
# globally visible.  You can make a namespace exporting by embedding
# a comment like this in it:
#
#     # wmlscope: export=yes
#
# Wesnoth core data is an exporting namespace.  Campaigns are non-exporting;
# they should contain the declaration
#
#     # wmlscope: export=no
#
# somewhere.  wmlscope will complain when it sees a namespace with no export
# property, then treat it as non-exporting.
#
# You can tell wmlscope to ignore stretches of config files
# with the following magic comments:
#
#     # wmlscope: start ignoring
#     # wmlscope: stop ignoring
#
# Similarly, you can tell wmlscope to ignore multiple or duplicate macro
# definitions in a range of lines with the following magic comments:
#
#     # wmlscope: start conditionals
#     # wmlscope: stop conditionals
#
# The following magic comment:
#
#     # prune FOOBAR
#
# will cause wmlscope to forget about all but one of the definitions of FOOBAR
# it has seen.  This will be useful mainly for symbols that have different
# definitions enabled by an #ifdef.
#
#
# This tool does catch one kind of implicit reference: if an attack name
# is specified but no icon is given, the attack icon will default to
# a name generated from the attack name.  This behavior can be suppressed
# by adding a magic comment containing the string "no-icon" to the name= line.
#
# The checking done by this tool has a few flaws:
#
# (1) It doesn't actually evaluate file inclusions.  Instead, any
# macro definition satisfies any macro call made under the same
# directory.  Exception: when an #undef is detected, the macro is
# tagged local and not visible outside the span of lines where it's
# defined.
#
# (2) It doesn't read [binary_path] tags, as this would require
# implementing a WML parser.  Instead, it assumes that a resource-file
# reference can be satisfied by any matching image file from anywhere
# in the same directory it came from.  The resources under the *first*
# directory argument (only) are visible everywhere.
#
# (3) A reference with embedded {}s in a macro will have the macro's
# formal args substituted in at WML evaluation time.  Instead, this
# tool treats each {} as a .* wildcard and considers the reference to
# match *every* resource filename that matches that pattern.  Under
# appropriate circumstances this might report a resource filename
# statically matching the pattern as having been referenced even
# though none of the actual macro calls would actually generate it.
#
# Problems (1) and (2) imply that this tool might conceivably report
# that a reference has been satisfied when under actual
# WML-interpreter rules it has not.
#
# The reporting format is compatible with GNU Emacs compile mode.
#
# For debugging purposes, an in-line comment of the form
#
#     # wmlscope: warnlevel NNN
#
# sets the warning level.

import sys
import os
import time
import re
import argparse
import hashlib
import glob
import codecs
from wesnoth.wmltools3 import *

# Every valid PNG file starts with these 16 bytes: the 8-byte magic number
# (89 50 4E 47 0D 0A 1A 0A -- note that 50 4E 47 is 'PNG' in ASCII), then the
# 4-byte length (0D == 13) and 4-byte type of the IHDR chunk, which is always
# the first chunk in the file.
_PNG_PREAMBLE = b"\x89PNG\r\n\x1a\n\x00\x00\x00\x0dIHDR"


def interpret(lines, css):
    """Interpret the ! convention for .cfg comments.

    Renders an iterable of comment lines as an HTML fragment and returns it
    as a string.  The fragment opens with a <p> carrying the class `css`.
    Each line is stripped; outside a listing a blank line starts a new
    paragraph, and a line beginning with '!' opens a <pre class='listing'>
    block.  Inside a listing the first character of every line is dropped,
    and the first non-blank line that does not begin with '!' closes it.
    The characters &, < and > are escaped so the text cannot be mistaken
    for markup.
    """
    inlisting = False
    parts = ['<p class="%s">' % css]
    for line in lines:
        line = line.strip()
        if inlisting:
            if line and line[0] != '!':
                parts.append("</pre>\n<p>")
                inlisting = False
        else:
            if not line:
                parts.append("</p><p>")
                continue
            if line[0] == '!':
                parts.append("</p>\n<pre class='listing'>")
                inlisting = True
        # '&' must be escaped first, otherwise the ampersands introduced by
        # the other two replacements would themselves get escaped.
        line = line.replace("&", "&amp;").replace("<", "&lt;").replace(">", "&gt;")
        if inlisting:
            parts.append(line[1:] + "\n")
        else:
            parts.append(line + "\n")
    if not inlisting:
        parts.append("</p>\n")
    else:
        parts.append("</pre>\n")
    outstr = "".join(parts)
    # Drop empty paragraphs and a superfluous blank line at the end of a listing.
    outstr = outstr.replace("<p></p>", "")
    outstr = outstr.replace("\n\n</pre>", "\n</pre>")
    return outstr


def _expected_image_size(path_parts, basename):
    """Return the (width, height) an image is expected to have, or None.

    `path_parts` are the components of the image path and `basename` is its
    final component.  The decision relies on add-ons placing their files
    according to mainline conventions; this may not always be the case, but
    the alternative would be a more sophisticated check in wmllint.
    """
    if "images" not in path_parts:
        return None
    if "attacks" in path_parts or "icons" in path_parts:
        # images used in attack dialogs should be 60x60
        return (60, 60)
    if "flags" in path_parts:
        # flags should be 72x72, but their icons should be 24 x 16
        return (24, 16) if "icon" in basename else (72, 72)
    if "items" in path_parts:
        # items should be 72x72
        return (72, 72)
    return None


class CrossRefLister(CrossRef):
    "Cross-reference generator with reporting functions"

    def xrefdump(self, pred=None):
        """Report resolved macro references.

        Prints, for every macro definition and then every resource, whether
        it is unused or in how many files it is used, followed by its
        references.  If `pred` is given, only entries for which
        pred(name, definition) is true are reported.
        """
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                if pred and not pred(name, defn):
                    continue
                type_ = "local" if defn.undef else "global"
                nrefs = len(defn.references)
                if nrefs == 0:
                    print("%s: %s macro %s is unused" % (defn, type_, name))
                else:
                    print("%s: %s macro %s is used in %d files:" % (defn, type_, name, nrefs))
                    defn.dump_references()
        for name in sorted(self.fileref.keys()):
            defloc = self.fileref[name]
            if pred and not pred(name, defloc):
                continue
            nrefs = len(defloc.references)
            if nrefs == 0:
                print("Resource %s is unused" % defloc)
            else:
                print("Resource %s is used in %d files:" % (defloc, nrefs))
                defloc.dump_references()

    def unresdump(self):
        "Report unresolved references and macro calls whose arguments mismatch the formals."
        # First the unresolved references
        if not self.unresolved and not self.missing:
            print("# No unresolved references")
        else:
            for (name, reference) in self.unresolved + self.missing:
                print("%s: Unresolved reference -> %s" % (reference, name))
        mismatched = []
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                m = defn.mismatches()
                if m.references:
                    mismatched.append((name, m))
        # Then the type mismatches
        if mismatched:
            print("# Mismatched references:")
            for (n, m) in mismatched:
                formals = ", ".join("{}={}".format(x, formaltype(x)) for x in m.args)
                print("%s: macro %s(%s) has mismatches:" % (m, n, formals))
                for (file, refs) in m.references.items():
                    for (ln, args) in refs:
                        signature = ", ".join("{}={}".format(f, actualtype(a))
                                              for (f, a) in zip(m.args, args))
                        print('"%s", line %d: %s(%s) with signature (%s)'
                              % (file, ln, n, ", ".join(args), signature))

    def incorrectlysized(self):
        """Report incorrectly sized images that cannot be safely used for their intended purpose.

        Every .png file in this cross-reference's file list has its width
        and height read from the IHDR chunk and compared with the size
        implied by its location.  Mismatches go to standard output;
        unreadable or invalid files are reported on standard error.
        """
        for (_ns, filename) in self.filelist.generator():
            if not filename.endswith(".png"):
                continue
            try:
                with open(filename, mode="rb") as image:
                    png_header = image.read(16)
                    # After the part common to all PNG files, the next 4 bytes
                    # are the image width and the next 4 the image height.
                    # The remaining 5 bytes of the IHDR chunk aren't needed.
                    w = image.read(4)
                    h = image.read(4)
            except IOError:
                print("%s: unable to read file" % filename, file=sys.stderr)
                continue
            # A file truncated inside IHDR yields short reads; treat it as invalid
            # rather than failing while decoding the size.
            if png_header != _PNG_PREAMBLE or len(w) != 4 or len(h) != 4:
                print("%s is not a valid PNG file" % filename, file=sys.stderr)
                continue
            # Both values are stored in big-endian order (most significant byte first).
            x = int.from_bytes(w, "big")
            y = int.from_bytes(h, "big")
            expected_size = _expected_image_size(filename.split(os.sep),
                                                 os.path.split(filename)[1])
            if expected_size and (x, y) != expected_size:
                print("%s: image is %d x %d, expected %d x %d"
                      % (filename, x, y, expected_size[0], expected_size[1]))

    def duplicates(self, exportonly):
        """Dump duplicate unit IDs.

        If `exportonly` is true, an ID is reported only when at least one of
        its occurrences lies in an exporting namespace.
        """
        duplicate_latch = False
        for (key, value) in self.unit_ids.items():
            if len(value) <= 1:
                continue
            if exportonly and not any(self.exports(x.namespace) for x in value):
                continue
            if not duplicate_latch:
                print("# Duplicate IDs")
                duplicate_latch = True
            print("%s: occurs %d times as unit ID" % (key, len(value)))
            for ref in value:
                print("%s: exported=%s" % (ref, self.exports(ref.namespace)))

    def typelist(self, branch):
        """Dump actual and formal arguments for macros in specified file.

        Only calls made from files whose name ends with `branch` are listed.
        The formal signature of a macro name is printed once, before the
        first listed call.
        """
        already_seen = set()
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                for (filename, refs) in defn.references.items():
                    if not filename.endswith(branch):
                        continue
                    if name not in already_seen:
                        already_seen.add(name)
                        formals = ", ".join("{}={}".format(x, formaltype(x)) for x in defn.args)
                        print("%s: macro %s(%s):" % (defn, name, formals))
                    for (ln, args) in refs:
                        signature = ", ".join("{}={}".format(f, actualtype(a))
                                              for (f, a) in zip(defn.args, args))
                        print('"%s", line %d: %s(%s) with signature (%s)'
                              % (filename, ln, name, ", ".join(args), signature))

    def deflist(self, pred=None):
        """List all resource definitions.

        Prints one line per macro definition, resource and unit ID.  If
        `pred` is given, macros and resources are listed only when
        pred(name, definition) is true; unit IDs are always listed.
        """
        for name in sorted(self.xref.keys()):
            for defn in self.xref[name]:
                if not pred or pred(name, defn):
                    print("macro", name, " ".join("{}={}".format(x, formaltype(x)) for x in defn.args))
        for name in sorted(self.fileref.keys()):
            defloc = self.fileref[name]
            if not pred or pred(name, defloc):
                print("resource", name)
        for uid in sorted(self.unit_ids.keys()):
            print("unit", uid)

    def unchecked(self, fp):
        """List all macro definitions with untyped formals.

        Writes to `fp` a summary line followed by one line per affected
        definition, in which each formal without an implied type is marked
        with a trailing '?'.  Nothing is written when every formal is typed.
        """
        unchecked = []
        defcount = 0
        callcount = 0
        unresolvedcount = 0
        for name in self.xref.keys():
            for defn in self.xref[name]:
                defcount += 1
                callcount += len(defn.references)
                untyped = [formaltype(arg) is None for arg in defn.args]
                if any(untyped):
                    # Mark the untyped formals in a copy, so that the definition
                    # itself is left intact for any report produced afterwards.
                    shown = [arg + "?" if flag else arg
                             for (arg, flag) in zip(defn.args, untyped)]
                    unchecked.append((name, defn, shown))
                    unresolvedcount += len(defn.references)
        if unchecked:
            # defcount is at least 1 here, but there may be no calls at all
            callpercent = (100 * unresolvedcount) / callcount if callcount else 0.0
            print("# %d of %d (%.02f%%) macro definitions and %d of %d calls (%.02f%%) have untyped formals:"
                  % (len(unchecked),
                     defcount,
                     ((100 * len(unchecked)) / defcount),
                     unresolvedcount,
                     callcount,
                     callpercent),
                  file=fp)
            # sort by checking the 2nd element in the tuple
            unchecked.sort(key=lambda element: element[1])
            for (name, defn, shown) in unchecked:
                print("%s: %s(%s)" % (defn, name, ", ".join(shown)), file=fp)

    def extracthelp(self, pref, fp):
        """Deliver all macro help comments in HTML form.

        Writes to `fp` a list of links to the documented files, followed by
        one section per file holding the leading comment block of that file
        and a definition list of its documented macros.  `pref` is a prefix
        stripped from file names before they are displayed.
        """
        # Bug: finds only the first definition of each macro in scope.
        doclist = [x for x in self.xref.keys() if self.xref[x][0].docstring.count("\n") > 1]
        doclist.sort(key=lambda element: self.xref[element][0])
        out = []
        filename = None
        filenamelist = []
        counted = 0
        for name in doclist:
            entry = self.xref[name][0]
            if entry.filename != filename:
                # A new file starts: close the previous definition list, if any.
                if counted:
                    out.append("</dl>\n")
                counted += 1
                filename = entry.filename
                if filename.startswith(pref):
                    displayname = filename[len(pref):]
                else:
                    displayname = filename
                out.append("<p class='toplink'>[ <a href='#content'>top</a> ]</p>\n")
                out.append("<h2 id='file:" + displayname + "' class='file_header'>From file: ")
                out.append("<code class='noframe'>" + displayname + "</code></h2>\n")
                filenamelist.append(displayname)
                # Collect the comment block at the top of the file,
                # skipping any #textdomain directives.
                hdr = []
                with codecs.open(filename, "r", "utf8") as dfp:
                    for line in dfp:
                        line = line.lstrip()
                        if line and line.startswith("#textdomain"):
                            continue
                        if line and line[0] == '#':
                            hdr.append(line[1:])
                        else:
                            break
                if hdr:
                    out.append(interpret(hdr, "file_explanation"))
                out.append("<dl>\n")
            if entry.docstring:
                lines = entry.docstring.split("\n")
                # The first line holds the macro name followed by its formals.
                header = lines.pop(0).split()
                if lines and not lines[-1]: # Ignore trailing blank lines
                    lines.pop()
                if not lines: # Ignore definitions without a docstring
                    continue
                out.append("\n<dt id='" + header[0] + "'>\n<code class='noframe'>")
                out.append("<span class='macro-name'>" + header[0] + "</span>")
                if header[1:]:
                    out.append(" <var class='macro-formals'>" + " ".join(header[1:]) + "</var>")
                out.append("\n</code></dt>\n")
                out.append("<dd>\n")
                out.append(interpret(lines, "macro-explanation"))
                out.append("</dd>\n")
        # Only close a definition list if one was actually opened.
        if counted:
            out.append("</dl>\n")
        out.append("<p class='toplink'>[ <a href='#content'>top</a> ]</p>\n")
        linkheaderstr = "<p class='macro-ref-toc'>Documented files:</p><div class='filelist'><ul>"
        for displayname in filenamelist:
            linkheaderstr += "<li><a href='#file:" + displayname + "'>"
            linkheaderstr += "<code class='noframe'>" + displayname + "</code></a></li>"
        linkheaderstr += "</ul></div>\n"
        fp.write(linkheaderstr)
        fp.write("".join(out))


def main():
    "Parse the command line, build the cross-reference and print the requested reports."
    parser = argparse.ArgumentParser()
    parser.add_argument("-c", "--crossreference", action="store_true",
                        help="Report resolved macro references (implies -w 1)")
    parser.add_argument("-C", "--collisions", action="store_true",
                        help="Report duplicate resource files")
    parser.add_argument("-d", "--definitions", action="store_true",
                        help="Make definition list")
    parser.add_argument("-e", "--exclude", action="append", default=[],
                        help="Ignore files matching the specified regular expression")
    parser.add_argument("-f", "--from", action="store", dest="from_", metavar="FROM", # from is a keyword
                        help="Report only on things defined in files matching regexp")
    parser.add_argument("-l", "--listfiles", action="store_true",
                        help="List files that will be processed")
    parser.add_argument("-r", "--refcount", action="store", type=int, # convert to int, defaults to None
                        help="Report only on macros w/references in ddd files")
    parser.add_argument("-t", "--typelist", action="store",
                        help="List actual & formal argtypes for calls in fname")
    parser.add_argument("-u", "--unresolved", action="store_true",
                        help="Report unresolved macro references")
    parser.add_argument("-w", "--warnlevel", action="store", type=int, default=0,
                        help="Set to 1 to warn of duplicate macro definitions")
    # this option was never listed before...
    parser.add_argument("-p", "--progress", action="store_true",
                        help="Show progress") # TODO: improve description
    # no short options for these
    parser.add_argument("--force-used", action="append", dest="forceused", default=[],
                        help="Ignore refcount 0 on names matching regexp")
    parser.add_argument("--extracthelp", action="store_true",
                        help="Extract help from macro definition comments.")
    parser.add_argument("--unchecked", action="store_true",
                        help="Report all macros with untyped formals.")
    parser.add_argument("directories", action="store", nargs="*",
                        help="Any number of directories to check. If no "
                             "directories are given, all files under the current directory are checked.")
    namespace = parser.parse_args()

    try:
        # Process options
        crossreference = namespace.crossreference
        collisions = namespace.collisions
        definitions = namespace.definitions
        exclude = namespace.exclude
        from_restrict = namespace.from_
        extracthelp = namespace.extracthelp
        listfiles = namespace.listfiles
        refcount_restrict = namespace.refcount
        typelist = namespace.typelist
        unresolved = namespace.unresolved
        warnlevel = 1 if crossreference else namespace.warnlevel
        forceused = namespace.forceused
        unchecked = namespace.unchecked
        progress = namespace.progress
        arguments = namespace.directories # a remnant of getopt...

        # in certain situations, Windows' command prompt appends a double quote
        # to the command line parameters. This block takes care of this issue.
        for i, arg in enumerate(arguments):
            if arg.endswith('"'):
                arguments[i] = arg[:-1]

        forceused = "|".join(forceused)
        if arguments:
            dirpath = [globbed for arg in arguments for globbed in glob.glob(arg)]
        else:
            dirpath = ['.']
        if not extracthelp:
            print("# Wmlscope reporting on %s" % time.ctime())
            print("# Invocation: %s" % " ".join(sys.argv))
            print("# Working directory: %s" % os.getcwd())
        starttime = time.time()
        xref = CrossRefLister(dirpath, "|".join(exclude), warnlevel, progress)
        if not extracthelp:
            print("#Cross-reference time: %d seconds" % (time.time()-starttime))
        if extracthelp:
            # dirpath is empty when none of the arguments matched anything
            xref.extracthelp(dirpath[0] if dirpath else "", sys.stdout)
        elif unchecked:
            xref.unchecked(sys.stdout)
        elif listfiles:
            for (_ns, filename) in xref.filelist.generator():
                print(filename)
        if collisions:
            digests = []
            for (_ns, filename) in xref.filelist.generator():
                with open(filename, "rb") as ifp: # this one may be an image or a sound, so don't assume UTF8 encoding
                    m = hashlib.md5()
                    # read 1 KiB each time to avoid using too much memory
                    for chunk in iter(lambda: ifp.read(1024), b""):
                        m.update(chunk)
                    digests.append(m.hexdigest()) # hexdigest can be easily printed, unlike digest
            hashes = {}
            # hash in Py3 is a builtin function, hence the underscore after the variable name
            for (filename, hash_) in zip(xref.filelist.flatten(), digests):
                hashes.setdefault(hash_, []).append(filename)
            for (hash_, files) in hashes.items():
                if len(files) > 1:
                    print("%%\nPossible duplicated files with MD5 hash", hash_)
                    for fn in files:
                        print("->", fn)
            xref.duplicates(exportonly=False)
        elif typelist:
            xref.typelist(typelist)
        elif crossreference or definitions or listfiles or unresolved:
            def predicate(name, defloc):
                "Tell whether a definition passes the --from, --refcount and --force-used filters."
                if from_restrict and not re.search(from_restrict, defloc.filename):
                    return False
                if (refcount_restrict is not None
                        and len(defloc.references) != refcount_restrict) \
                   or (refcount_restrict == 0 and forceused and re.search(forceused, name)):
                    return False
                return True
            if crossreference:
                if xref.noxref:
                    print("wmlscope: can't make cross-reference, input included a definitions file.", file=sys.stderr)
                else:
                    xref.xrefdump(predicate)
            if definitions:
                xref.deflist(predicate)
            if unresolved:
                xref.incorrectlysized()
                xref.unresdump()
                xref.duplicates(exportonly=True)
    except KeyboardInterrupt:
        print("wmlscope: aborted.", file=sys.stderr)


if __name__ == "__main__":
    main()

# wmlscope ends here