#!/usr/bin/env python

"""This script searches files for functions that are just aliases in
PHP source code. This is not 100% reliable, so it should not be
automated, but it's useful to run once in a while to make sure that
all of the matches it finds are not really legitimate aliases.

Usage:

  parse_aliases.py <name of alias file> [PHP source code filename]...
"""

import contextlib
import re
import sys

try:
    from urllib2 import urlopen  # Python 2
except ImportError:
    from urllib.request import urlopen  # Python 3

# Fetch this URL to get the file that is parsed into the aliases list
alias_url = 'http://www.zend.com/phpfunc/all_aliases.php'

# Markers that delimit the alias table inside the downloaded page.
header_tok = '<!-- END OF HEADER -->'
footer_tok = '<!-- FOOTER -->'

# Example line of the table that we parse:
# '<tr bgcolor="#EFEFFF"><td><a href="function.bzclose.php">bzclose</a></td><td><a href="http://lxr.php.net/source/php-src/ext/bz2/bz2.c#48">php-src/ext/bz2/bz2.c</a></td><td><a href="function.fclose.php">fclose</a></td></tr>'

# Matches exactly one table row.  Group 1 is the alias name; the master
# function name is in group 2 when the last cell is a link and in group 3
# when the last cell is plain text.
line_re = re.compile(r'''
\A

<tr\ bgcolor="[^">]+">

<td><a\ href="[^>"]+\.php">([^<>]+)</a></td>

<td><a\ href="[^">]+">[^<>]+</a></td>

<td>
(?:
    <a\ href="[^">]+\.php">
    ( [^<>]+ )
    </a>
|   ( [^<>]+ )
)
</td>

</tr>

\Z
''', re.VERBOSE)


def parseString(s):
    """Parse the text of the alias page into a dict.

    The alias table is expected between ``header_tok`` and ``footer_tok``,
    one ``<tr>`` per line, wrapped in a ``<table ...>`` line and a
    ``</table>`` line followed by one empty line.

    Returns a dict mapping each alias name to its master function name.

    Raises ValueError if either token is missing from ``s`` and
    AssertionError if the table does not have the expected layout.
    """
    _, rest = s.split(header_tok, 1)
    body, _ = rest.split(footer_tok, 1)

    lines = body.split('\n')
    assert [tail.strip() for tail in lines[-2:]] == ['</table>', '']
    assert lines[0].strip().startswith('<table')
    # Drop the <table> opener and the closing '</table>' / '' pair so that
    # only the data rows remain.
    del lines[0], lines[-2:]

    aliases = {}
    for line in lines:
        mo = line_re.match(line)
        assert mo, line
        alias, master1, master2 = mo.groups()
        if master1:
            master = master1
        else:
            assert master2
            master = master2
        aliases[alias] = master

    return aliases


def parseFile(f):
    """Parse the alias page read from the file-like object ``f``.

    ``f.read()`` may return text or bytes (a URL response on Python 3
    yields bytes); bytes are decoded before parsing.
    """
    data = f.read()
    if isinstance(data, bytes) and not isinstance(data, str):
        # NOTE(review): the page encoding is not known from here; UTF-8 with
        # replacement is assumed -- confirm against the server's response.
        data = data.decode('utf-8', 'replace')
    return parseString(data)


def parseFileName(fn):
    """Parse the alias page stored in the file named ``fn``."""
    with open(fn, 'r') as f:
        return parseFile(f)


def parseURL(url):
    """Fetch ``url`` and parse the response as the alias page."""
    with contextlib.closing(urlopen(url)) as response:
        return parseFile(response)


def getAliasRE(aliases):
    """Build the regex used to find alias names in PHP source.

    Group 1 captures an optional ``->`` or ``$`` prefix, which callers use
    to skip method calls and variables; group 2 captures the alias name.

    ``aliases`` is any iterable of names (typically the dict returned by
    ``parseString``).  Names are escaped so they are matched literally.
    An empty collection yields a pattern that never matches.
    """
    names = sorted(aliases, key=lambda name: (-len(name), name))
    if names:
        alternation = '|'.join(re.escape(name) for name in names)
    else:
        # An empty group would match the empty string at every word
        # boundary; '(?!)' can never match.
        alternation = '(?!)'
    return re.compile(r'(->|\$|)\s*\b(%s)\b' % alternation)


def checkAliasesFile(alias_re, f):
    """Scan the lines of ``f`` for alias uses.

    ``f`` is any iterable of lines.  Returns a list of
    ``(line_number, alias)`` tuples with 1-based line numbers, in the
    order the matches occur.
    """
    found = []
    for line_num, line in enumerate(f, 1):
        for mo in alias_re.finditer(line):
            if mo.group(1):
                # Prefixed by '->' or '$': a method call or a variable,
                # not a call to the aliased function.
                continue
            found.append((line_num, mo.group(2)))
    return found


def checkAliases(alias_re, filename):
    """Scan the file named ``filename``; see ``checkAliasesFile``."""
    with open(filename, 'r') as f:
        return checkAliasesFile(alias_re, f)


def checkAliasesFiles(alias_re, filenames):
    """Scan every file in ``filenames``.

    Returns a list of ``(filename, line_number, alias)`` tuples.
    """
    found = []
    for filename in filenames:
        file_found = checkAliases(alias_re, filename)
        found.extend([(filename, n, a) for (n, a) in file_found])
    return found


def dumpResults(aliases, found, out=sys.stdout):
    """Write one ``file:line alias -> master`` line per match to ``out``."""
    for filename, n, a in found:
        out.write("%s:%d %s -> %s\n" % (filename, n, a, aliases[a]))


def main(alias_file, *filenames):
    """Check ``filenames`` against the alias table stored in ``alias_file``.

    Prints the matches to standard output and returns them as a list of
    ``(filename, line_number, alias)`` tuples.
    """
    aliases = parseFileName(alias_file)
    alias_re = getAliasRE(aliases)
    found = checkAliasesFiles(alias_re, filenames)
    dumpResults(aliases, found)
    return found


if __name__ == '__main__':
    if len(sys.argv) < 2:
        # Without the alias file there is nothing to do; show the usage
        # text instead of failing inside main().
        sys.stderr.write(__doc__)
        sys.exit(2)
    found = main(*sys.argv[1:])
    if found:
        sys.exit(1)