1#!/usr/bin/env python
2
3"""This script searches files for functions that are just aliases in
4PHP source code. This is not 100% reliable, so it should not be
5automated, but it's useful to run once in a while to make sure that
6all of the matches it finds are not really legitimate aliases.
7
8Usage:
9
10  parse_aliases.py <name of alias file> [PHP source code filename]...
11"""
12
13import sys
14
# The alias list is parsed from the page served at this URL; fetch it to
# obtain the alias file.
alias_url = "http://www.zend.com/phpfunc/all_aliases.php"

# Marker strings that bracket the alias table inside the fetched page.
header_tok = "<!-- END OF HEADER -->"
footer_tok = "<!-- FOOTER -->"
20
21# Example line of the table that we parse:
22# '<tr bgcolor="#EFEFFF"><td><a href="function.bzclose.php">bzclose</a></td><td><a href="http://lxr.php.net/source/php-src/ext/bz2/bz2.c#48">php-src/ext/bz2/bz2.c</a></td><td><a href="function.fclose.php">fclose</a></td></tr>'
23
24import re
25
# Matches one complete row of the alias table (see the example line above).
# The pattern is compiled with re.VERBOSE, so unescaped whitespace inside it
# is ignored and each literal space is written as '\ '.
#
# Capture groups:
#   1 - link text of the first cell (used by parseString as the alias name)
#   2 - link text of the third cell, when that cell holds a link to a
#       '.php' page
#   3 - plain text of the third cell, when it holds no link
# The second cell must contain a link but nothing in it is captured.
line_re = re.compile(r'''
\A

<tr\ bgcolor="[^">]+">

<td><a\ href="[^>"]+\.php">([^<>]+)</a></td>

<td><a\ href="[^">]+">[^<>]+</a></td>

<td>
(?:
    <a\ href="[^">]+\.php">
    ( [^<>]+ )
    </a>
|   ( [^<>]+ )
)
</td>

</tr>

\Z
''', re.VERBOSE)
48
def parseString(s):
    """Parse the text of the aliases page into an alias -> master dict.

    The alias table is taken from between ``header_tok`` and
    ``footer_tok``.  Its first line must open the table, its last two
    lines must be ``</table>`` and an empty line, and every line in
    between must match ``line_re``.  Any deviation trips an assertion
    (or a ValueError if one of the marker tokens is missing).
    """
    _preamble, remainder = s.split(header_tok, 1)
    table_text, _trailer = remainder.split(footer_tok, 1)

    rows = table_text.split('\n')

    # Sanity-check the framing lines before discarding them.
    tail = [row.strip() for row in rows[-2:]]
    assert tail == ['</table>', '']
    assert rows[0].strip().startswith('<table')
    rows = rows[1:-2]

    aliases = {}
    for row in rows:
        mo = line_re.match(row)
        assert mo, row
        alias, linked_master, plain_master = mo.groups()
        # The master name comes from the link text when the cell has a
        # link, and from the cell's plain text otherwise.
        if not linked_master:
            assert plain_master
        aliases[alias] = linked_master or plain_master

    return aliases
70
def parseFile(f):
    """Read everything from the file-like object *f* and parse it.

    Returns whatever parseString returns for the text that was read.
    """
    contents = f.read()
    return parseString(contents)
73
def parseFileName(fn):
    """Open the file named *fn*, parse it with parseFile and return the result.

    The file is closed before returning, including when parsing raises.
    """
    with open(fn, 'r') as alias_file:
        return parseFile(alias_file)
76
def parseURL(url):
    """Fetch *url* and parse the response body with parseFile.

    The response object is closed before returning, including when
    fetching the body or parsing it raises.
    """
    # urllib2 is not imported at module level, so import it here; without
    # this the function fails with NameError as soon as it is called.
    import urllib2

    response = urllib2.urlopen(url)
    try:
        return parseFile(response)
    finally:
        response.close()
79
def getAliasRE(aliases):
    """Build the regular expression used to find alias names in source text.

    *aliases* is a mapping (or any iterable) of alias names.  In the
    returned pattern, group 1 is '->' or '$' when the name is directly
    preceded by one of them (optionally separated by whitespace) and is
    empty otherwise; group 2 is the alias name that matched as a whole
    word.
    """
    if not aliases:
        # An empty alternation would match the empty string at every word
        # boundary and report a bogus '' alias; '(?!)' can never match, so
        # the resulting pattern finds nothing while keeping both groups.
        return re.compile(r'(->|\$|)\s*\b((?!))\b')

    # Escape each name so that regex metacharacters are matched literally;
    # sorting just makes the generated pattern reproducible.
    names = '|'.join([re.escape(name) for name in sorted(aliases)])
    return re.compile(r'(->|\$|)\s*\b(%s)\b' % names)
82
def checkAliasesFile(alias_re, f):
    """Scan the lines of *f* for alias names matched by *alias_re*.

    *f* is iterated line by line.  Matches whose first group is non-empty
    are ignored.  Returns a list of ``(line_number, alias)`` tuples, with
    line numbers starting at 1.
    """
    hits = []
    for index, text in enumerate(f):
        for mo in alias_re.finditer(text):
            if not mo.group(1):
                hits.append((index + 1, mo.group(2)))
    return hits
94
def checkAliases(alias_re, filename):
    """Open the file named *filename* and scan it with checkAliasesFile.

    Returns the list produced by checkAliasesFile.  The file is closed
    before returning, including when scanning raises.
    """
    with open(filename, 'r') as source:
        return checkAliasesFile(alias_re, source)
97
def checkAliasesFiles(alias_re, filenames):
    """Run checkAliases over every name in *filenames*.

    Returns one flat list of ``(filename, line_number, alias)`` tuples,
    in the order the files were given.
    """
    return [
        (filename, n, a)
        for filename in filenames
        for (n, a) in checkAliases(alias_re, filename)
    ]
104
def dumpResults(aliases, found, out=sys.stdout):
    """Write one report line per match to *out*.

    *found* is a sequence of ``(filename, line_number, alias)`` tuples and
    *aliases* maps each alias to the name it stands for.  Each line has
    the form ``<filename>:<line> <alias> -> <master>``.
    """
    for filename, n, a in found:
        # out.write() emits the same text as the old 'print >>out'
        # statement, without depending on Python-2-only syntax.
        out.write("%s:%d %s -> %s\n" % (filename, n, a, aliases[a]))
108
def main(alias_file, *filenames):
    """Check the given files for alias usage and print a report.

    *alias_file* names the file holding the alias table; every remaining
    argument names a file to scan.  The matches are written with
    dumpResults and also returned as a list.
    """
    alias_map = parseFileName(alias_file)
    matches = checkAliasesFiles(getAliasRE(alias_map), filenames)
    dumpResults(alias_map, matches)
    return matches
115
if __name__ == '__main__':
    # main() needs the alias file as its first argument.  Without it, show
    # the usage text instead of failing with a TypeError traceback.
    if len(sys.argv) < 2:
        sys.stderr.write(
            __doc__
            or 'Usage: parse_aliases.py <name of alias file> '
               '[PHP source code filename]...\n')
        sys.exit(2)

    found = main(*sys.argv[1:])
    # Exit non-zero when any alias use was reported.
    if found:
        sys.exit(1)
120