1#!/usr/local/bin/php
2<?php
3
// Anchor the working directory at this script's own directory so the
// relative require paths below resolve regardless of the caller's cwd.
chdir(dirname(__FILE__));
require_once 'common.php';
require_once '../library/HTMLPurifier.auto.php';

// assertCli() is not defined in this file (presumably provided by
// common.php); by its name it guards against non-command-line use.
assertCli();

// The scanner records line numbers taken from tokenizer output, hence the
// minimum PHP version stated in the message below.
if (!version_compare(PHP_VERSION, '5.2.2', '>=')) {
    echo "This script requires PHP 5.2.2 or later, for tokenizer line numbers.";
    exit(1);
}
13
14/**
15 * @file
16 * Scans HTML Purifier source code for $config tokens and records the
17 * directive being used; configdoc can use this info later.
18 *
 * The collected usage data is written to ../configdoc/usage.xml so that our
 * XSLT transform can use it; a summary count is also printed to the console.
21 */
22
// Collect every *.php file below the library directory, then filter the list
// down to the sources that should be scanned.
$FS = new FSTools();
chdir(dirname(__FILE__) . '/../library/');
$raw_files = $FS->globr('.', '*.php');
$files = array();
foreach ($raw_files as $file) {
    // Drop the leading './' that the search rooted at '.' leaves on each path.
    $file = substr($file, 2);

    // Generated files live under standalone/ (11 characters long).
    $is_generated = (substr($file, 0, 11) === 'standalone/');
    // Meta files carry more than one dot in their path.
    $is_meta = (substr_count($file, '.') > 1);

    if (!$is_generated && !$is_meta) {
        $files[] = $file;
    }
}
33
/**
 * Moves the $i cursor to the next non-whitespace token.
 *
 * The cursor is always advanced by at least one position, and then keeps
 * advancing while it points at a T_WHITESPACE token. If the token stream ends
 * first, $i is left equal to count($tokens); callers must check for that
 * before dereferencing $tokens[$i].
 *
 * @param array $tokens Token list in the format produced by token_get_all()
 * @param int   $i      Cursor into $tokens, updated in place (by reference)
 */
function consumeWhitespace($tokens, &$i)
{
    do {
        $i++;
        // isset() stops the scan at the end of the stream instead of reading
        // an undefined offset (which raises a notice/warning).
    } while (
        isset($tokens[$i])
        && is_array($tokens[$i])
        && $tokens[$i][0] === T_WHITESPACE
    );
}
41
/**
 * Checks a single tokenizer token against an expectation. Supported call forms:
 *      - ($token, $type): $type is an int; true when $token is an array token
 *        whose type (index 0) is identical to $type;
 *      - ($token, $text): $text is not an int; true when $token is identical
 *        to $text (i.e. a plain single-character token such as '(');
 *      - ($token, $type, $text): true when $token is an array token whose type
 *        is identical to $type and whose text (index 1) is identical to $text.
 *
 * @param array|string $token          Token as found in token_get_all() output
 * @param int|string   $value_or_token Expected token type, or expected literal
 * @param string|null  $value          Expected token text, or null to skip it
 * @return bool
 */
function testToken($token, $value_or_token, $value = null)
{
    // Three-argument form: both the type and the text have to match.
    if ($value !== null) {
        return is_array($token)
            && $token[0] === $value_or_token
            && $token[1] === $value;
    }

    // Two-argument form with a non-int expectation: literal comparison.
    if (!is_int($value_or_token)) {
        return $token === $value_or_token;
    }

    // Two-argument form with an int expectation: compare the token type only.
    return is_array($token) && $token[0] === $value_or_token;
}
58
// $full_counter counts every "->get(" call reached from a $config or
// $this->config expression; $counter counts the subset whose first argument is
// a plain string literal (the only form that is recorded).
$counter = 0;
$full_counter = 0;
// Recorded usages: $tracker[directive id][file name] = list of line numbers.
$tracker = array();

foreach ($files as $file) {
    $source = file_get_contents($file);
    if ($source === false) {
        // file_get_contents() has already raised a warning; there is nothing
        // to tokenize, so move on to the next file.
        continue;
    }
    $tokens = token_get_all($source);
    // Normalize directory separators so recorded names always use '/'.
    $file = str_replace('\\', '/', $file);

    for ($i = 0, $c = count($tokens); $i < $c; $i++) {
        // Step 1: find a config object, spelled either $config or
        // $this->config. consumeWhitespace() may advance the cursor past the
        // last token, so every dereference after it is guarded by $i < $c.
        $ok = testToken($tokens[$i], T_VARIABLE, '$config');
        if (!$ok && testToken($tokens[$i], T_VARIABLE, '$this')) {
            consumeWhitespace($tokens, $i);
            if ($i < $c && testToken($tokens[$i], T_OBJECT_OPERATOR)) {
                consumeWhitespace($tokens, $i);
                $ok = $i < $c && testToken($tokens[$i], T_STRING, 'config');
            }
        }
        if (!$ok) continue;

        // Step 2: scan forward for the next '->', giving up at the first
        // ',', ')' or ';' (other single-character tokens are skipped over).
        $ok = false;
        for ($i++; $i < $c; $i++) {
            if ($tokens[$i] === ',' || $tokens[$i] === ')' || $tokens[$i] === ';') {
                break;
            }
            if (is_string($tokens[$i])) continue;
            if ($tokens[$i][0] === T_OBJECT_OPERATOR) {
                $ok = true;
                break;
            }
        }
        if (!$ok) continue;

        // The usage is attributed to the line of the '->' operator.
        $line = $tokens[$i][2];

        // Step 3: the member being accessed must be a call to get(...).
        consumeWhitespace($tokens, $i);
        if ($i >= $c || !testToken($tokens[$i], T_STRING, 'get')) continue;

        consumeWhitespace($tokens, $i);
        if ($i >= $c || !testToken($tokens[$i], '(')) continue;

        $full_counter++;

        // Step 4: only calls whose first argument is a plain string literal
        // are recorded. Batch and wildcard retrievals are counted in
        // $full_counter above but are otherwise ignored for now.
        consumeWhitespace($tokens, $i);
        if ($i >= $c || !testToken($tokens[$i], T_CONSTANT_ENCAPSED_STRING)) continue;

        // Strip the surrounding quote characters from the literal.
        $id = substr($tokens[$i][1], 1, -1);

        $counter++;
        $tracker[$id][$file][] = $line;
    }
}
127
echo "\n$counter/$full_counter instances of \$config or \$this->config found in source code.\n";

echo "Generating XML... ";

// Serialize $tracker as:
//   <usage>
//     <directive id="..."><file name="..."><line>N</line>...</file>...</directive>
//   </usage>
// The path is relative to the current working directory.
$usage_path = '../configdoc/usage.xml';

$xw = new XMLWriter();
if (!$xw->openURI($usage_path)) {
    // Without this check every later call would fail and the script would
    // still report "done!" although nothing was written.
    echo "failed!\nCould not open $usage_path for writing.\n";
    exit(1);
}
$xw->setIndent(true);
$xw->startDocument('1.0', 'UTF-8');
$xw->startElement('usage');
// Loop variables are deliberately distinct from $files/$file so the scan
// results collected earlier are not overwritten.
foreach ($tracker as $id => $usages) {
    $xw->startElement('directive');
    // Array keys that look numeric come back as ints; the writer expects strings.
    $xw->writeAttribute('id', (string) $id);
    foreach ($usages as $name => $lines) {
        $xw->startElement('file');
        $xw->writeAttribute('name', (string) $name);
        foreach ($lines as $line) {
            $xw->writeElement('line', (string) $line);
        }
        $xw->endElement(); // file
    }
    $xw->endElement(); // directive
}
$xw->endElement(); // usage
$xw->endDocument();
$xw->flush();

echo "done!\n";
154
155// vim: et sw=4 sts=4
156