1<?php
2
3/**
4 * PHP lexer code snarfed from the CVS tree for the lamplib project at
5 * http://sourceforge.net/projects/lamplib
6 * This project is administered by Markus Baker, Harry Fuecks and Matt
7 * Mitchell, and the project  code is in the public domain.
8 *
9 * Thanks, guys!
10 *
11 * @package   moodlecore
12 * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
13 * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
14 */
15
/*
 * Token kinds. The lexer passes one of these to the parser handler as its
 * second argument so the handler knows why it is being called.
 */

/** The matched token caused a new mode to be entered. */
define('LEXER_ENTER', 1);

/** The matched token was recognised without any mode change. */
define('LEXER_MATCHED', 2);

/** The text was not recognised by any pattern of the current mode. */
define('LEXER_UNMATCHED', 3);

/** The matched token causes the current mode to be left. */
define('LEXER_EXIT', 4);

/** The matched token is handled in a special mode for this one token only. */
define('LEXER_SPECIAL', 5);
26
27    /**
28     * Compounded regular expression. Any of
29     * the contained patterns could match and
30     * when one does its label is returned.
31     * @package   moodlecore
32     * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
33     * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
34     */
class ParallelRegex {
    /** @var array Raw patterns, exactly as passed to addPattern(), in insertion order. */
    public $_patterns;
    /** @var array Labels, index-aligned with $_patterns. */
    public $_labels;
    /** @var string|null Cached compound regex; null whenever it has to be rebuilt. */
    public $_regex;
    /** @var bool True for case sensitive matching, false for insensitive. */
    public $_case;

    /**
     *    Constructor. Starts with no patterns.
     *    @param bool $case    True for case sensitive, false
     *                    for insensitive.
     *    @access public
     */
    public function __construct($case) {
        $this->_case = $case;
        $this->_patterns = array();
        $this->_labels = array();
        $this->_regex = null;
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function ParallelRegex($case) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($case);
    }

    /**
     *    Adds a pattern with an optional label. Invalidates the
     *    cached compound regex so it is rebuilt on the next match.
     *    @param string $pattern      Perl style regex, but ( and )
     *                         lose the usual meaning.
     *    @param string $label        Label of regex to be returned
     *                         on a match.
     *    @access public
     */
    public function addPattern($pattern, $label = true) {
        $count = count($this->_patterns);
        $this->_patterns[$count] = $pattern;
        $this->_labels[$count] = $label;
        $this->_regex = null;
    }

    /**
     *    Attempts to match all patterns at once against
     *    a string.
     *    @param string $subject      String to match against.
     *    @param string $match        First matched portion of
     *                         subject. Set to "" when nothing
     *                         matched.
     *    @return bool|string      Label of the pattern that matched
     *                         (true if it has no explicit label),
     *                         false if nothing matched.
     *    @access public
     */
    public function match($subject, &$match) {
        // Always leave the out parameter in a defined state.
        $match = "";
        if (count($this->_patterns) == 0) {
            return false;
        }
        if (!preg_match($this->_getCompoundedRegex(), $subject, $matches)) {
            return false;
        }
        $match = $matches[0];
        $groups = count($matches);
        for ($i = 1; $i < $groups; $i++) {
            // Compare against "" rather than testing truthiness, otherwise
            // a token that is exactly "0" would lose its label.
            if ($matches[$i] !== "") {
                return $this->_labels[$i - 1];
            }
        }
        return true;
    }

    /**
     *    Compounds the patterns into a single
     *    regular expression separated with the
     *    "or" operator. Caches the regex.
     *    Will automatically escape (, ) and / tokens.
     *
     *    The stored patterns are left untouched; escaping is done
     *    on a local copy so that rebuilding the regex after a later
     *    addPattern() call does not escape the same pattern twice.
     *    @return string       The compound regex with delimiters and flags.
     *    @access private
     */
    public function _getCompoundedRegex() {
        if ($this->_regex === null) {
            $compiled = array();
            foreach ($this->_patterns as $pattern) {
                // One capture group per pattern: the group index identifies
                // which pattern (and therefore which label) matched.
                $compiled[] = '(' . str_replace(
                        array('/', '(', ')'),
                        array('\/', '\(', '\)'),
                        $pattern) . ')';
            }
            $this->_regex = "/" . implode("|", $compiled) . "/" . $this->_getPerlMatchingFlags();
        }
        return $this->_regex;
    }

    /**
     *    Accessor for perl regex mode flags to use.
     *    @return string       Flags as string.
     *    @access private
     */
    public function _getPerlMatchingFlags() {
        return ($this->_case ? "msS" : "msSi");
    }
}
134
135    /**
136     * States for a stack machine.
137     *
138     * @package   moodlecore
139     * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
140     * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
141     */
class StateStack {
    /** @var array State names, oldest first; the last element is the current state. */
    public $_stack;

    /**
     *    Constructor. The stack begins with a single entry,
     *    the named starting state.
     *    @param string $start        Starting state name.
     *    @access public
     */
    public function __construct($start) {
        $this->_stack = array();
        $this->_stack[] = $start;
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function StateStack($start) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($start);
    }

    /**
     *    Reads the state on top of the stack without removing it.
     *    @return string State as string.
     *    @access public
     */
    public function getCurrent() {
        $top = count($this->_stack) - 1;
        return $this->_stack[$top];
    }

    /**
     *    Pushes a state, making it the current one.
     *    @param string $state        New state.
     *    @access public
     */
    public function enter($state) {
        $this->_stack[] = $state;
    }

    /**
     *    Pops the current state so the previous one becomes
     *    current again. The last remaining state is never popped.
     *    @return bool     False if only one state is left and
     *                nothing was popped, true otherwise.
     *    @access public
     */
    public function leave() {
        $poppable = (count($this->_stack) != 1);
        if ($poppable) {
            array_pop($this->_stack);
        }
        return $poppable;
    }
}
198
199    /**
200     * Accepts text and breaks it into tokens.
201     * Some optimisation to make sure the
202     * content is only scanned by the PHP regex
203     * parser once. Lexer modes must not start
204     * with leading underscores.
205     *
206     * @package   moodlecore
207     * @copyright Markus Baker, Harry Fuecks and Matt Mitchell
208     * @license   Public Domain {@link http://sourceforge.net/projects/lamplib}
209     */
class Lexer {
    /** @var array Map of mode name => ParallelRegex holding that mode's patterns. */
    public $_regexes;
    /** @var object Parser whose methods are called with each token. */
    public $_parser;
    /** @var StateStack Stack of modes; the top entry is the current mode. */
    public $_mode;
    /** @var array Map of mode name => handler method name overriding the default. */
    public $_mode_handlers;
    /** @var bool True for case sensitive matching. */
    public $_case;

    /**
     *    Sets up the lexer in case insensitive matching
     *    by default.
     *    @param object $parser     Handling strategy by
     *                       reference.
     *    @param string $start      Starting handler.
     *    @param bool $case       True for case sensitive.
     *    @access public
     */
    public function __construct(&$parser, $start = "accept", $case = false) {
        $this->_case = $case;
        $this->_regexes = array();
        $this->_parser = &$parser;
        $this->_mode = new StateStack($start);
        $this->_mode_handlers = array();
    }

    /**
     * Old syntax of class constructor. Deprecated in PHP7.
     *
     * @deprecated since Moodle 3.1
     */
    public function Lexer(&$parser, $start = "accept", $case = false) {
        debugging('Use of class name as constructor is deprecated', DEBUG_DEVELOPER);
        self::__construct($parser, $start, $case);
    }

    /**
     *    Adds a token search pattern for a particular
     *    parsing mode. The pattern does not change the
     *    current mode.
     *    @param string $pattern      Perl style regex, but ( and )
     *                         lose the usual meaning.
     *    @param string $mode         Should only apply this
     *                         pattern when dealing with
     *                         this type of input.
     *    @access public
     */
    public function addPattern($pattern, $mode = "accept") {
        $this->_getRegexForMode($mode)->addPattern($pattern);
    }

    /**
     *    Adds a pattern that will enter a new parsing
     *    mode. Useful for entering parenthesis, strings,
     *    tags, etc.
     *    @param string $pattern      Perl style regex, but ( and )
     *                         lose the usual meaning.
     *    @param string $mode         Should only apply this
     *                         pattern when dealing with
     *                         this type of input.
     *    @param string $new_mode     Change parsing to this new
     *                         nested mode.
     *    @access public
     */
    public function addEntryPattern($pattern, $mode, $new_mode) {
        $this->_getRegexForMode($mode)->addPattern($pattern, $new_mode);
    }

    /**
     *    Adds a pattern that will exit the current mode
     *    and re-enter the previous one.
     *    @param string $pattern      Perl style regex, but ( and )
     *                         lose the usual meaning.
     *    @param string $mode         Mode to leave.
     *    @access public
     */
    public function addExitPattern($pattern, $mode) {
        $this->_getRegexForMode($mode)->addPattern($pattern, "__exit");
    }

    /**
     *    Adds a pattern that has a special mode.
     *    Acts as an entry and exit pattern in one go.
     *    @param string $pattern      Perl style regex, but ( and )
     *                         lose the usual meaning.
     *    @param string $mode         Should only apply this
     *                         pattern when dealing with
     *                         this type of input.
     *    @param string $special      Use this mode for this one token.
     *    @access public
     */
    public function addSpecialPattern($pattern, $mode, $special) {
        // The leading underscore is how _dispatchTokens() recognises a special mode.
        $this->_getRegexForMode($mode)->addPattern($pattern, "_$special");
    }

    /**
     *    Adds a mapping from a mode to another handler.
     *    @param string $mode        Mode to be remapped.
     *    @param string $handler     New target handler.
     *    @access public
     */
    public function mapHandler($mode, $handler) {
        $this->_mode_handlers[$mode] = $handler;
    }

    /**
     *    Splits the page text into tokens. Will fail
     *    if the handlers report an error or if no
     *    content is consumed. If successful then each
     *    unparsed and parsed token invokes a call to the
     *    held listener.
     *    @param string $raw        Raw HTML text.
     *    @return bool           True on success, else false.
     *    @access public
     */
    public function parse($raw) {
        if (!isset($this->_parser)) {
            return false;
        }
        $length = strlen($raw);
        while (is_array($parsed = $this->_reduce($raw))) {
            list($unmatched, $matched, $mode) = $parsed;
            if (!$this->_dispatchTokens($unmatched, $matched, $mode)) {
                return false;
            }
            // A match that consumed nothing would loop forever; treat it as a failure.
            if (strlen($raw) == $length) {
                return false;
            }
            $length = strlen($raw);
        }
        // _reduce() returned false: the current mode has no patterns registered.
        if (!$parsed) {
            return false;
        }
        // Whatever is left over did not match anything.
        return $this->_invokeParser($raw, LEXER_UNMATCHED);
    }

    /**
     *    Sends the matched token and any leading unmatched
     *    text to the parser changing the lexer to a new
     *    mode if one is listed.
     *    @param string $unmatched    Unmatched leading portion.
     *    @param string $matched      Actual token match.
     *    @param string $mode         Mode after match. The "__exit"
     *                         mode causes a stack pop. A mode
     *                         starting with "_" is a special
     *                         mode used for this token only.
     *                         A non-string mode causes no change.
     *    @return bool              False if there was any error
     *                         from the parser.
     *    @access private
     */
    public function _dispatchTokens($unmatched, $matched, $mode = false) {
        if (!$this->_invokeParser($unmatched, LEXER_UNMATCHED)) {
            return false;
        }
        if ($mode === "__exit") {
            if (!$this->_invokeParser($matched, LEXER_EXIT)) {
                return false;
            }
            return $this->_mode->leave();
        }
        // Plain patterns carry a boolean label, so only inspect real strings.
        if (is_string($mode) && strncmp($mode, "_", 1) === 0) {
            $mode = substr($mode, 1);
            $this->_mode->enter($mode);
            if (!$this->_invokeParser($matched, LEXER_SPECIAL)) {
                return false;
            }
            return $this->_mode->leave();
        }
        if (is_string($mode)) {
            $this->_mode->enter($mode);
            return $this->_invokeParser($matched, LEXER_ENTER);
        }
        return $this->_invokeParser($matched, LEXER_MATCHED);
    }

    /**
     *    Calls the parser method named after the current
     *    mode, or the handler mapped to that mode with
     *    mapHandler(). Empty content will be ignored.
     *    @param string $content        Text parsed.
     *    @param int $is_match          One of the LEXER_* token
     *                           kind constants.
     *    @return mixed                 True for ignored empty content,
     *                           otherwise whatever the handler returns.
     *    @access private
     */
    public function _invokeParser($content, $is_match) {
        if (($content === "") || ($content === false)) {
            return true;
        }
        $handler = $this->_mode->getCurrent();
        if (isset($this->_mode_handlers[$handler])) {
            $handler = $this->_mode_handlers[$handler];
        }
        return $this->_parser->$handler($content, $is_match);
    }

    /**
     *    Tries to match a chunk of text and if successful
     *    removes the recognised chunk and any leading
     *    unparsed data. Empty strings will not be matched.
     *    @param string $raw  The subject to parse. This is the
     *                        content that will be eaten.
     *    @return bool|array  Three item list of unparsed
     *                        content followed by the
     *                        recognised token and finally the
     *                        action the parser is to take.
     *                        True if no match, false if there
     *                        is a parsing error.
     *    @access private
     */
    public function _reduce(&$raw) {
        $current = $this->_mode->getCurrent();
        if (!isset($this->_regexes[$current])) {
            return false;
        }
        if ($raw === "") {
            return true;
        }
        if ($action = $this->_regexes[$current]->match($raw, $match)) {
            // NOTE(review): the token is located by searching for the first
            // occurrence of the matched text. For patterns using anchors or
            // look-around this may not be where the regex matched - confirm
            // before relying on such patterns.
            $count = strpos($raw, $match);
            $unparsed = substr($raw, 0, $count);
            $raw = substr($raw, $count + strlen($match));
            return array($unparsed, $match, $action);
        }
        return true;
    }

    /**
     *    Returns the pattern set for a mode, creating an empty one
     *    the first time the mode is seen.
     *    @param string $mode         Mode name.
     *    @return ParallelRegex       Pattern set for that mode.
     */
    private function _getRegexForMode($mode) {
        if (!isset($this->_regexes[$mode])) {
            $this->_regexes[$mode] = new ParallelRegex($this->_case);
        }
        return $this->_regexes[$mode];
    }
}
443?>
444