1<?php
2/**
3 * JSMin.php - modified PHP implementation of Douglas Crockford's JSMin.
4 *
5 * <code>
6 * $minifiedJs = JSMin::minify($js);
7 * </code>
8 *
9 * This is a modified port of jsmin.c. Improvements:
10 *
11 * Does not choke on some regexp literals containing quote characters. E.g. /'/
12 *
13 * Spaces are preserved after some add/sub operators, so they are not mistakenly
14 * converted to post-inc/dec. E.g. a + ++b -> a+ ++b
15 *
16 * Preserves multi-line comments that begin with /*!
17 *
18 * PHP 5 or higher is required.
19 *
20 * Permission is hereby granted to use this version of the library under the
21 * same terms as jsmin.c, which has the following license:
22 *
23 * --
24 * Copyright (c) 2002 Douglas Crockford  (www.crockford.com)
25 *
26 * Permission is hereby granted, free of charge, to any person obtaining a copy of
27 * this software and associated documentation files (the "Software"), to deal in
28 * the Software without restriction, including without limitation the rights to
29 * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
30 * of the Software, and to permit persons to whom the Software is furnished to do
31 * so, subject to the following conditions:
32 *
33 * The above copyright notice and this permission notice shall be included in all
34 * copies or substantial portions of the Software.
35 *
36 * The Software shall be used for Good, not Evil.
37 *
38 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
39 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
40 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
41 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
42 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
43 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
44 * SOFTWARE.
45 * --
46 *
47 * @package JSMin
48 * @author Ryan Grove <ryan@wonko.com> (PHP port)
49 * @author Steve Clay <steve@mrclay.org> (modifications + cleanup)
50 * @author Andrea Giammarchi <http://www.3site.eu> (spaceBeforeRegExp)
51 * @copyright 2002 Douglas Crockford <douglas@crockford.com> (jsmin.c)
52 * @copyright 2008 Ryan Grove <ryan@wonko.com> (PHP port)
53 * @license http://opensource.org/licenses/mit-license.php MIT License
54 * @link http://code.google.com/p/jsmin-php/
55 */
56
/**
 * Character-level JavaScript minifier (state machine ported from jsmin.c).
 *
 * The machine keeps two characters, A ($a) and B ($b). On each step it decides
 * whether to output A, drop A, or drop B, then advances through the input.
 * String literals and regexp literals are copied through verbatim.
 */
class JSMin {
    const ORD_LF            = 10;
    const ORD_SPACE         = 32;
    const ACTION_KEEP_A     = 1;
    const ACTION_DELETE_A   = 2;
    const ACTION_DELETE_A_B = 3;

    /** @var string|null current character ("A"); null once the input is exhausted */
    protected $a           = "\n";
    /** @var string|null character following A ("B"); null once the input is exhausted */
    protected $b           = '';
    /** @var string source being minified */
    protected $input       = '';
    /** @var int offset of the next unread byte of $input */
    protected $inputIndex  = 0;
    /** @var int byte length of $input (set by min()) */
    protected $inputLength = 0;
    /** @var string|null character already read by peek() but not yet returned by get() */
    protected $lookAhead   = null;
    /** @var string minified result accumulated so far */
    protected $output      = '';
    /** @var string last character appended to $output by action() */
    protected $lastByteOut  = '';
    /** @var string preserved comment text waiting to be appended after the next kept A */
    protected $keptComment = '';

    /**
     * Minify Javascript.
     *
     * @param string $js Javascript to be minified
     *
     * @return string
     */
    public static function minify($js)
    {
        $jsmin = new JSMin($js);
        return $jsmin->min();
    }

    /**
     * @param string $input Javascript source; null is treated as an empty string
     */
    public function __construct($input)
    {
        // Cast so that a null input does not reach str_replace()/strlen(),
        // which deprecate null arguments as of PHP 8.1.
        $this->input = (string) $input;
    }

    /**
     * Perform minification, return result
     *
     * If a previous call already produced non-empty output, that output is
     * returned without re-running the minifier.
     *
     * @return string
     *
     * @throws JSMin_UnterminatedStringException
     * @throws JSMin_UnterminatedRegExpException
     * @throws JSMin_UnterminatedCommentException
     */
    public function min()
    {
        if ($this->output !== '') { // min already run
            return $this->output;
        }

        // With mbstring.func_overload & 2 the str* functions are multibyte-aware;
        // force byte semantics while we run and restore the encoding afterwards.
        $mbIntEnc = null;
        if (function_exists('mb_strlen') && ((int)ini_get('mbstring.func_overload') & 2)) {
            $mbIntEnc = mb_internal_encoding();
            mb_internal_encoding('8bit');
        }

        try {
            // Strip a UTF-8 byte order mark. All three bytes are checked so that
            // source merely starting with a 0xEF lead byte is left intact.
            if (strncmp($this->input, "\xef\xbb\xbf", 3) === 0) {
                $this->input = substr($this->input, 3);
            }

            $this->input = str_replace("\r\n", "\n", $this->input);
            $this->inputLength = strlen($this->input);

            $this->action(self::ACTION_DELETE_A_B);

            while ($this->a !== null) {
                // determine next command
                $command = self::ACTION_KEEP_A; // default
                if ($this->a === ' ') {
                    if (($this->lastByteOut === '+' || $this->lastByteOut === '-')
                            && ($this->b === $this->lastByteOut)) {
                        // Don't delete this space. If we do, the addition/subtraction
                        // could be parsed as a post-increment
                    } elseif (! $this->isAlphaNum($this->b)) {
                        $command = self::ACTION_DELETE_A;
                    }
                } elseif ($this->a === "\n") {
                    if ($this->b === ' ') {
                        $command = self::ACTION_DELETE_A_B;

                        // b must be checked for null before strpos(): a null needle
                        // is not valid input for strpos()/mb_strpos()
                    } elseif ($this->b === null
                              || (false === strpos('{[(+-!~', $this->b)
                                  && ! $this->isAlphaNum($this->b))) {
                        $command = self::ACTION_DELETE_A;
                    }
                } elseif (! $this->isAlphaNum($this->a)) {
                    if ($this->b === ' '
                        || ($this->b === "\n"
                            && (false === strpos('}])+-"\'', $this->a)))) {
                        $command = self::ACTION_DELETE_A_B;
                    }
                }
                $this->action($command);
            }
            $this->output = trim($this->output);
        } catch (Exception $e) {
            // Do not leave the process-wide internal encoding changed when
            // minification fails.
            if ($mbIntEnc !== null) {
                mb_internal_encoding($mbIntEnc);
            }
            throw $e;
        }

        if ($mbIntEnc !== null) {
            mb_internal_encoding($mbIntEnc);
        }
        return $this->output;
    }

    /**
     * ACTION_KEEP_A = Output A. Copy B to A. Get the next B.
     * ACTION_DELETE_A = Copy B to A. Get the next B.
     * ACTION_DELETE_A_B = Get the next B.
     *
     * String literals (when copying B to A) and regexp literals (when fetching
     * the next B) are copied to the output without modification.
     *
     * @param int $command one of the ACTION_* constants
     * @throws JSMin_UnterminatedRegExpException|JSMin_UnterminatedStringException
     */
    protected function action($command)
    {
        // make sure we don't compress "a + ++b" to "a+++b", etc.
        if ($command === self::ACTION_DELETE_A_B
            && $this->b === ' '
            && ($this->a === '+' || $this->a === '-')) {
            // The operator may be the last token before end of input (e.g. "a + "
            // or "a +/* c */"), so the next index is not guaranteed to exist.
            if (isset($this->input[$this->inputIndex])
                && $this->input[$this->inputIndex] === $this->a) {
                // This is "+ +" or "- -". Don't delete the space.
                $command = self::ACTION_KEEP_A;
            }
        }

        switch ($command) {
            case self::ACTION_KEEP_A: // 1
                $this->output .= $this->a;

                if ($this->keptComment) {
                    // the kept comment supplies its own line breaks
                    $this->output = rtrim($this->output, "\n");
                    $this->output .= $this->keptComment;
                    $this->keptComment = '';
                }

                $this->lastByteOut = $this->a;

                // fallthrough intentional
            case self::ACTION_DELETE_A: // 2
                $this->a = $this->b;
                if ($this->a === "'" || $this->a === '"') { // string literal
                    $str = $this->a; // in case needed for exception
                    for(;;) {
                        $this->output .= $this->a;
                        $this->lastByteOut = $this->a;

                        $this->a = $this->get();
                        if ($this->a === $this->b) { // end quote (b still holds the opening quote)
                            break;
                        }
                        if ($this->isEOF($this->a)) {
                            $byte = $this->inputIndex - 1;
                            throw new JSMin_UnterminatedStringException(
                                "JSMin: Unterminated String at byte {$byte}: {$str}");
                        }
                        $str .= $this->a;
                        if ($this->a === '\\') {
                            // copy the backslash; the escaped character is emitted
                            // at the top of the next iteration
                            $this->output .= $this->a;
                            $this->lastByteOut = $this->a;

                            $this->a       = $this->get();
                            $str .= $this->a;
                        }
                    }
                }

                // fallthrough intentional
            case self::ACTION_DELETE_A_B: // 3
                $this->b = $this->next();
                if ($this->b === '/' && $this->isRegexpLiteral()) {
                    $this->output .= $this->a . $this->b;
                    $pattern = '/'; // keep entire pattern in case we need to report it in the exception
                    for(;;) {
                        $this->a = $this->get();
                        $pattern .= $this->a;
                        if ($this->a === '[') {
                            // character set: a "/" in here does not end the pattern
                            for(;;) {
                                $this->output .= $this->a;
                                $this->a = $this->get();
                                $pattern .= $this->a;
                                if ($this->a === ']') {
                                    break;
                                }
                                if ($this->a === '\\') {
                                    $this->output .= $this->a;
                                    $this->a = $this->get();
                                    $pattern .= $this->a;
                                }
                                if ($this->isEOF($this->a)) {
                                    throw new JSMin_UnterminatedRegExpException(
                                        "JSMin: Unterminated set in RegExp at byte "
                                            . $this->inputIndex .": {$pattern}");
                                }
                            }
                        }

                        if ($this->a === '/') { // end pattern
                            break; // leave the pattern loop
                        } elseif ($this->a === '\\') {
                            $this->output .= $this->a;
                            $this->a = $this->get();
                            $pattern .= $this->a;
                        } elseif ($this->isEOF($this->a)) {
                            $byte = $this->inputIndex - 1;
                            throw new JSMin_UnterminatedRegExpException(
                                "JSMin: Unterminated RegExp at byte {$byte}: {$pattern}");
                        }
                        $this->output .= $this->a;
                        $this->lastByteOut = $this->a;
                    }
                    $this->b = $this->next();
                }
            // end case ACTION_DELETE_A_B
        }
    }

    /**
     * Decide whether the "/" currently held in B starts a regexp literal
     * (as opposed to being a division operator).
     *
     * Side effect: when the "/" follows a keyword and A is whitespace, A is
     * replaced by an empty string so the whitespace is not emitted.
     *
     * @return bool
     */
    protected function isRegexpLiteral()
    {
        if (false !== strpos("(,=:[!&|?+-~*{;", $this->a)) {
            // we can't divide after these tokens
            return true;
        }

        // check if first non-ws token is "/" (see starts-regex.js)
        $length = strlen($this->output);
        if ($this->a === ' ' || $this->a === "\n") {
            if ($length < 2) { // weird edge case
                return true;
            }
        }

        // if the "/" follows a keyword, it must be a regexp, otherwise it's best to assume division

        $subject = $this->output . trim($this->a);
        if (!preg_match('/(?:case|else|in|return|typeof)$/', $subject, $m)) {
            // not a keyword
            return false;
        }

        // can't be sure it's a keyword yet (see not-regexp.js)
        $charBeforeKeyword = substr($subject, 0 - strlen($m[0]) - 1, 1);
        if ($this->isAlphaNum($charBeforeKeyword)) {
            // this is really an identifier ending in a keyword, e.g. "xreturn"
            return false;
        }

        // it's a regexp. Remove unneeded whitespace after keyword
        if ($this->a === ' ' || $this->a === "\n") {
            $this->a = '';
        }

        return true;
    }

    /**
     * Return the next character from the input. Watch out for lookahead. If the character is a control character,
     * translate it to a space or linefeed.
     *
     * @return string|null null when the input is exhausted
     */
    protected function get()
    {
        $c = $this->lookAhead;
        $this->lookAhead = null;
        if ($c === null) {
            // getc(stdin)
            if ($this->inputIndex < $this->inputLength) {
                $c = $this->input[$this->inputIndex];
                $this->inputIndex += 1;
            } else {
                $c = null;
            }
        }
        // null is tested first: ord(null) is deprecated as of PHP 8.1
        if ($c === null || $c === "\n" || ord($c) >= self::ORD_SPACE) {
            return $c;
        }
        if ($c === "\r") {
            return "\n";
        }
        return ' ';
    }

    /**
     * Does $a indicate end of input?
     *
     * True for null (input exhausted) and for any character whose byte value
     * is at most ORD_LF, which includes the linefeed itself.
     *
     * @param string|null $a
     * @return bool
     */
    protected function isEOF($a)
    {
        return $a === null || ord($a) <= self::ORD_LF;
    }

    /**
     * Get next char (without getting it). If is ctrl character, translate to a space or newline.
     *
     * The character is stored in $lookAhead and handed out by the next get().
     *
     * @return string|null
     */
    protected function peek()
    {
        $this->lookAhead = $this->get();
        return $this->lookAhead;
    }

    /**
     * Return true if the character is a letter, digit, underscore, dollar sign, backslash, or non-ASCII character.
     *
     * @param string|null $c null (end of input) is never alphanumeric
     *
     * @return bool
     */
    protected function isAlphaNum($c)
    {
        if ($c === null) {
            // preg_match()/ord() deprecate null arguments as of PHP 8.1
            return false;
        }
        return (preg_match('/^[a-z0-9A-Z_\\$\\\\]$/', $c) || ord($c) > 126);
    }

    /**
     * Consume a single line comment from input (possibly retaining it)
     *
     * Reads up to and including the terminating linefeed, or to end of input.
     * IE conditional-compilation comments are queued in $keptComment.
     */
    protected function consumeSingleLineComment()
    {
        $comment = '';
        while (true) {
            $get = $this->get();
            $comment .= $get;
            if ($get === null || ord($get) <= self::ORD_LF) { // end of line (or input) reached
                // if IE conditional comment
                if (preg_match('/^\\/@(?:cc_on|if|elif|else|end)\\b/', $comment)) {
                    $this->keptComment .= "/{$comment}";
                }
                return;
            }
        }
    }

    /**
     * Consume a multiple line comment from input (possibly retaining it)
     *
     * Comments starting with "!" and IE conditional-compilation comments are
     * queued in $keptComment; all others are discarded.
     *
     * @throws JSMin_UnterminatedCommentException
     */
    protected function consumeMultipleLineComment()
    {
        $this->get(); // discard the "*" that next() peeked at
        $comment = '';
        for(;;) {
            $get = $this->get();
            if ($get === '*') {
                if ($this->peek() === '/') { // end of comment reached
                    $this->get();
                    if (0 === strpos($comment, '!')) {
                        // preserved by YUI Compressor
                        if (!$this->keptComment) {
                            // don't prepend a newline if two comments right after one another
                            $this->keptComment = "\n";
                        }
                        $this->keptComment .= "/*!" . substr($comment, 1) . "*/\n";
                    } else if (preg_match('/^@(?:cc_on|if|elif|else|end)\\b/', $comment)) {
                        // IE conditional
                        $this->keptComment .= "/*{$comment}*/";
                    }
                    return;
                }
            } elseif ($get === null) {
                throw new JSMin_UnterminatedCommentException(
                    "JSMin: Unterminated comment at byte {$this->inputIndex}: /*{$comment}");
            }
            $comment .= $get;
        }
    }

    /**
     * Get the next character, skipping over comments. Some comments may be preserved.
     *
     * A single line comment is replaced by a linefeed and a multiple line
     * comment by a space.
     *
     * @return string|null
     */
    protected function next()
    {
        $get = $this->get();
        if ($get === '/') {
            switch ($this->peek()) {
                case '/':
                    $this->consumeSingleLineComment();
                    $get = "\n";
                    break;
                case '*':
                    $this->consumeMultipleLineComment();
                    $get = ' ';
                    break;
            }
        }
        return $get;
    }
}
453
/**
 * Raised by JSMin when a string literal is not closed before the end of the
 * line or the end of the input.
 */
class JSMin_UnterminatedStringException extends Exception
{
}
/**
 * Raised by JSMin when a multiple line comment is still open at the end of
 * the input.
 */
class JSMin_UnterminatedCommentException extends Exception
{
}
/**
 * Raised by JSMin when a regular expression literal (or a character set
 * inside one) is not closed before the end of the line or the end of the input.
 */
class JSMin_UnterminatedRegExpException extends Exception
{
}
457