1<?php
2
3/*
4 * This file is part of the Linkify library.
5 *
6 * (c) University of Cambridge
7 *
8 * For the full copyright and license information, please view the LICENSE
9 * file that was distributed with this source code.
10 */
11
12namespace Misd\Linkify;
13
/**
 * Converts URLs and/or email addresses into HTML links.
 *
 * The input is treated as an HTML fragment: it is split into tags and text,
 * only the text parts are linkified, and text nested inside certain elements
 * (existing links, scripts, form controls, ...) is left untouched.
 *
 * Recognised options (constructor defaults and per-call options alike):
 *  - 'attr':     array of attribute name => value added to every generated <a>.
 *  - 'callback': callable invoked as ($url, $caption, $isEmail); a non-null
 *                return value replaces the generated link markup.
 */
class Linkify implements LinkifyInterface
{
    /**
     * Default options, combined with the per-call options on every call.
     *
     * @var array
     */
    protected $options;

    /**
     * Constructor.
     *
     * @param array $options Default options.
     */
    public function __construct(array $options = array())
    {
        $this->options = $options;
    }

    /**
     * {@inheritdoc}
     */
    public function process($text, array $options = array())
    {
        return $this->linkify($text, true, true, $options);
    }

    /**
     * {@inheritdoc}
     */
    public function processUrls($text, array $options = array())
    {
        return $this->linkify($text, true, false, $options);
    }

    /**
     * {@inheritdoc}
     */
    public function processEmails($text, array $options = array())
    {
        return $this->linkify($text, false, true, $options);
    }

    /**
     * Add links to text.
     *
     * @param string $text    Text to linkify.
     * @param bool   $urls    Linkify URLs?
     * @param bool   $emails  Linkify email addresses?
     * @param array  $options Options; per-call values take precedence over the defaults.
     *
     * @return string Linkified text. The input is returned unchanged when there
     *                is nothing to do or when it cannot be split into chunks.
     */
    protected function linkify($text, $urls = true, $emails = true, array $options = array())
    {
        if (false === $urls && false === $emails) {
            // nothing to do...
            return $text;
        }

        $options = $this->resolveOptions($options);

        // Odd-indexed chunks are the captured tags, even-indexed chunks the text between them.
        $chunks = preg_split('/(<.+?>)/is', $text, 0, PREG_SPLIT_DELIM_CAPTURE);

        if (false === $chunks) {
            // PCRE failure: hand the text back untouched rather than losing it.
            return $text;
        }

        $ignoreTags = array('head', 'link', 'a', 'script', 'style', 'code', 'pre', 'select', 'textarea', 'button');

        // Matches a non-self-closing opening tag from $ignoreTags. The lookahead
        // after the name keeps e.g. <abbr>, <article> or <header> from being
        // mistaken for <a> / <head>, which would leave the tag "open" forever.
        $openPattern = '`<(' . implode('|', $ignoreTags) . ')(?![\w:-]).*(?<!/)>$`is';

        $openTag = null;

        foreach ($chunks as $index => $chunk) {
            if (0 === $index % 2) {
                // Text chunk: only processed while no ignored element is open.
                if (null !== $openTag) {
                    continue;
                }

                if (true === $urls) {
                    $chunk = $this->linkifyUrls($chunk, $options);
                }
                if (true === $emails) {
                    $chunk = $this->linkifyEmails($chunk, $options);
                }

                $chunks[$index] = $chunk;
            } elseif (null === $openTag) {
                // Tag chunk outside any ignored element: does it open one?
                if (preg_match($openPattern, $chunk, $matches)) {
                    $openTag = $matches[1];
                }
            } elseif (preg_match('`</\s*' . preg_quote($openTag, '`') . '\s*>`i', $chunk)) {
                // Closing tag of the currently open ignored element ("</a >" is legal too).
                $openTag = null;
            }
        }

        return implode('', $chunks);
    }

    /**
     * Add HTML links to URLs in plain text.
     *
     * @see http://www.regular-expressions.info/catastrophic.html For more info on atomic-grouping,
     *      used in this regex to prevent Catastrophic Backtracking.
     *
     * @param string $text    Text to linkify.
     * @param array  $options Options, 'attr' key being the attributes to add to the links, with a preceding space.
     *
     * @return string Linkified text, or the original text if the regex engine reports an error.
     */
    protected function linkifyUrls($text, $options = array('attr' => ''))
    {
        $pattern = '~(?xi)
              (?:
                ((ht|f)tps?://)                      # scheme://
                |                                    #   or
                www\d{0,3}\.                         # "www.", "www1.", "www2." ... "www999."
                |                                    #   or
                www\-                                # "www-"
                |                                    #   or
                [a-z0-9.\-]+\.[a-z]{2,4}(?=/)        # looks like domain name followed by a slash
              )
              (?:                                    # Zero or more:
                [^\s()<>]+                           # Run of non-space, non-()<>
                |                                    #   or
                \((?>[^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
              )*
              (?:                                    # End with:
                \((?>[^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
                |                                    #   or
                [^\s`!\-()\[\]{};:\'".,<>?«»“”‘’]    # not a space or one of these punct chars
              )
        ~u';

        $attr = isset($options['attr']) ? $options['attr'] : '';

        $callback = function ($match) use ($options, $attr) {
            $caption = $match[0];
            $url = $match[0];

            // The main pattern is case-insensitive, so the scheme test must be
            // too; otherwise "HTTP://..." would get a second "http://" prefix.
            if (0 === preg_match('~^(ht|f)tps?://~i', $url)) {
                $url = 'http://' . $url;
            }

            if (isset($options['callback'])) {
                $cb = call_user_func($options['callback'], $url, $caption, false);
                if (null !== $cb) {
                    return $cb;
                }
            }

            // A matched URL may contain a double quote; encode it so the URL
            // cannot terminate the href attribute and inject further attributes.
            // Everything else is passed through as the (already HTML) input had it.
            $href = str_replace('"', '&quot;', $url);

            return '<a href="' . $href . '"' . $attr . '>' . $caption . '</a>';
        };

        return $this->replaceMatches($pattern, $callback, $text);
    }

    /**
     * Add HTML links to email addresses in plain text.
     *
     * @param string $text    Text to linkify.
     * @param array  $options Options, 'attr' key being the attributes to add to the links, with a preceding space.
     *
     * @return string Linkified text, or the original text if the regex engine reports an error.
     */
    protected function linkifyEmails($text, $options = array('attr' => ''))
    {
        $pattern = '~(?xi)
                \b
                (?<!=)           # Not part of a query string
                [A-Z0-9._\'%+-]+ # Username
                @                # At
                [A-Z0-9.-]+      # Domain
                \.               # Dot
                [A-Z]{2,63}      # Top-level domain (long ones such as "photography" included)
        ~u';

        $attr = isset($options['attr']) ? $options['attr'] : '';

        $callback = function ($match) use ($options, $attr) {
            $email = $match[0];

            if (isset($options['callback'])) {
                $cb = call_user_func($options['callback'], $email, $email, true);
                if (null !== $cb) {
                    return $cb;
                }
            }

            // The pattern admits no double quote or angle bracket, so the match
            // can be placed inside the double-quoted href as-is.
            return '<a href="mailto:' . $email . '"' . $attr . '>' . $email . '</a>';
        };

        return $this->replaceMatches($pattern, $callback, $text);
    }

    /**
     * Combine the default options with per-call options and flatten 'attr'.
     *
     * Top-level keys are merged flat (per-call wins) so that a value such as
     * 'callback' is replaced rather than folded into an array together with
     * the default one. 'attr' is merged one level deep, attribute by attribute.
     *
     * @param array $options Per-call options.
     *
     * @return array Options whose 'attr' entry is a ready-to-insert attribute
     *               string (each attribute preceded by a space, possibly empty).
     */
    private function resolveOptions(array $options)
    {
        $attributes = array();

        foreach (array($this->options, $options) as $source) {
            if (isset($source['attr']) && is_array($source['attr'])) {
                $attributes = array_replace($attributes, $source['attr']);
            }
        }

        $attr = '';

        foreach ($attributes as $name => $value) {
            if (is_array($value)) {
                // A list of candidate values: the last one wins.
                $value = array_pop($value);
            }

            // Encode double quotes so a value cannot break out of its attribute.
            $attr .= sprintf(' %s="%s"', $name, str_replace('"', '&quot;', (string) $value));
        }

        $resolved = array_merge($this->options, $options);
        $resolved['attr'] = $attr;

        return $resolved;
    }

    /**
     * Run preg_replace_callback() without losing the text on failure.
     *
     * preg_replace_callback() yields NULL when the regex engine reports an
     * error (for instance invalid UTF-8 with the "u" modifier, or an exceeded
     * backtrack limit); in that case the original text is returned.
     *
     * @param string   $pattern  Regular expression.
     * @param callable $callback Replacement callback.
     * @param string   $text     Subject text.
     *
     * @return string Text with replacements applied, or the original text on error.
     */
    private function replaceMatches($pattern, $callback, $text)
    {
        $result = preg_replace_callback($pattern, $callback, $text);

        return null === $result ? $text : $result;
    }
}
220