<?php

/*
 * This file is part of the Linkify library.
 *
 * (c) University of Cambridge
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

namespace Misd\Linkify;

/**
 * Converts URLs and/or email addresses into HTML links.
 *
 * Recognised options (constructor defaults and per-call overrides):
 *  - 'attr':     map of attribute name => value added to every generated <a> tag.
 *  - 'callback': callable invoked as callback($url, $caption, $isEmail); a non-null
 *                return value replaces the default <a> markup for that match.
 */
class Linkify implements LinkifyInterface
{
    /**
     * Default options.
     *
     * @var array
     */
    protected $options;

    /**
     * Constructor.
     *
     * @param array $options Default options.
     */
    public function __construct(array $options = array())
    {
        $this->options = $options;
    }

    /**
     * {@inheritdoc}
     */
    public function process($text, array $options = array())
    {
        return $this->linkify($text, true, true, $options);
    }

    /**
     * {@inheritdoc}
     */
    public function processUrls($text, array $options = array())
    {
        return $this->linkify($text, true, false, $options);
    }

    /**
     * {@inheritdoc}
     */
    public function processEmails($text, array $options = array())
    {
        return $this->linkify($text, false, true, $options);
    }

    /**
     * Add links to text.
     *
     * The text is split into markup tags and the text between them. Only the
     * text parts are linkified, and only while no element from the ignore list
     * (e.g. <a>, <script>, <pre>) is currently open.
     *
     * @param string $text    Text to linkify.
     * @param bool   $urls    Linkify URLs?
     * @param bool   $emails  Linkify email addresses?
     * @param array  $options Options.
     *
     * @return string Linkified text.
     */
    protected function linkify($text, $urls = true, $emails = true, array $options = array())
    {
        if (false === $urls && false === $emails) {
            // nothing to do...
            return $text;
        }

        $options = $this->mergeOptions($options);
        // From here on 'attr' is a ready-to-append string (with a leading space per attribute).
        $options['attr'] = $this->buildAttributeString($options['attr']);

        $ignoreTags = array('head', 'link', 'a', 'script', 'style', 'code', 'pre', 'select', 'textarea', 'button');

        $chunks = preg_split('/(<.+?>)/is', $text, 0, PREG_SPLIT_DELIM_CAPTURE);

        if (false === $chunks) {
            // PCRE failure: hand the text back untouched rather than losing it.
            return $text;
        }

        // The lookahead after the tag name stops "<abbr>" or "<header>" from being
        // mistaken for "<a>" or "<head>". The pattern is intentionally not anchored
        // at the start: a stray "<" in the text can make a chunk begin before the
        // actual tag (e.g. "< 5 and <a href=...>").
        $openPattern = '`<(' . implode('|', $ignoreTags) . ')(?![\w:-]).*(?<!/)>$`is';

        $openTag = null;

        foreach ($chunks as $i => $chunk) {
            if ($i % 2 === 0) { // even numbers are text
                // Only process this chunk if there are no unclosed $ignoreTags
                if (null === $openTag) {
                    if (true === $urls) {
                        $chunk = $this->linkifyUrls($chunk, $options);
                    }
                    if (true === $emails) {
                        $chunk = $this->linkifyEmails($chunk, $options);
                    }
                    $chunks[$i] = $chunk;
                }
            } elseif (null === $openTag) { // odd numbers are tags
                // Check whether this tag is contained in $ignoreTags and is not self-closing
                if (preg_match($openPattern, $chunk, $matches)) {
                    $openTag = $matches[1];
                }
            } elseif (preg_match('`</\s*' . preg_quote($openTag, '`') . '\s*>`i', $chunk)) {
                // This is the closing tag for $openTag; resume processing.
                $openTag = null;
            }
        }

        return implode('', $chunks);
    }

    /**
     * Add HTML links to URLs in plain text.
     *
     * @see http://www.regular-expressions.info/catastrophic.html For more info on atomic-grouping,
     *      used in this regex to prevent Catastrophic Backtracking.
     *
     * @param string $text    Text to linkify.
     * @param array  $options Options, 'attr' key being the attributes to add to the links, with a preceding space.
     *
     * @return string Linkified text.
     */
    protected function linkifyUrls($text, $options = array('attr' => ''))
    {
        $pattern = '~(?xi)
              (?:
                ((ht|f)tps?://)                    # scheme://
                |                                  #   or
                www\d{0,3}\.                       # "www.", "www1.", "www2." ... "www999."
                |                                  #   or
                www\-                              # "www-"
                |                                  #   or
                [a-z0-9.\-]+\.[a-z]{2,4}(?=/)      # looks like domain name followed by a slash
              )
              (?:                                  # Zero or more:
                [^\s()<>]+                         # Run of non-space, non-()<>
                |                                  #   or
                \((?>[^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
              )*
              (?:                                  # End with:
                \((?>[^\s()<>]+|(\([^\s()<>]+\)))*\) # balanced parens, up to 2 levels
                |                                  #   or
                [^\s`!\-()\[\]{};:\'".,<>?«»“”‘’]  # not a space or one of these punct chars
              )
        ~u';

        $attr = isset($options['attr']) ? $options['attr'] : '';

        $callback = function ($match) use ($options, $attr) {
            $caption = $match[0];
            $url = $match[0];

            // Case-insensitive, like the main pattern, so "HTTP://..." is not prefixed again.
            if (0 === preg_match('~^(ht|f)tps?://~i', $url)) {
                $url = 'http://' . $url;
            }

            if (isset($options['callback'])) {
                $cb = $options['callback']($url, $caption, false);
                if (!is_null($cb)) {
                    return $cb;
                }
            }

            return '<a href="' . $url . '"' . $attr . '>' . $caption . '</a>';
        };

        $result = preg_replace_callback($pattern, $callback, $text);

        // null signals a PCRE error (e.g. invalid UTF-8 input); keep the original text.
        return null === $result ? $text : $result;
    }

    /**
     * Add HTML links to email addresses in plain text.
     *
     * @param string $text    Text to linkify.
     * @param array  $options Options, 'attr' key being the attributes to add to the links, with a preceding space.
     *
     * @return string Linkified text.
     */
    protected function linkifyEmails($text, $options = array('attr' => ''))
    {
        $pattern = '~(?xi)
                \b
                (?<!=)           # Not part of a query string
                [A-Z0-9._\'%+-]+ # Username
                @                # At
                [A-Z0-9.-]+      # Domain
                \.               # Dot
                [A-Z]{2,4}       # Something
        ~u';

        $attr = isset($options['attr']) ? $options['attr'] : '';

        $callback = function ($match) use ($options, $attr) {
            if (isset($options['callback'])) {
                $cb = $options['callback']($match[0], $match[0], true);
                if (!is_null($cb)) {
                    return $cb;
                }
            }

            return '<a href="mailto:' . $match[0] . '"' . $attr . '>' . $match[0] . '</a>';
        };

        $result = preg_replace_callback($pattern, $callback, $text);

        // null signals a PCRE error (e.g. invalid UTF-8 input); keep the original text.
        return null === $result ? $text : $result;
    }

    /**
     * Combine the default options with per-call options.
     *
     * Per-call values override defaults. The 'attr' maps are merged one level
     * deep so that defaults and per-call attributes can be combined. A plain
     * array_merge_recursive() is avoided because it turns colliding values
     * (such as two 'callback' closures) into arrays.
     *
     * @param array $options Per-call options.
     *
     * @return array Merged options; 'attr' is always an array.
     */
    private function mergeOptions(array $options)
    {
        $merged = array_merge($this->options, $options);

        $defaultAttr = (isset($this->options['attr']) && is_array($this->options['attr']))
            ? $this->options['attr']
            : array();
        $callAttr = (isset($options['attr']) && is_array($options['attr']))
            ? $options['attr']
            : array();

        $merged['attr'] = array_merge($defaultAttr, $callAttr);

        return $merged;
    }

    /**
     * Render an attribute map as a string of ' name="value"' pairs.
     *
     * @param array $attributes Attribute name => value; when a value is itself
     *                          an array, its last element is used.
     *
     * @return string Attribute string, empty when there are no attributes.
     */
    private function buildAttributeString(array $attributes)
    {
        $attr = '';

        foreach ($attributes as $key => $value) {
            if (true === is_array($value)) {
                $value = array_pop($value);
            }
            $attr .= sprintf(' %s="%s"', $key, $value);
        }

        return $attr;
    }
}