<?php
/**
 * Matomo - free/libre analytics platform
 *
 * @link https://matomo.org
 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
 *
 */
namespace Piwik;

use Piwik\Container\StaticContainer;
use Piwik\Intl\Data\Provider\RegionDataProvider;

/**
 * Contains less commonly needed URL helper methods.
 *
 */
class UrlHelper
{
    /**
     * Protocols accepted by {@link isLookLikeSafeUrl()} (matched case-insensitively).
     *
     * @var string[]
     */
    private static $validLinkProtocols = [
        'http',
        'https',
        'tel',
        'sms',
        'mailto',
        'callto',
    ];

    /**
     * Checks if a string matches/is equal to one of the patterns/strings.
     *
     * Each entry of `$patterns` is first probed as a regular expression. If it does not
     * compile as one, it is compared to `$test` as a plain string, case-insensitively.
     *
     * @static
     * @param string $test String to test.
     * @param array $patterns Array of strings or regexs.
     *
     * @return bool true if $test matches or is equal to one of the regex/string in $patterns, false otherwise.
     */
    protected static function in_array_matches_regex($test, $patterns)
    {
        foreach ($patterns as $val) {
            // Probe whether $val compiles as a regex. The probe subject must be a string:
            // passing null to preg_match() is deprecated since PHP 8.1.
            if (@preg_match($val, '') === false) {
                // not a valid regex => plain, case-insensitive string comparison
                if (strcasecmp($val, $test) === 0) {
                    return true;
                }
            } elseif (preg_match($val, $test) === 1) {
                return true;
            }
        }
        return false;
    }

    /**
     * Converts an array of query parameter name/value mappings into a query string.
     * Parameters that are in `$parametersToExclude` will not appear in the result.
     *
     * Values are appended as they are (no URL encoding is applied here). A value of `false`
     * produces a parameter without `=`, array values produce one `name[]` entry per element.
     *
     * @static
     * @param array $queryParameters Array of query parameters, eg, `array('site' => '0', 'date' => '2012-01-01')`.
     * @param array $parametersToExclude Array of query parameter names (or regexs) that shouldn't be
     *                                   in the result query string, eg, `array('date', 'period')`.
     * @return string A query string without leading `?`, eg, `"site=0"`. Empty string if no parameter remains.
     * @api
     */
    public static function getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude)
    {
        $separator = '&';
        $parts = [];

        foreach ($queryParameters as $name => $value) {
            // decode encoded square brackets
            $name = str_replace(['%5B', '%5D'], ['[', ']'], $name);

            // exclusion is tested against the lower-cased name
            if (self::in_array_matches_regex(strtolower($name), $parametersToExclude)) {
                continue;
            }

            if (is_array($value)) {
                foreach ($value as $param) {
                    $parts[] = ($param === false) ? $name . '[]' : $name . '[]=' . $param;
                }
            } elseif ($value === false) {
                $parts[] = $name;
            } else {
                $parts[] = $name . '=' . $value;
            }
        }

        // implode() always yields a string, whereas substr('', 0, -1) returns false on PHP < 8
        return implode($separator, $parts);
    }

    /**
     * Reduce URL to more minimal form.  2 letter country codes are
     * replaced by '{}', while other parts are simply removed.
     *
     * Examples:
     *   www.example.com -> example.com
     *   search.example.com -> example.com
     *   m.example.com -> example.com
     *   de.example.com -> {}.example.com
     *   example.de -> example.{}
     *   example.co.uk -> example.{}
     *
     * @param string $url
     * @return string
     */
    public static function getLossyUrl($url)
    {
        // regex alternation built from the keys of the country list, computed once per process
        static $countries;
        if (!isset($countries)) {
            /** @var RegionDataProvider $regionDataProvider */
            $regionDataProvider = StaticContainer::get('Piwik\Intl\Data\Provider\RegionDataProvider');
            $countries = implode('|', array_keys($regionDataProvider->getCountryList(true)));
        }

        $patterns = [
            // leading "www", "www2", ... or "search" label
            '/^(w+[0-9]*|search)\./',
            // "m." label at the start or after a dot
            '/(^|\.)m\./',
            // country code as last label, optionally preceded by a generic label (eg ".co.uk")
            '/(\.(com|org|net|co|it|edu))?\.(' . $countries . ')(\/|$)/',
            // country code as a label at the start or after a dot
            '/(^|\.)(' . $countries . ')\./',
        ];
        $replacements = [
            '',
            '$1',
            '.{}$4',
            '$1{}.',
        ];

        return preg_replace($patterns, $replacements, $url);
    }

    /**
     * Returns true if the string passed may be a URL ie. it starts with protocol://.
     * We don't need a precise test here because the value comes from the website
     * tracked source code and the URLs may look very strange.
     *
     * Protocol-relative values (starting with `//`) are accepted as well. Values whose protocol
     * is `javascript:`, `vbscript:` or `data:` are rejected, as are values with nothing after `//`.
     *
     * @api
     * @param string $url
     * @return bool
     */
    public static function isLookLikeUrl($url)
    {
        // "=== 1" (instead of "!== 0") so that a PCRE failure is not mistaken for a match,
        // in which case $matches would not be populated
        return $url && preg_match('~^(([[:alpha:]][[:alnum:]+.-]*)?:)?//(.*)$~D', $url, $matches) === 1
            && strlen($matches[3]) > 0
            && !preg_match('/^(javascript:|vbscript:|data:)/i', $matches[1])
        ;
    }

    /**
     * Checks whether a URL looks safe to be used as a link target.
     *
     * A URL containing ASCII control characters is never safe. A URL without any `:` is
     * considered safe. Otherwise everything before the first `:` has to be one of the
     * whitelisted link protocols (compared case-insensitively).
     *
     * @param string $url
     * @return bool|int `false`/`true` for the first two cases, otherwise the result of `preg_match()`
     *                  (`1` if the protocol is whitelisted, `0` if not). Use in a boolean context.
     */
    public static function isLookLikeSafeUrl($url)
    {
        if (preg_match('/[\x00-\x1F\x7F]/', $url)) {
            return false;
        }

        if (strpos($url, ':') === false) {
            return true;
        }

        $protocol = explode(':', $url, 2)[0];
        return preg_match('/^(' . implode('|', self::$validLinkProtocols) . ')$/i', $protocol);
    }

    /**
     * Returns a URL created from the result of the [parse_url](http://php.net/manual/en/function.parse-url.php)
     * function.
     *
     * Copied from the PHP comments at [http://php.net/parse_url](http://php.net/parse_url).
     *
     * @param array $parsed Result of [parse_url](http://php.net/manual/en/function.parse-url.php).
     * @return false|string The URL or `false` if `$parsed` isn't an array.
     * @api
     */
    public static function getParseUrlReverse($parsed)
    {
        if (!is_array($parsed)) {
            return false;
        }

        $uri = '';

        if (!empty($parsed['scheme'])) {
            // every scheme except "mailto" is followed by "//"
            $uri .= $parsed['scheme'] . ':' . (strcasecmp($parsed['scheme'], 'mailto') === 0 ? '' : '//');
        }

        if (!empty($parsed['user'])) {
            $uri .= $parsed['user'] . (!empty($parsed['pass']) ? ':' . $parsed['pass'] : '') . '@';
        }

        if (!empty($parsed['host'])) {
            $uri .= $parsed['host'];
        }

        if (!empty($parsed['port'])) {
            $uri .= ':' . $parsed['port'];
        }

        if (!empty($parsed['path'])) {
            // a relative path is separated from what was built so far by a "/"
            $uri .= (strncmp($parsed['path'], '/', 1) === 0)
                ? $parsed['path']
                : ((!empty($uri) ? '/' : '') . $parsed['path']);
        }

        if (!empty($parsed['query'])) {
            $uri .= '?' . $parsed['query'];
        }

        if (!empty($parsed['fragment'])) {
            $uri .= '#' . $parsed['fragment'];
        }

        return $uri;
    }

    /**
     * Returns a URL query string as an array.
     *
     * Parameters without `=` get the value `false`. Parameters whose name ends with `[]`
     * (or the encoded `%5b%5d`) are collected into an array under the name without brackets.
     * Non-empty names and values are passed through `Common::sanitizeInputValue()`.
     *
     * @param string $urlQuery The query string, eg, `'?param1=value1&param2=value2'`.
     * @return array eg, `array('param1' => 'value1', 'param2' => 'value2')`
     * @api
     */
    public static function getArrayFromQueryString($urlQuery)
    {
        // explicit null check: strlen(null) is deprecated since PHP 8.1
        if ($urlQuery === null || strlen($urlQuery) == 0) {
            return [];
        }

        // TODO: this method should not use a cache. callers should instead have their own cache, configured through DI.
        //       one undesirable side effect of using a cache here, is that this method can now init the StaticContainer, which makes setting
        //       test environment for RequestCommand more complicated.
        $cache    = Cache::getTransientCache();
        $cacheKey = 'arrayFromQuery' . $urlQuery;

        if ($cache->contains($cacheKey)) {
            return $cache->fetch($cacheKey);
        }

        if ($urlQuery[0] == '?') {
            $urlQuery = substr($urlQuery, 1);
        }
        $separator = '&';

        // the separator is prepended before trimming, so only trailing whitespace is removed
        $urlQuery      = $separator . $urlQuery;
        $referrerQuery = trim($urlQuery);

        $values = explode($separator, $referrerQuery);

        $nameToValue = [];

        foreach ($values as $value) {
            $pos = strpos($value, '=');
            if ($pos !== false) {
                $name  = substr($value, 0, $pos);
                $value = substr($value, $pos + 1);
                if ($value === false) {
                    // substr() returns false instead of '' on PHP < 8 when nothing follows the '='
                    $value = '';
                }
            } else {
                $name  = $value;
                $value = false;
            }
            if (!empty($name)) {
                $name = Common::sanitizeInputValue($name);
            }
            if (!empty($value)) {
                $value = Common::sanitizeInputValue($value);
            }

            // if array without indexes
            $count = 0;
            $tmp   = preg_replace('/(\[|%5b)(]|%5d)$/i', '', $name, -1, $count);
            if (!empty($tmp) && $count) {
                $name = $tmp;
                if (!isset($nameToValue[$name]) || !is_array($nameToValue[$name])) {
                    $nameToValue[$name] = [];
                }
                $nameToValue[$name][] = $value;
            } elseif (!empty($name)) {
                $nameToValue[$name] = $value;
            }
        }

        $cache->save($cacheKey, $nameToValue);

        return $nameToValue;
    }

    /**
     * Returns the value of a single query parameter from the supplied query string.
     *
     * @param string $urlQuery The query string.
     * @param string $parameter The query parameter name to return.
     * @return string|array|false|null Parameter value if found (can be the empty string, an array for
     *                                 `name[]` parameters, or `false` for a parameter without `=`),
     *                                 null if not found.
     * @api
     */
    public static function getParameterFromQueryString($urlQuery, $parameter)
    {
        $nameToValue = self::getArrayFromQueryString($urlQuery);

        if (isset($nameToValue[$parameter])) {
            return $nameToValue[$parameter];
        }
        return null;
    }

    /**
     * Returns the path and query string of a URL. A leading `/` of the path is removed.
     *
     * @param string $url The URL.
     * @return string eg, `test/index.php?module=CoreHome` if `$url` is `http://piwik.org/test/index.php?module=CoreHome`.
     * @api
     */
    public static function getPathAndQueryFromUrl($url)
    {
        $parsedUrl = parse_url($url);
        $result    = '';

        if (isset($parsedUrl['path'])) {
            $path = $parsedUrl['path'];
            if (substr($path, 0, 1) == '/') {
                $path = substr($path, 1);
            }
            $result .= $path;
        }

        if (isset($parsedUrl['query'])) {
            $result .= '?' . $parsedUrl['query'];
        }

        return $result;
    }

    /**
     * Returns the query part from any valid url and adds additional parameters to the query part if needed.
     *
     * @param string $url    Any url eg `"http://example.com/piwik/?foo=bar"`
     * @param array $additionalParamsToAdd If not empty the given parameters will be added to the query.
     *
     * @return string eg. `"foo=bar&foo2=bar2"`
     * @api
     */
    public static function getQueryFromUrl($url, array $additionalParamsToAdd = array())
    {
        $url   = @parse_url($url);
        $query = '';

        if (!empty($url['query'])) {
            $query .= $url['query'];
        }

        if (!empty($additionalParamsToAdd)) {
            if (!empty($query)) {
                $query .= '&';
            }

            $query .= Url::getQueryStringFromParameters($additionalParamsToAdd);
        }

        return $query;
    }

    /**
     * Returns the host part of a URL. If the value does not look like a URL
     * (see {@link isLookLikeUrl()}), `http://` is prepended before parsing.
     *
     * @param string $url
     * @return string|null|false The result of `parse_url($url, PHP_URL_HOST)`.
     */
    public static function getHostFromUrl($url)
    {
        if (!self::isLookLikeUrl($url)) {
            $url = "http://" . $url;
        }
        return parse_url($url, PHP_URL_HOST);
    }
}