1<?php
2/**
3 * Matomo - free/libre analytics platform
4 *
5 * @link https://matomo.org
6 * @license http://www.gnu.org/licenses/gpl-3.0.html GPL v3 or later
7 *
8 */
9namespace Piwik;
10
11use Piwik\Container\StaticContainer;
12use Piwik\Intl\Data\Provider\RegionDataProvider;
13
/**
 * Contains less commonly needed URL helper methods.
 *
 */
class UrlHelper
{
    /**
     * URI schemes accepted by {@see UrlHelper::isLookLikeSafeUrl()} when the
     * tested string contains a colon.
     *
     * @var string[]
     */
    private static $validLinkProtocols = [
        'http',
        'https',
        'tel',
        'sms',
        'mailto',
        'callto',
    ];

    /**
     * Checks if a string matches/is equal to one of the patterns/strings.
     *
     * Every entry of `$patterns` that compiles as a PCRE pattern is matched
     * against `$test` as a regex; every other entry is compared with `$test`
     * as a plain string, ignoring case.
     *
     * @static
     * @param string $test String to test.
     * @param string[] $patterns Array of strings or regexes.
     *
     * @return bool true if $test matches or is equal to one of the regex/string in $patterns, false otherwise.
     */
    protected static function in_array_matches_regex($test, $patterns)
    {
        foreach ($patterns as $val) {
            // Probe the entry against an empty subject: preg_match() returns false
            // (and emits a warning, silenced on purpose here) when the entry is not
            // a valid regex. An empty string is used instead of null because passing
            // null as the subject is deprecated since PHP 8.1.
            if (@preg_match($val, '') === false) {
                if (strcasecmp($val, $test) === 0) {
                    return true;
                }
            } elseif (preg_match($val, $test) === 1) {
                return true;
            }
        }

        return false;
    }

    /**
     * Converts an array of query parameter name/value mappings into a query string.
     * Parameters that are in `$parametersToExclude` will not appear in the result.
     *
     * Names and values are concatenated as given (no URL encoding is applied here).
     * A value of `false` produces a bare name without `=`, and array values are
     * written as repeated `name[]=value` pairs.
     *
     * @static
     * @param array $queryParameters Array of query parameters, eg, `array('site' => '0', 'date' => '2012-01-01')`.
     * @param array $parametersToExclude Array of query parameter names (or regexes) that shouldn't be
     *                                   in the result query string, eg, `array('date', 'period')`.
     * @return string A query string without a leading `?`, eg, `"site=0"`. Empty string if nothing remains.
     * @api
     */
    public static function getQueryStringWithExcludedParameters($queryParameters, $parametersToExclude)
    {
        // Collect the individual "name=value" pieces and join them once at the end.
        // This avoids trimming a trailing separator with substr(), which returns
        // false instead of '' on PHP < 8 when no parameter survives the filtering.
        $parts = [];

        foreach ($queryParameters as $name => $value) {
            // decode encoded square brackets
            $name = str_replace(['%5B', '%5D'], ['[', ']'], $name);

            if (self::in_array_matches_regex(strtolower($name), $parametersToExclude)) {
                continue;
            }

            if (is_array($value)) {
                foreach ($value as $param) {
                    $parts[] = ($param === false)
                        ? $name . '[]'
                        : $name . '[]=' . $param;
                }
            } elseif ($value === false) {
                $parts[] = $name;
            } else {
                $parts[] = $name . '=' . $value;
            }
        }

        return implode('&', $parts);
    }

    /**
     * Reduce URL to more minimal form.  2 letter country codes are
     * replaced by '{}', while other parts are simply removed.
     *
     * Examples:
     *   www.example.com -> example.com
     *   search.example.com -> example.com
     *   m.example.com -> example.com
     *   de.example.com -> {}.example.com
     *   example.de -> example.{}
     *   example.co.uk -> example.{}
     *
     * @param string $url
     * @return string
     */
    public static function getLossyUrl($url)
    {
        // The country-code alternation is built once and reused by later calls.
        static $countries;
        if (!isset($countries)) {
            /** @var RegionDataProvider $regionDataProvider */
            $regionDataProvider = StaticContainer::get('Piwik\Intl\Data\Provider\RegionDataProvider');
            $countries = implode('|', array_keys($regionDataProvider->getCountryList(true)));
        }

        // The patterns are applied in order, each one on the output of the previous one.
        $patterns = [
            '/^(w+[0-9]*|search)\./',
            '/(^|\.)m\./',
            '/(\.(com|org|net|co|it|edu))?\.(' . $countries . ')(\/|$)/',
            '/(^|\.)(' . $countries . ')\./',
        ];
        $replacements = [
            '',
            '$1',
            '.{}$4',
            '$1{}.',
        ];

        return preg_replace($patterns, $replacements, $url);
    }

    /**
     * Returns true if the string passed may be a URL ie. it starts with protocol://
     * (or with a protocol-relative `//`) followed by at least one character.
     * We don't need a precise test here because the value comes from the website
     * tracked source code and the URLs may look very strange.
     *
     * @api
     * @param string $url
     * @return bool
     */
    public static function isLookLikeUrl($url)
    {
        // "=== 1" (rather than "!== 0") so that a PCRE failure, which makes
        // preg_match() return false, is not mistaken for a successful match.
        return $url
            && preg_match('~^(([[:alpha:]][[:alnum:]+.-]*)?:)?//(.*)$~D', $url, $matches) === 1
            && strlen($matches[3]) > 0
            && !preg_match('/^(javascript:|vbscript:|data:)/i', $matches[1]);
    }

    /**
     * Returns true if the string passed looks like a link that is safe to use.
     *
     * A string is rejected when it contains an ASCII control character
     * (0x00-0x1F or 0x7F). A string without any colon is accepted as is.
     * Otherwise the part before the first colon has to be one of the
     * protocols listed in `self::$validLinkProtocols` (case-insensitive).
     *
     * @param string $url
     * @return bool
     */
    public static function isLookLikeSafeUrl($url)
    {
        if (preg_match('/[\x00-\x1F\x7F]/', $url)) {
            return false;
        }

        if (strpos($url, ':') === false) {
            return true;
        }

        $protocol = explode(':', $url, 2)[0];
        $allowedProtocols = implode('|', self::$validLinkProtocols);

        // Compare against 1 so that every code path of this method returns a bool.
        return preg_match('/^(' . $allowedProtocols . ')$/i', $protocol) === 1;
    }

    /**
     * Returns a URL created from the result of the [parse_url](http://php.net/manual/en/function.parse-url.php)
     * function.
     *
     * Copied from the PHP comments at [http://php.net/parse_url](http://php.net/parse_url).
     *
     * Components that are missing or empty are skipped. The `//` after the scheme
     * is omitted for the `mailto` scheme only.
     *
     * @param array $parsed Result of [parse_url](http://php.net/manual/en/function.parse-url.php).
     * @return false|string The URL or `false` if `$parsed` isn't an array.
     * @api
     */
    public static function getParseUrlReverse($parsed)
    {
        if (!is_array($parsed)) {
            return false;
        }

        $uri = '';

        if (!empty($parsed['scheme'])) {
            $isMailto = strcasecmp($parsed['scheme'], 'mailto') === 0;
            $uri .= $parsed['scheme'] . ':' . ($isMailto ? '' : '//');
        }

        if (!empty($parsed['user'])) {
            $uri .= $parsed['user'];
            if (!empty($parsed['pass'])) {
                $uri .= ':' . $parsed['pass'];
            }
            $uri .= '@';
        }

        if (!empty($parsed['host'])) {
            $uri .= $parsed['host'];
        }

        if (!empty($parsed['port'])) {
            $uri .= ':' . $parsed['port'];
        }

        if (!empty($parsed['path'])) {
            if (strncmp($parsed['path'], '/', 1) === 0) {
                $uri .= $parsed['path'];
            } else {
                // A relative path only needs a separating slash when something precedes it.
                $uri .= (!empty($uri) ? '/' : '') . $parsed['path'];
            }
        }

        if (!empty($parsed['query'])) {
            $uri .= '?' . $parsed['query'];
        }

        if (!empty($parsed['fragment'])) {
            $uri .= '#' . $parsed['fragment'];
        }

        return $uri;
    }

    /**
     * Returns a URL query string as an array.
     *
     * Names and non-empty values are passed through `Common::sanitizeInputValue()`.
     * A parameter that appears without `=` gets the value `false`. Parameters whose
     * name ends with `[]` (or the encoded `%5B%5D`) are collected into an array.
     * Results are stored in the transient cache, keyed by the query string.
     *
     * @param string|null $urlQuery The query string, eg, `'?param1=value1&param2=value2'`.
     * @return array eg, `array('param1' => 'value1', 'param2' => 'value2')`
     * @api
     */
    public static function getArrayFromQueryString($urlQuery)
    {
        // Explicit null guard: strlen(null) is deprecated since PHP 8.1 and a missing
        // query component (eg, from parse_url()) is commonly represented as null.
        if ($urlQuery === null || strlen($urlQuery) == 0) {
            return [];
        }

        // TODO: this method should not use a cache. callers should instead have their own cache, configured through DI.
        //       one undesirable side effect of using a cache here, is that this method can now init the StaticContainer, which makes setting
        //       test environment for RequestCommand more complicated.
        $cache    = Cache::getTransientCache();
        $cacheKey = 'arrayFromQuery' . $urlQuery;

        if ($cache->contains($cacheKey)) {
            return $cache->fetch($cacheKey);
        }

        if ($urlQuery[0] == '?') {
            $urlQuery = substr($urlQuery, 1);
        }
        $separator = '&';

        $urlQuery = $separator . $urlQuery;
        $referrerQuery = trim($urlQuery);

        $values = explode($separator, $referrerQuery);

        $nameToValue = [];

        foreach ($values as $value) {
            $pos = strpos($value, '=');
            if ($pos !== false) {
                $name = substr($value, 0, $pos);
                $value = substr($value, $pos + 1);
                // substr() returns false on PHP < 8 when nothing follows the '='
                if ($value === false) {
                    $value = '';
                }
            } else {
                // parameter without '=': keep the name, mark the value as absent
                $name = $value;
                $value = false;
            }
            if (!empty($name)) {
                $name = Common::sanitizeInputValue($name);
            }
            if (!empty($value)) {
                $value = Common::sanitizeInputValue($value);
            }

            // if array without indexes
            $count = 0;
            $tmp = preg_replace('/(\[|%5b)(]|%5d)$/i', '', $name, -1, $count);
            if (!empty($tmp) && $count) {
                $name = $tmp;
                if (!isset($nameToValue[$name]) || !is_array($nameToValue[$name])) {
                    $nameToValue[$name] = [];
                }
                $nameToValue[$name][] = $value;
            } elseif (!empty($name)) {
                $nameToValue[$name] = $value;
            }
        }

        $cache->save($cacheKey, $nameToValue);

        return $nameToValue;
    }

    /**
     * Returns the value of a single query parameter from the supplied query string.
     *
     * @param string $urlQuery The query string.
     * @param string $parameter The query parameter name to return.
     * @return string|array|false|null Parameter value if found (can be the empty string, `false` when the
     *                                 parameter is present without `=`, or an array for `name[]`
     *                                 parameters), null if not found.
     * @api
     */
    public static function getParameterFromQueryString($urlQuery, $parameter)
    {
        $nameToValue = self::getArrayFromQueryString($urlQuery);

        if (isset($nameToValue[$parameter])) {
            return $nameToValue[$parameter];
        }

        return null;
    }

    /**
     * Returns the path and query string of a URL. A single leading slash of the
     * path is removed.
     *
     * @param string $url The URL.
     * @return string eg, `test/index.php?module=CoreHome` if `$url` is `http://piwik.org/test/index.php?module=CoreHome`.
     * @api
     */
    public static function getPathAndQueryFromUrl($url)
    {
        $parsedUrl = parse_url($url);
        $result = '';

        if (isset($parsedUrl['path'])) {
            $path = $parsedUrl['path'];
            if (substr($path, 0, 1) == '/') {
                $path = substr($path, 1);
            }
            $result .= $path;
        }

        if (isset($parsedUrl['query'])) {
            $result .= '?' . $parsedUrl['query'];
        }

        return $result;
    }

    /**
     * Returns the query part from any valid url and adds additional parameters to the query part if needed.
     *
     * @param string $url    Any url eg `"http://example.com/piwik/?foo=bar"`
     * @param array $additionalParamsToAdd    If not empty the given parameters will be added to the query.
     *
     * @return string eg. `"foo=bar&foo2=bar2"`
     * @api
     */
    public static function getQueryFromUrl($url, array $additionalParamsToAdd = array())
    {
        $url = @parse_url($url);
        $query = '';

        if (!empty($url['query'])) {
            $query .= $url['query'];
        }

        if (!empty($additionalParamsToAdd)) {
            if (!empty($query)) {
                $query .= '&';
            }

            $query .= Url::getQueryStringFromParameters($additionalParamsToAdd);
        }

        return $query;
    }

    /**
     * Returns the host part of a URL. If the given string does not look like a
     * URL (see {@see UrlHelper::isLookLikeUrl()}), `http://` is prepended before parsing.
     *
     * @param string $url The URL, or a host optionally followed by a path.
     * @return string|null|false The value returned by `parse_url($url, PHP_URL_HOST)`.
     */
    public static function getHostFromUrl($url)
    {
        if (!self::isLookLikeUrl($url)) {
            $url = "http://" . $url;
        }

        return parse_url($url, PHP_URL_HOST);
    }
}
348