1<?php
2/**
3 * Net_URL2, a class representing a URL as per RFC 3986.
4 *
5 * PHP version 5
6 *
7 * LICENSE:
8 *
9 * Copyright (c) 2007-2009, Peytz & Co. A/S
10 * All rights reserved.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 *
16 *   * Redistributions of source code must retain the above copyright
17 *     notice, this list of conditions and the following disclaimer.
18 *   * Redistributions in binary form must reproduce the above copyright
19 *     notice, this list of conditions and the following disclaimer in
20 *     the documentation and/or other materials provided with the distribution.
21 *   * Neither the name of the Net_URL2 nor the names of its contributors may
22 *     be used to endorse or promote products derived from this software
23 *     without specific prior written permission.
24 *
25 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
26 * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
27 * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
29 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
33 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 *
37 * @category  Networking
38 * @package   Net_URL2
39 * @author    Christian Schmidt <schmidt@php.net>
40 * @copyright 2007-2009 Peytz & Co. A/S
41 * @license   https://spdx.org/licenses/BSD-3-Clause BSD-3-Clause
42 * @version   CVS: $Id$
43 * @link      https://tools.ietf.org/html/rfc3986
44 */
45
46/**
47 * Represents a URL as per RFC 3986.
48 *
49 * @category  Networking
50 * @package   Net_URL2
51 * @author    Christian Schmidt <schmidt@php.net>
52 * @copyright 2007-2009 Peytz & Co. A/S
53 * @license   https://spdx.org/licenses/BSD-3-Clause BSD-3-Clause
54 * @version   Release: @package_version@
55 * @link      https://pear.php.net/package/Net_URL2
56 */
57class Net_URL2
58{
59    /**
60     * Do strict parsing in resolve() (see RFC 3986, section 5.2.2). Default
61     * is true.
62     */
63    const OPTION_STRICT = 'strict';
64
65    /**
66     * Represent arrays in query using PHP's [] notation. Default is true.
67     */
68    const OPTION_USE_BRACKETS = 'use_brackets';
69
70    /**
71     * Drop zero-based integer sequences in query using PHP's [] notation. Default
72     * is true.
73     */
74    const OPTION_DROP_SEQUENCE = 'drop_sequence';
75
76    /**
77     * URL-encode query variable keys. Default is true.
78     */
79    const OPTION_ENCODE_KEYS = 'encode_keys';
80
81    /**
82     * Query variable separators when parsing the query string. Every character
83     * is considered a separator. Default is "&".
84     */
85    const OPTION_SEPARATOR_INPUT = 'input_separator';
86
87    /**
88     * Query variable separator used when generating the query string. Default
89     * is "&".
90     */
91    const OPTION_SEPARATOR_OUTPUT = 'output_separator';
92
93    /**
     * Default options correspond to how PHP handles $_GET.
95     */
96    private $_options = array(
97        self::OPTION_STRICT           => true,
98        self::OPTION_USE_BRACKETS     => true,
99        self::OPTION_DROP_SEQUENCE    => true,
100        self::OPTION_ENCODE_KEYS      => true,
101        self::OPTION_SEPARATOR_INPUT  => '&',
102        self::OPTION_SEPARATOR_OUTPUT => '&',
103        );
104
105    /**
106     * @var  string|bool
107     */
108    private $_scheme = false;
109
110    /**
111     * @var  string|bool
112     */
113    private $_userinfo = false;
114
115    /**
116     * @var  string|bool
117     */
118    private $_host = false;
119
120    /**
121     * @var  string|bool
122     */
123    private $_port = false;
124
125    /**
126     * @var  string
127     */
128    private $_path = '';
129
130    /**
131     * @var  string|bool
132     */
133    private $_query = false;
134
135    /**
136     * @var  string|bool
137     */
138    private $_fragment = false;
139
140    /**
141     * Constructor.
142     *
143     * @param string $url     an absolute or relative URL
144     * @param array  $options an array of OPTION_xxx constants
145     *
146     * @uses   self::parseUrl()
147     */
148    public function __construct($url, array $options = array())
149    {
150        foreach ($options as $optionName => $value) {
151            if (array_key_exists($optionName, $this->_options)) {
152                $this->_options[$optionName] = $value;
153            }
154        }
155
156        $this->parseUrl($url);
157    }
158
159    /**
160     * Magic Setter.
161     *
162     * This method will magically set the value of a private variable ($var)
163     * with the value passed as the args
164     *
165     * @param string $var The private variable to set.
166     * @param mixed  $arg An argument of any type.
167     *
168     * @return void
169     */
170    public function __set($var, $arg)
171    {
172        $method = 'set' . $var;
173        if (method_exists($this, $method)) {
174            $this->$method($arg);
175        }
176    }
177
178    /**
179     * Magic Getter.
180     *
181     * This is the magic get method to retrieve the private variable
182     * that was set by either __set() or it's setter...
183     *
184     * @param string $var The property name to retrieve.
185     *
186     * @return mixed  $this->$var Either a boolean false if the
187     *                            property is not set or the value
188     *                            of the private property.
189     */
190    public function __get($var)
191    {
192        $method = 'get' . $var;
193        if (method_exists($this, $method)) {
194            return $this->$method();
195        }
196
197        return false;
198    }
199
200    /**
201     * Returns the scheme, e.g. "http" or "urn", or false if there is no
202     * scheme specified, i.e. if this is a relative URL.
203     *
204     * @return string|bool
205     */
206    public function getScheme()
207    {
208        return $this->_scheme;
209    }
210
211    /**
212     * Sets the scheme, e.g. "http" or "urn". Specify false if there is no
213     * scheme specified, i.e. if this is a relative URL.
214     *
215     * @param string|bool $scheme e.g. "http" or "urn", or false if there is no
216     *                            scheme specified, i.e. if this is a relative
217     *                            URL
218     *
219     * @return $this
220     * @see    getScheme
221     */
222    public function setScheme($scheme)
223    {
224        $this->_scheme = $scheme;
225        return $this;
226    }
227
228    /**
229     * Returns the user part of the userinfo part (the part preceding the first
230     *  ":"), or false if there is no userinfo part.
231     *
232     * @return string|bool
233     */
234    public function getUser()
235    {
236        return $this->_userinfo !== false
237            ? preg_replace('(:.*$)', '', $this->_userinfo)
238            : false;
239    }
240
241    /**
242     * Returns the password part of the userinfo part (the part after the first
243     *  ":"), or false if there is no userinfo part (i.e. the URL does not
244     * contain "@" in front of the hostname) or the userinfo part does not
245     * contain ":".
246     *
247     * @return string|bool
248     */
249    public function getPassword()
250    {
251        return $this->_userinfo !== false
252            ? substr(strstr($this->_userinfo, ':'), 1)
253            : false;
254    }
255
256    /**
257     * Returns the userinfo part, or false if there is none, i.e. if the
258     * authority part does not contain "@".
259     *
260     * @return string|bool
261     */
262    public function getUserinfo()
263    {
264        return $this->_userinfo;
265    }
266
267    /**
268     * Sets the userinfo part. If two arguments are passed, they are combined
269     * in the userinfo part as username ":" password.
270     *
271     * @param string|bool $userinfo userinfo or username
272     * @param string|bool $password optional password, or false
273     *
274     * @return $this
275     */
276    public function setUserinfo($userinfo, $password = false)
277    {
278        if ($password !== false) {
279            $userinfo .= ':' . $password;
280        }
281
282        if ($userinfo !== false) {
283            $userinfo = $this->_encodeData($userinfo);
284        }
285
286        $this->_userinfo = $userinfo;
287        return $this;
288    }
289
290    /**
291     * Returns the host part, or false if there is no authority part, e.g.
292     * relative URLs.
293     *
294     * @return string|bool a hostname, an IP address, or false
295     */
296    public function getHost()
297    {
298        return $this->_host;
299    }
300
301    /**
302     * Sets the host part. Specify false if there is no authority part, e.g.
303     * relative URLs.
304     *
305     * @param string|bool $host a hostname, an IP address, or false
306     *
307     * @return $this
308     */
309    public function setHost($host)
310    {
311        $this->_host = $host;
312        return $this;
313    }
314
315    /**
316     * Returns the port number, or false if there is no port number specified,
317     * i.e. if the default port is to be used.
318     *
319     * @return string|bool
320     */
321    public function getPort()
322    {
323        return $this->_port;
324    }
325
326    /**
327     * Sets the port number. Specify false if there is no port number specified,
328     * i.e. if the default port is to be used.
329     *
330     * @param string|bool $port a port number, or false
331     *
332     * @return $this
333     */
334    public function setPort($port)
335    {
336        $this->_port = $port;
337        return $this;
338    }
339
340    /**
341     * Returns the authority part, i.e. [ userinfo "@" ] host [ ":" port ], or
342     * false if there is no authority.
343     *
344     * @return string|bool
345     */
346    public function getAuthority()
347    {
348        if (false === $this->_host) {
349            return false;
350        }
351
352        $authority = '';
353
354        if (strlen($this->_userinfo)) {
355            $authority .= $this->_userinfo . '@';
356        }
357
358        $authority .= $this->_host;
359
360        if ($this->_port !== false) {
361            $authority .= ':' . $this->_port;
362        }
363
364        return $authority;
365    }
366
367    /**
368     * Sets the authority part, i.e. [ userinfo "@" ] host [ ":" port ]. Specify
369     * false if there is no authority.
370     *
371     * @param string|bool $authority a hostname or an IP address, possibly
372     *                                with userinfo prefixed and port number
373     *                                appended, e.g. "foo:bar@example.org:81".
374     *
375     * @return $this
376     */
377    public function setAuthority($authority)
378    {
379        $this->_userinfo = false;
380        $this->_host     = false;
381        $this->_port     = false;
382
383        if ('' === $authority) {
384            $this->_host = $authority;
385            return $this;
386        }
387
388        if (!preg_match('(^(([^@]*)@)?(.+?)(:(\d*))?$)', $authority, $matches)) {
389            return $this;
390        }
391
392        if ($matches[1]) {
393            $this->_userinfo = $this->_encodeData($matches[2]);
394        }
395
396        $this->_host = $matches[3];
397
398        if (isset($matches[5]) && strlen($matches[5])) {
399            $this->_port = $matches[5];
400        }
401        return $this;
402    }
403
404    /**
405     * Returns the path part (possibly an empty string).
406     *
407     * @return string
408     */
409    public function getPath()
410    {
411        return $this->_path;
412    }
413
414    /**
415     * Sets the path part (possibly an empty string).
416     *
417     * @param string $path a path
418     *
419     * @return $this
420     */
421    public function setPath($path)
422    {
423        $this->_path = $path;
424        return $this;
425    }
426
427    /**
428     * Returns the query string (excluding the leading "?"), or false if "?"
429     * is not present in the URL.
430     *
431     * @return  string|bool
432     * @see     getQueryVariables
433     */
434    public function getQuery()
435    {
436        return $this->_query;
437    }
438
439    /**
440     * Sets the query string (excluding the leading "?"). Specify false if "?"
441     * is not present in the URL.
442     *
443     * @param string|bool $query a query string, e.g. "foo=1&bar=2"
444     *
445     * @return $this
446     * @see    setQueryVariables
447     */
448    public function setQuery($query)
449    {
450        $this->_query = $query;
451        return $this;
452    }
453
454    /**
455     * Returns the fragment name, or false if "#" is not present in the URL.
456     *
457     * @return string|bool
458     */
459    public function getFragment()
460    {
461        return $this->_fragment;
462    }
463
464    /**
465     * Sets the fragment name. Specify false if "#" is not present in the URL.
466     *
467     * @param string|bool $fragment a fragment excluding the leading "#", or
468     *                              false
469     *
470     * @return $this
471     */
472    public function setFragment($fragment)
473    {
474        $this->_fragment = $fragment;
475        return $this;
476    }
477
478    /**
479     * Returns the query string like an array as the variables would appear in
480     * $_GET in a PHP script. If the URL does not contain a "?", an empty array
481     * is returned.
482     *
483     * @return array
484     */
485    public function getQueryVariables()
486    {
487        $separator   = $this->getOption(self::OPTION_SEPARATOR_INPUT);
488        $encodeKeys  = $this->getOption(self::OPTION_ENCODE_KEYS);
489        $useBrackets = $this->getOption(self::OPTION_USE_BRACKETS);
490
491        $return  = array();
492
493        for ($part = strtok($this->_query, $separator);
494            strlen($part);
495            $part = strtok($separator)
496        ) {
497            list($key, $value) = explode('=', $part, 2) + array(1 => '');
498
499            if ($encodeKeys) {
500                $key = rawurldecode($key);
501            }
502            $value = rawurldecode($value);
503
504            if ($useBrackets) {
505                $return = $this->_queryArrayByKey($key, $value, $return);
506            } else {
507                if (isset($return[$key])) {
508                    $return[$key]  = (array) $return[$key];
509                    $return[$key][] = $value;
510                } else {
511                    $return[$key] = $value;
512                }
513            }
514        }
515
516        return $return;
517    }
518
519    /**
520     * Parse a single query key=value pair into an existing php array
521     *
522     * @param string $key   query-key
523     * @param string $value query-value
524     * @param array  $array of existing query variables (if any)
525     *
526     * @return mixed
527     */
528    private function _queryArrayByKey($key, $value, array $array = array())
529    {
530        if (!strlen($key)) {
531            return $array;
532        }
533
534        $offset = $this->_queryKeyBracketOffset($key);
535        if ($offset === false) {
536            $name = $key;
537        } else {
538            $name = substr($key, 0, $offset);
539        }
540
541        if (!strlen($name)) {
542            return $array;
543        }
544
545        if (!$offset) {
546            // named value
547            $array[$name] = $value;
548        } else {
549            // array
550            $brackets = substr($key, $offset);
551            if (!isset($array[$name])) {
552                $array[$name] = null;
553            }
554            $array[$name] = $this->_queryArrayByBrackets(
555                $brackets, $value, $array[$name]
556            );
557        }
558
559        return $array;
560    }
561
562    /**
563     * Parse a key-buffer to place value in array
564     *
565     * @param string $buffer to consume all keys from
566     * @param string $value  to be set/add
567     * @param array  $array  to traverse and set/add value in
568     *
569     * @throws Exception
570     * @return array
571     */
572    private function _queryArrayByBrackets($buffer, $value, array $array = null)
573    {
574        $entry = &$array;
575
576        for ($iteration = 0; strlen($buffer); $iteration++) {
577            $open = $this->_queryKeyBracketOffset($buffer);
578            if ($open !== 0) {
579                // Opening bracket [ must exist at offset 0, if not, there is
580                // no bracket to parse and the value dropped.
581                // if this happens in the first iteration, this is flawed, see
582                // as well the second exception below.
583                if ($iteration) {
584                    break;
585                }
586                // @codeCoverageIgnoreStart
587                throw new Exception(
588                    'Net_URL2 Internal Error: '. __METHOD__ .'(): ' .
589                    'Opening bracket [ must exist at offset 0'
590                );
591                // @codeCoverageIgnoreEnd
592            }
593
594            $close = strpos($buffer, ']', 1);
595            if (!$close) {
596                // this error condition should never be reached as this is a
597                // private method and bracket pairs are checked beforehand.
598                // See as well the first exception for the opening bracket.
599                // @codeCoverageIgnoreStart
600                throw new Exception(
601                    'Net_URL2 Internal Error: '. __METHOD__ .'(): ' .
602                    'Closing bracket ] must exist, not found'
603                );
604                // @codeCoverageIgnoreEnd
605            }
606
607            $index = substr($buffer, 1, $close - 1);
608            if (strlen($index)) {
609                $entry = &$entry[$index];
610            } else {
611                if (!is_array($entry)) {
612                    $entry = array();
613                }
614                $entry[] = &$new;
615                $entry = &$new;
616                unset($new);
617            }
618            $buffer = substr($buffer, $close + 1);
619        }
620
621        $entry = $value;
622
623        return $array;
624    }
625
626    /**
627     * Query-key has brackets ("...[]")
628     *
629     * @param string $key query-key
630     *
631     * @return bool|int offset of opening bracket, false if no brackets
632     */
633    private function _queryKeyBracketOffset($key)
634    {
635        if (false !== $open = strpos($key, '[')
636            and false === strpos($key, ']', $open + 1)
637        ) {
638            $open = false;
639        }
640
641        return $open;
642    }
643
644    /**
645     * Sets the query string to the specified variable in the query string.
646     *
647     * @param array $array (name => value) array
648     *
649     * @return $this
650     */
651    public function setQueryVariables(array $array)
652    {
653        if (!$array) {
654            $this->_query = false;
655        } else {
656            $this->_query = $this->buildQuery(
657                $array,
658                $this->getOption(self::OPTION_SEPARATOR_OUTPUT)
659            );
660        }
661        return $this;
662    }
663
664    /**
665     * Sets the specified variable in the query string.
666     *
667     * @param string $name  variable name
668     * @param mixed  $value variable value
669     *
670     * @return $this
671     */
672    public function setQueryVariable($name, $value)
673    {
674        $array = $this->getQueryVariables();
675        $array[$name] = $value;
676        $this->setQueryVariables($array);
677        return $this;
678    }
679
680    /**
681     * Removes the specified variable from the query string.
682     *
683     * @param string $name a query string variable, e.g. "foo" in "?foo=1"
684     *
685     * @return void
686     */
687    public function unsetQueryVariable($name)
688    {
689        $array = $this->getQueryVariables();
690        unset($array[$name]);
691        $this->setQueryVariables($array);
692    }
693
694    /**
695     * Returns a string representation of this URL.
696     *
697     * @return string
698     */
699    public function getURL()
700    {
701        // See RFC 3986, section 5.3
702        $url = '';
703
704        if ($this->_scheme !== false) {
705            $url .= $this->_scheme . ':';
706        }
707
708        $authority = $this->getAuthority();
709        if ($authority === false && strtolower($this->_scheme) === 'file') {
710            $authority = '';
711        }
712
713        $url .= $this->_buildAuthorityAndPath($authority, $this->_path);
714
715        if ($this->_query !== false) {
716            $url .= '?' . $this->_query;
717        }
718
719        if ($this->_fragment !== false) {
720            $url .= '#' . $this->_fragment;
721        }
722
723        return $url;
724    }
725
726    /**
727     * Put authority and path together, wrapping authority
728     * into proper separators/terminators.
729     *
730     * @param string|bool $authority authority
731     * @param string      $path      path
732     *
733     * @return string
734     */
735    private function _buildAuthorityAndPath($authority, $path)
736    {
737        if ($authority === false) {
738            return $path;
739        }
740
741        $terminator = ($path !== '' && $path[0] !== '/') ? '/' : '';
742
743        return '//' . $authority . $terminator . $path;
744    }
745
746    /**
747     * Returns a string representation of this URL.
748     *
749     * @return string
750     * @link https://php.net/language.oop5.magic#object.tostring
751     */
752    public function __toString()
753    {
754        return $this->getURL();
755    }
756
757    /**
758     * Returns a normalized string representation of this URL. This is useful
759     * for comparison of URLs.
760     *
761     * @return string
762     */
763    public function getNormalizedURL()
764    {
765        $url = clone $this;
766        $url->normalize();
767        return $url->getURL();
768    }
769
770    /**
771     * Normalizes the URL
772     *
773     * See RFC 3986, Section 6.  Normalization and Comparison
774     *
775     * @link https://tools.ietf.org/html/rfc3986#section-6
776     *
777     * @return void
778     */
779    public function normalize()
780    {
781        // See RFC 3986, section 6
782
783        // Scheme is case-insensitive
784        if ($this->_scheme) {
785            $this->_scheme = strtolower($this->_scheme);
786        }
787
788        // Hostname is case-insensitive
789        if ($this->_host) {
790            $this->_host = strtolower($this->_host);
791        }
792
793        // Remove default port number for known schemes (RFC 3986, section 6.2.3)
794        if ('' === $this->_port
795            || $this->_port
796            && $this->_scheme
797            && $this->_port == getservbyname($this->_scheme, 'tcp')
798        ) {
799            $this->_port = false;
800        }
801
802        // Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1)
803        // Normalize percentage-encoded unreserved characters (section 6.2.2.2)
804        $fields = array(&$this->_userinfo, &$this->_host, &$this->_path,
805                        &$this->_query, &$this->_fragment);
806        foreach ($fields as &$field) {
807            if ($field !== false) {
808                $field = $this->_normalize("$field");
809            }
810        }
811        unset($field);
812
813        // Path segment normalization (RFC 3986, section 6.2.2.3)
814        $this->_path = self::removeDotSegments($this->_path);
815
816        // Scheme based normalization (RFC 3986, section 6.2.3)
817        if (false !== $this->_host && '' === $this->_path) {
818            $this->_path = '/';
819        }
820
821        // path should start with '/' if there is authority (section 3.3.)
822        if (strlen($this->getAuthority())
823            && strlen($this->_path)
824            && $this->_path[0] !== '/'
825        ) {
826            $this->_path = '/' . $this->_path;
827        }
828    }
829
830    /**
831     * Normalize case of %XX percentage-encodings (RFC 3986, section 6.2.2.1)
832     * Normalize percentage-encoded unreserved characters (section 6.2.2.2)
833     *
834     * @param string|array $mixed string or array of strings to normalize
835     *
836     * @return string|array
837     * @see normalize
838     * @see _normalizeCallback()
839     */
840    private function _normalize($mixed)
841    {
842        return preg_replace_callback(
843            '((?:%[0-9a-fA-Z]{2})+)', array($this, '_normalizeCallback'),
844            $mixed
845        );
846    }
847
848    /**
849     * Callback for _normalize() of %XX percentage-encodings
850     *
851     * @param array $matches as by preg_replace_callback
852     *
853     * @return string
854     * @see normalize
855     * @see _normalize
856     * @SuppressWarnings(PHPMD.UnusedPrivateMethod)
857     */
858    private function _normalizeCallback($matches)
859    {
860        return self::urlencode(urldecode($matches[0]));
861    }
862
863    /**
864     * Returns whether this instance represents an absolute URL.
865     *
866     * @return bool
867     */
868    public function isAbsolute()
869    {
870        return (bool) $this->_scheme;
871    }
872
873    /**
874     * Returns an Net_URL2 instance representing an absolute URL relative to
875     * this URL.
876     *
877     * @param Net_URL2|string $reference relative URL
878     *
879     * @throws Exception
880     * @return $this
881     */
882    public function resolve($reference)
883    {
884        if (!$reference instanceof Net_URL2) {
885            $reference = new self($reference);
886        }
887        if (!$reference->_isFragmentOnly() && !$this->isAbsolute()) {
888            throw new Exception(
889                'Base-URL must be absolute if reference is not fragment-only'
890            );
891        }
892
893        // A non-strict parser may ignore a scheme in the reference if it is
894        // identical to the base URI's scheme.
895        if (!$this->getOption(self::OPTION_STRICT)
896            && $reference->_scheme == $this->_scheme
897        ) {
898            $reference->_scheme = false;
899        }
900
901        $target = new self('');
902        if ($reference->_scheme !== false) {
903            $target->_scheme = $reference->_scheme;
904            $target->setAuthority($reference->getAuthority());
905            $target->_path  = self::removeDotSegments($reference->_path);
906            $target->_query = $reference->_query;
907        } else {
908            $authority = $reference->getAuthority();
909            if ($authority !== false) {
910                $target->setAuthority($authority);
911                $target->_path  = self::removeDotSegments($reference->_path);
912                $target->_query = $reference->_query;
913            } else {
914                if ($reference->_path == '') {
915                    $target->_path = $this->_path;
916                    if ($reference->_query !== false) {
917                        $target->_query = $reference->_query;
918                    } else {
919                        $target->_query = $this->_query;
920                    }
921                } else {
922                    if (substr($reference->_path, 0, 1) == '/') {
923                        $target->_path = self::removeDotSegments($reference->_path);
924                    } else {
925                        // Merge paths (RFC 3986, section 5.2.3)
926                        if ($this->_host !== false && $this->_path == '') {
927                            $target->_path = '/' . $reference->_path;
928                        } else {
929                            $i = strrpos($this->_path, '/');
930                            if ($i !== false) {
931                                $target->_path = substr($this->_path, 0, $i + 1);
932                            }
933                            $target->_path .= $reference->_path;
934                        }
935                        $target->_path = self::removeDotSegments($target->_path);
936                    }
937                    $target->_query = $reference->_query;
938                }
939                $target->setAuthority($this->getAuthority());
940            }
941            $target->_scheme = $this->_scheme;
942        }
943
944        $target->_fragment = $reference->_fragment;
945
946        return $target;
947    }
948
949    /**
950     * URL is fragment-only
951     *
952     * @SuppressWarnings(PHPMD.UnusedPrivateMethod)
953     * @return bool
954     */
955    private function _isFragmentOnly()
956    {
957        return (
958            $this->_fragment !== false
959            && $this->_query === false
960            && $this->_path === ''
961            && $this->_port === false
962            && $this->_host === false
963            && $this->_userinfo === false
964            && $this->_scheme === false
965        );
966    }
967
968    /**
969     * Removes dots as described in RFC 3986, section 5.2.4, e.g.
970     * "/foo/../bar/baz" => "/bar/baz"
971     *
972     * @param string $path a path
973     *
974     * @return string a path
975     */
976    public static function removeDotSegments($path)
977    {
978        $path = (string) $path;
979        $output = '';
980
981        // Make sure not to be trapped in an infinite loop due to a bug in this
982        // method
983        $loopLimit = 256;
984        $j = 0;
985        while ('' !== $path && $j++ < $loopLimit) {
986            if (substr($path, 0, 2) === './') {
987                // Step 2.A
988                $path = substr($path, 2);
989            } elseif (substr($path, 0, 3) === '../') {
990                // Step 2.A
991                $path = substr($path, 3);
992            } elseif (substr($path, 0, 3) === '/./' || $path === '/.') {
993                // Step 2.B
994                $path = '/' . substr($path, 3);
995            } elseif (substr($path, 0, 4) === '/../' || $path === '/..') {
996                // Step 2.C
997                $path   = '/' . substr($path, 4);
998                $i      = strrpos($output, '/');
999                $output = $i === false ? '' : substr($output, 0, $i);
1000            } elseif ($path === '.' || $path === '..') {
1001                // Step 2.D
1002                $path = '';
1003            } else {
1004                // Step 2.E
1005                $i = strpos($path, '/', $path[0] === '/');
1006                if ($i === false) {
1007                    $output .= $path;
1008                    $path = '';
1009                    break;
1010                }
1011                $output .= substr($path, 0, $i);
1012                $path = substr($path, $i);
1013            }
1014        }
1015
1016        if ($path !== '') {
1017            $message = sprintf(
1018                'Unable to remove dot segments; hit loop limit %d (left: %s)',
1019                $j, var_export($path, true)
1020            );
1021            trigger_error($message, E_USER_WARNING);
1022        }
1023
1024        return $output;
1025    }
1026
1027    /**
1028     * Percent-encodes all non-alphanumeric characters except these: _ . - ~
1029     * Similar to PHP's rawurlencode(), except that it also encodes ~ in PHP
1030     * 5.2.x and earlier.
1031     *
1032     * @param string $string string to encode
1033     *
1034     * @return string
1035     */
1036    public static function urlencode($string)
1037    {
1038        $encoded = rawurlencode($string);
1039
1040        // This is only necessary in PHP < 5.3.
1041        $encoded = str_replace('%7E', '~', $encoded);
1042        return $encoded;
1043    }
1044
1045    /**
1046     * Returns a Net_URL2 instance representing the canonical URL of the
1047     * currently executing PHP script.
1048     *
1049     * @throws Exception
1050     * @return string
1051     */
1052    public static function getCanonical()
1053    {
1054        if (!isset($_SERVER['REQUEST_METHOD'])) {
1055            // ALERT - no current URL
1056            throw new Exception('Script was not called through a webserver');
1057        }
1058
1059        // Begin with a relative URL
1060        $url = new self($_SERVER['PHP_SELF']);
1061        $url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
1062        $url->_host   = $_SERVER['SERVER_NAME'];
1063        $port = $_SERVER['SERVER_PORT'];
1064        if ($url->_scheme == 'http' && $port != 80
1065            || $url->_scheme == 'https' && $port != 443
1066        ) {
1067            $url->_port = $port;
1068        }
1069        return $url;
1070    }
1071
1072    /**
1073     * Returns the URL used to retrieve the current request.
1074     *
1075     * @return  string
1076     */
1077    public static function getRequestedURL()
1078    {
1079        return self::getRequested()->getUrl();
1080    }
1081
1082    /**
1083     * Returns a Net_URL2 instance representing the URL used to retrieve the
1084     * current request.
1085     *
1086     * @throws Exception
1087     * @return $this
1088     */
1089    public static function getRequested()
1090    {
1091        if (!isset($_SERVER['REQUEST_METHOD'])) {
1092            // ALERT - no current URL
1093            throw new Exception('Script was not called through a webserver');
1094        }
1095
1096        // Begin with a relative URL
1097        $url = new self($_SERVER['REQUEST_URI']);
1098        $url->_scheme = isset($_SERVER['HTTPS']) ? 'https' : 'http';
1099        // Set host and possibly port
1100        $url->setAuthority($_SERVER['HTTP_HOST']);
1101        return $url;
1102    }
1103
1104    /**
1105     * Returns the value of the specified option.
1106     *
1107     * @param string $optionName The name of the option to retrieve
1108     *
1109     * @return mixed
1110     */
1111    public function getOption($optionName)
1112    {
1113        return isset($this->_options[$optionName])
1114            ? $this->_options[$optionName] : false;
1115    }
1116
1117    /**
1118     * A simple version of http_build_query in userland. The encoded string is
1119     * percentage encoded according to RFC 3986.
1120     *
1121     * @param array  $data      An array, which has to be converted into
1122     *                          QUERY_STRING. Anything is possible.
1123     * @param string $separator Separator {@link self::OPTION_SEPARATOR_OUTPUT}
1124     * @param string $key       For stacked values (arrays in an array).
1125     *
1126     * @return string
1127     */
1128    protected function buildQuery(array $data, $separator, $key = null)
1129    {
1130        $query = array();
1131        $drop_names = (
1132            $this->_options[self::OPTION_DROP_SEQUENCE] === true
1133            && array_keys($data) === array_keys(array_values($data))
1134        );
1135        foreach ($data as $name => $value) {
1136            if ($this->getOption(self::OPTION_ENCODE_KEYS) === true) {
1137                $name = rawurlencode($name);
1138            }
1139            if ($key !== null) {
1140                if ($this->getOption(self::OPTION_USE_BRACKETS) === true) {
1141                    $drop_names && $name = '';
1142                    $name = $key . '[' . $name . ']';
1143                } else {
1144                    $name = $key;
1145                }
1146            }
1147            if (is_array($value)) {
1148                $query[] = $this->buildQuery($value, $separator, $name);
1149            } else {
1150                $query[] = $name . '=' . rawurlencode($value);
1151            }
1152        }
1153        return implode($separator, $query);
1154    }
1155
1156    /**
1157     * This method uses a regex to parse the url into the designated parts.
1158     *
1159     * @param string $url URL
1160     *
1161     * @return void
1162     * @uses   self::$_scheme, self::setAuthority(), self::$_path, self::$_query,
1163     *         self::$_fragment
1164     * @see    __construct
1165     */
1166    protected function parseUrl($url)
1167    {
1168        // The regular expression is copied verbatim from RFC 3986, appendix B.
1169        // The expression does not validate the URL but matches any string.
1170        preg_match(
1171            '(^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?)',
1172            $url, $matches
1173        );
1174
1175        // "path" is always present (possibly as an empty string); the rest
1176        // are optional.
1177        $this->_scheme   = !empty($matches[1]) ? $matches[2] : false;
1178        $this->setAuthority(!empty($matches[3]) ? $matches[4] : false);
1179        $this->_path     = $this->_encodeData($matches[5]);
1180        $this->_query    = !empty($matches[6])
1181                           ? $this->_encodeData($matches[7])
1182                           : false
1183            ;
1184        $this->_fragment = !empty($matches[8]) ? $matches[9] : false;
1185    }
1186
1187    /**
1188     * Encode characters that might have been forgotten to encode when passing
1189     * in an URL. Applied onto Userinfo, Path and Query.
1190     *
1191     * @param string $url URL
1192     *
1193     * @return string
1194     * @see parseUrl
1195     * @see setAuthority
1196     * @link https://pear.php.net/bugs/bug.php?id=20425
1197     */
1198    private function _encodeData($url)
1199    {
1200        return preg_replace_callback(
1201            '([\x-\x20\x22\x3C\x3E\x7F-\xFF]+)',
1202            array($this, '_encodeCallback'), $url
1203        );
1204    }
1205
1206    /**
1207     * callback for encoding character data
1208     *
1209     * @param array $matches Matches
1210     *
1211     * @return string
1212     * @see _encodeData
1213     * @SuppressWarnings(PHPMD.UnusedPrivateMethod)
1214     */
1215    private function _encodeCallback(array $matches)
1216    {
1217        return rawurlencode($matches[0]);
1218    }
1219}
1220