1<?php
2/**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 */
20
21use MediaWiki\MediaWikiServices;
22use Psr\Log\LoggerAwareInterface;
23use Psr\Log\LoggerInterface;
24use Psr\Log\NullLogger;
25
26/**
27 * This wrapper class will call out to curl (if available) or fallback
28 * to regular PHP if necessary for handling internal HTTP requests.
29 *
30 * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
31 * PHP's HTTP extension.
32 */
33abstract class MWHttpRequest implements LoggerAwareInterface {
	/**
	 * Capability flag for file POSTs; false in this base class.
	 * NOTE(review): presumably overridden by backends that support file uploads — confirm.
	 */
	public const SUPPORTS_FILE_POSTS = false;

	/**
	 * Request timeout. Holds the placeholder string 'default' until the
	 * constructor replaces it with the 'timeout' option or $wgHTTPTimeout.
	 *
	 * @var int|string
	 */
	protected $timeout = 'default';

	/** @var string|null Response body; reset to "" by prepare() and appended to by read() */
	protected $content;
	/** @var bool|null Set to true by prepare() when the request method is HEAD */
	protected $headersOnly = null;
	/** @var array|null Request parameters, from the 'postData' option or setData() */
	protected $postData = null;
	/** @var string|null Proxy to use; forced to '' when $noProxy is set or the URL is local */
	protected $proxy = null;
	/** @var bool When true, the constructor and proxySetup() force $proxy to '' */
	protected $noProxy = false;
	/** @var bool Set from the 'sslVerifyHost' option; not read within this base class */
	protected $sslVerifyHost = true;
	/** @var bool Set from the 'sslVerifyCert' option; not read within this base class */
	protected $sslVerifyCert = true;
	/** @var string|null Set from the 'caInfo' option; not read within this base class */
	protected $caInfo = null;
	/** @var string HTTP method; upper-cased by the constructor when passed as an option */
	protected $method = "GET";
	/** @var array Request headers, keyed by header name exactly as given (no case folding) */
	protected $reqHeaders = [];
	/** @var string Request URL after wfExpandUrl( ..., PROTO_HTTP ) */
	protected $url;
	/**
	 * Result of wfParseUrl( $this->url ); a falsy value makes the constructor
	 * record an 'http-invalid-url' fatal status.
	 * NOTE(review): assumes wfParseUrl() returns false on failure — confirm.
	 *
	 * @var array|false
	 */
	protected $parsedUrl;
	/** @var callable Read callback; doSetCallback( null ) installs [ $this, 'read' ] */
	protected $callback;
	/** @var int Set from the 'maxRedirects' option; not read within this base class */
	protected $maxRedirects = 5;
	/** @var bool Set from the 'followRedirects' option; not read within this base class */
	protected $followRedirects = false;
	/** @var int|string Set by the constructor from 'connectTimeout' or $wgHTTPConnectTimeout */
	protected $connectTimeout;

	/**
	 * Cookie jar for this request; created lazily by setCookie() and
	 * parseCookies() when none was supplied through setCookieJar().
	 *
	 * @var CookieJar
	 */
	protected $cookieJar;

	/** @var string[] Raw response header lines consumed by parseHeader() */
	protected $headerList = [];
	/** @var string HTTP version taken from the status line; "0.9" until one is parsed */
	protected $respVersion = "0.9";
	/** @var string Status code plus reason phrase; '0 Error' when the request status is not OK */
	protected $respStatus = "200 Ok";
	/** @var string[][] Response header values, keyed by lower-cased header name */
	protected $respHeaders = [];

	/** @var StatusValue Outcome of the request; fatal from the start if the URL is invalid */
	protected $status;

	/**
	 * Profiler passed to the constructor (may be null).
	 *
	 * @var Profiler
	 */
	protected $profiler;

	/**
	 * The $caller value passed to the constructor.
	 *
	 * @var string
	 */
	protected $profileName;

	/**
	 * Logger from the 'logger' option or setLogger(); a NullLogger by default.
	 *
	 * @var LoggerInterface
	 */
	protected $logger;
88
89	/**
90	 * @param string $url Url to use. If protocol-relative, will be expanded to an http:// URL
91	 * @param array $options (optional) extra params to pass (see HttpRequestFactory::create())
92	 * @codingStandardsIgnoreStart
93	 * @phan-param array{timeout?:int|string,connectTimeout?:int|string,postData?:array,proxy?:string,noProxy?:bool,sslVerifyHost?:bool,sslVerifyCert?:bool,caInfo?:string,maxRedirects?:int,followRedirects?:bool,userAgent?:string,logger?:LoggerInterface,username?:string,password?:string,originalRequest?:WebRequest|array{ip:string,userAgent:string},method?:string} $options
94	 * @codingStandardsIgnoreEnd
95	 * @param string $caller The method making this request, for profiling
96	 * @param Profiler|null $profiler An instance of the profiler for profiling, or null
97	 * @throws Exception
98	 */
99	public function __construct(
100		$url, array $options = [], $caller = __METHOD__, Profiler $profiler = null
101	) {
102		$this->url = wfExpandUrl( $url, PROTO_HTTP );
103		$this->parsedUrl = wfParseUrl( $this->url );
104
105		$this->logger = $options['logger'] ?? new NullLogger();
106
107		if ( !$this->parsedUrl || !self::isValidURI( $this->url ) ) {
108			$this->status = StatusValue::newFatal( 'http-invalid-url', $url );
109		} else {
110			$this->status = StatusValue::newGood( 100 ); // continue
111		}
112
113		if ( isset( $options['timeout'] ) && $options['timeout'] != 'default' ) {
114			$this->timeout = $options['timeout'];
115		} else {
116			// The timeout should always be set by HttpRequestFactory, so this
117			// should only happen if the class was directly constructed
118			wfDeprecated( __METHOD__ . ' without the timeout option', '1.35' );
119			global $wgHTTPTimeout;
120			$this->timeout = $wgHTTPTimeout;
121		}
122		if ( isset( $options['connectTimeout'] ) && $options['connectTimeout'] != 'default' ) {
123			$this->connectTimeout = $options['connectTimeout'];
124		} else {
125			// The timeout should always be set by HttpRequestFactory, so this
126			// should only happen if the class was directly constructed
127			wfDeprecated( __METHOD__ . ' without the connectTimeout option', '1.35' );
128			global $wgHTTPConnectTimeout;
129			$this->connectTimeout = $wgHTTPConnectTimeout;
130		}
131		if ( isset( $options['userAgent'] ) ) {
132			$this->setUserAgent( $options['userAgent'] );
133		}
134		if ( isset( $options['username'] ) && isset( $options['password'] ) ) {
135			$this->setHeader(
136				'Authorization',
137				'Basic ' . base64_encode( $options['username'] . ':' . $options['password'] )
138			);
139		}
140		if ( isset( $options['originalRequest'] ) ) {
141			$this->setOriginalRequest( $options['originalRequest'] );
142		}
143
144		$this->setHeader( 'X-Request-Id', WebRequest::getRequestId() );
145
146		$members = [ "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
147				"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" ];
148
149		foreach ( $members as $o ) {
150			if ( isset( $options[$o] ) ) {
151				// ensure that MWHttpRequest::method is always
152				// uppercased. T38137
153				if ( $o == 'method' ) {
154					$options[$o] = strtoupper( $options[$o] );
155				}
156				$this->$o = $options[$o];
157			}
158		}
159
160		if ( $this->noProxy ) {
161			$this->proxy = ''; // noProxy takes precedence
162		}
163
164		// Profile based on what's calling us
165		$this->profiler = $profiler;
166		$this->profileName = $caller;
167	}
168
169	/**
170	 * @param LoggerInterface $logger
171	 */
172	public function setLogger( LoggerInterface $logger ) {
173		$this->logger = $logger;
174	}
175
176	/**
177	 * Simple function to test if we can make any sort of requests at all, using
178	 * cURL or fopen()
179	 * @return bool
180	 */
181	public static function canMakeRequests() {
182		return function_exists( 'curl_init' ) || wfIniGetBool( 'allow_url_fopen' );
183	}
184
185	/**
186	 * Generate a new request object
187	 * @deprecated since 1.34, use HttpRequestFactory instead
188	 * @param string $url Url to use
189	 * @param array|null $options (optional) extra params to pass (see HttpRequestFactory::create())
190	 * @param string $caller The method making this request, for profiling
191	 * @throws DomainException
192	 * @return MWHttpRequest
193	 * @see MWHttpRequest::__construct
194	 */
195	public static function factory( $url, array $options = null, $caller = __METHOD__ ) {
196		if ( $options === null ) {
197			$options = [];
198		}
199		return MediaWikiServices::getInstance()->getHttpRequestFactory()
200			->create( $url, $options, $caller );
201	}
202
203	/**
204	 * Get the body, or content, of the response to the request
205	 *
206	 * @return string
207	 */
208	public function getContent() {
209		return $this->content;
210	}
211
212	/**
213	 * Set the parameters of the request
214	 *
215	 * @param array $args
216	 * @todo overload the args param
217	 */
218	public function setData( array $args ) {
219		$this->postData = $args;
220	}
221
222	/**
223	 * Take care of setting up the proxy (do nothing if "noProxy" is set)
224	 *
225	 * @return void
226	 */
227	protected function proxySetup() {
228		// If there is an explicit proxy set and proxies are not disabled, then use it
229		if ( $this->proxy && !$this->noProxy ) {
230			return;
231		}
232
233		// Otherwise, fallback to $wgHTTPProxy if this is not a machine
234		// local URL and proxies are not disabled
235		if ( self::isLocalURL( $this->url ) || $this->noProxy ) {
236			$this->proxy = '';
237		} else {
238			global $wgHTTPProxy;
239			$this->proxy = (string)$wgHTTPProxy;
240		}
241	}
242
243	/**
244	 * Check if the URL can be served by localhost
245	 *
246	 * @param string $url Full url to check
247	 * @return bool
248	 */
249	private static function isLocalURL( $url ) {
250		global $wgCommandLineMode, $wgLocalVirtualHosts;
251
252		if ( $wgCommandLineMode ) {
253			return false;
254		}
255
256		// Extract host part
257		$matches = [];
258		if ( preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
259			$host = $matches[1];
260			// Split up dotwise
261			$domainParts = explode( '.', $host );
262			// Check if this domain or any superdomain is listed as a local virtual host
263			$domainParts = array_reverse( $domainParts );
264
265			$domain = '';
266			$countParts = count( $domainParts );
267			for ( $i = 0; $i < $countParts; $i++ ) {
268				$domainPart = $domainParts[$i];
269				if ( $i == 0 ) {
270					$domain = $domainPart;
271				} else {
272					$domain = $domainPart . '.' . $domain;
273				}
274
275				if ( in_array( $domain, $wgLocalVirtualHosts ) ) {
276					return true;
277				}
278			}
279		}
280
281		return false;
282	}
283
284	/**
285	 * Set the user agent
286	 * @param string $UA
287	 */
288	public function setUserAgent( $UA ) {
289		$this->setHeader( 'User-Agent', $UA );
290	}
291
292	/**
293	 * Set an arbitrary header
294	 * @param string $name
295	 * @param string $value
296	 */
297	public function setHeader( $name, $value ) {
298		// I feel like I should normalize the case here...
299		$this->reqHeaders[$name] = $value;
300	}
301
302	/**
303	 * Get an array of the headers
304	 * @return array
305	 */
306	protected function getHeaderList() {
307		$list = [];
308
309		if ( $this->cookieJar ) {
310			$this->reqHeaders['Cookie'] =
311				$this->cookieJar->serializeToHttpRequest(
312					$this->parsedUrl['path'],
313					$this->parsedUrl['host']
314				);
315		}
316
317		foreach ( $this->reqHeaders as $name => $value ) {
318			$list[] = "$name: $value";
319		}
320
321		return $list;
322	}
323
324	/**
325	 * Set a read callback to accept data read from the HTTP request.
326	 * By default, data is appended to an internal buffer which can be
327	 * retrieved through $req->getContent().
328	 *
329	 * To handle data as it comes in -- especially for large files that
330	 * would not fit in memory -- you can instead set your own callback,
331	 * in the form function($resource, $buffer) where the first parameter
332	 * is the low-level resource being read (implementation specific),
333	 * and the second parameter is the data buffer.
334	 *
335	 * You MUST return the number of bytes handled in the buffer; if fewer
336	 * bytes are reported handled than were passed to you, the HTTP fetch
337	 * will be aborted.
338	 *
339	 * @param callable|null $callback
340	 * @throws InvalidArgumentException
341	 */
342	public function setCallback( $callback ) {
343		$this->doSetCallback( $callback );
344	}
345
346	/**
347	 * Worker function for setting callbacks.  Calls can originate both internally and externally
348	 * via setCallback).  Defaults to the internal read callback if $callback is null.
349	 *
350	 * @param callable|null $callback
351	 * @throws InvalidArgumentException
352	 */
353	protected function doSetCallback( $callback ) {
354		if ( $callback === null ) {
355			$callback = [ $this, 'read' ];
356		} elseif ( !is_callable( $callback ) ) {
357			$this->status->fatal( 'http-internal-error' );
358			throw new InvalidArgumentException( __METHOD__ . ': invalid callback' );
359		}
360		$this->callback = $callback;
361	}
362
363	/**
364	 * A generic callback to read the body of the response from a remote
365	 * server.
366	 *
367	 * @param resource $fh
368	 * @param string $content
369	 * @return int
370	 * @internal
371	 */
372	public function read( $fh, $content ) {
373		$this->content .= $content;
374		return strlen( $content );
375	}
376
377	/**
378	 * Take care of whatever is necessary to perform the URI request.
379	 *
380	 * @return Status
381	 * @note currently returns Status for B/C
382	 */
383	public function execute() {
384		throw new LogicException( 'children must override this' );
385	}
386
387	protected function prepare() {
388		$this->content = "";
389
390		if ( strtoupper( $this->method ) == "HEAD" ) {
391			$this->headersOnly = true;
392		}
393
394		$this->proxySetup(); // set up any proxy as needed
395
396		if ( !$this->callback ) {
397			$this->doSetCallback( null );
398		}
399
400		if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
401			$http = MediaWikiServices::getInstance()->getHttpRequestFactory();
402			$this->setUserAgent( $http->getUserAgent() );
403		}
404	}
405
406	/**
407	 * Parses the headers, including the HTTP status code and any
408	 * Set-Cookie headers.  This function expects the headers to be
409	 * found in an array in the member variable headerList.
410	 */
411	protected function parseHeader() {
412		$lastname = "";
413
414		// Failure without (valid) headers gets a response status of zero
415		if ( !$this->status->isOK() ) {
416			$this->respStatus = '0 Error';
417		}
418
419		foreach ( $this->headerList as $header ) {
420			if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
421				$this->respVersion = $match[1];
422				$this->respStatus = $match[2];
423			} elseif ( preg_match( "#^[ \t]#", $header ) ) {
424				$last = count( $this->respHeaders[$lastname] ) - 1;
425				$this->respHeaders[$lastname][$last] .= "\r\n$header";
426			} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
427				$this->respHeaders[strtolower( $match[1] )][] = $match[2];
428				$lastname = strtolower( $match[1] );
429			}
430		}
431
432		$this->parseCookies();
433	}
434
435	/**
436	 * Sets HTTPRequest status member to a fatal value with the error
437	 * message if the returned integer value of the status code was
438	 * not successful (1-299) or a redirect (300-399).
439	 * See RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
440	 * for a list of status codes.
441	 */
442	protected function setStatus() {
443		if ( !$this->respHeaders ) {
444			$this->parseHeader();
445		}
446
447		if ( ( (int)$this->respStatus > 0 && (int)$this->respStatus < 400 ) ) {
448			$this->status->setResult( true, (int)$this->respStatus );
449		} else {
450			list( $code, $message ) = explode( " ", $this->respStatus, 2 );
451			$this->status->setResult( false, (int)$this->respStatus );
452			$this->status->fatal( "http-bad-status", $code, $message );
453		}
454	}
455
456	/**
457	 * Get the integer value of the HTTP status code (e.g. 200 for "200 Ok")
458	 * (see RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
459	 * for a list of status codes.)
460	 *
461	 * @return int
462	 */
463	public function getStatus() {
464		if ( !$this->respHeaders ) {
465			$this->parseHeader();
466		}
467
468		return (int)$this->respStatus;
469	}
470
471	/**
472	 * Returns true if the last status code was a redirect.
473	 *
474	 * @return bool
475	 */
476	public function isRedirect() {
477		if ( !$this->respHeaders ) {
478			$this->parseHeader();
479		}
480
481		$status = (int)$this->respStatus;
482
483		if ( $status >= 300 && $status <= 303 ) {
484			return true;
485		}
486
487		return false;
488	}
489
490	/**
491	 * Returns an associative array of response headers after the
492	 * request has been executed.  Because some headers
493	 * (e.g. Set-Cookie) can appear more than once the, each value of
494	 * the associative array is an array of the values given.
495	 * Header names are always in lowercase.
496	 *
497	 * @return array
498	 */
499	public function getResponseHeaders() {
500		if ( !$this->respHeaders ) {
501			$this->parseHeader();
502		}
503
504		return $this->respHeaders;
505	}
506
507	/**
508	 * Returns the value of the given response header.
509	 *
510	 * @param string $header case-insensitive
511	 * @return string|null
512	 */
513	public function getResponseHeader( $header ) {
514		if ( !$this->respHeaders ) {
515			$this->parseHeader();
516		}
517
518		if ( isset( $this->respHeaders[strtolower( $header )] ) ) {
519			$v = $this->respHeaders[strtolower( $header )];
520			return $v[count( $v ) - 1];
521		}
522
523		return null;
524	}
525
526	/**
527	 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
528	 *
529	 * To read response cookies from the jar, getCookieJar must be called first.
530	 *
531	 * @param CookieJar $jar
532	 */
533	public function setCookieJar( CookieJar $jar ) {
534		$this->cookieJar = $jar;
535	}
536
537	/**
538	 * Returns the cookie jar in use.
539	 *
540	 * @return CookieJar
541	 */
542	public function getCookieJar() {
543		if ( !$this->respHeaders ) {
544			$this->parseHeader();
545		}
546
547		return $this->cookieJar;
548	}
549
550	/**
551	 * Sets a cookie. Used before a request to set up any individual
552	 * cookies. Used internally after a request to parse the
553	 * Set-Cookie headers.
554	 * @see Cookie::set
555	 * @param string $name
556	 * @param string $value
557	 * @param array $attr
558	 */
559	public function setCookie( $name, $value, array $attr = [] ) {
560		if ( !$this->cookieJar ) {
561			$this->cookieJar = new CookieJar;
562		}
563
564		if ( $this->parsedUrl && !isset( $attr['domain'] ) ) {
565			$attr['domain'] = $this->parsedUrl['host'];
566		}
567
568		$this->cookieJar->setCookie( $name, $value, $attr );
569	}
570
571	/**
572	 * Parse the cookies in the response headers and store them in the cookie jar.
573	 */
574	protected function parseCookies() {
575		if ( !$this->cookieJar ) {
576			$this->cookieJar = new CookieJar;
577		}
578
579		if ( isset( $this->respHeaders['set-cookie'] ) ) {
580			$url = parse_url( $this->getFinalUrl() );
581			foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
582				$this->cookieJar->parseCookieResponseHeader( $cookie, $url['host'] );
583			}
584		}
585	}
586
587	/**
588	 * Returns the final URL after all redirections.
589	 *
590	 * Relative values of the "Location" header are incorrect as
591	 * stated in RFC, however they do happen and modern browsers
592	 * support them.  This function loops backwards through all
593	 * locations in order to build the proper absolute URI - Marooned
594	 * at wikia-inc.com
595	 *
596	 * Note that the multiple Location: headers are an artifact of
597	 * CURL -- they shouldn't actually get returned this way. Rewrite
598	 * this when T31232 is taken care of (high-level redirect
599	 * handling rewrite).
600	 *
601	 * @return string
602	 */
603	public function getFinalUrl() {
604		$headers = $this->getResponseHeaders();
605
606		// return full url (fix for incorrect but handled relative location)
607		if ( isset( $headers['location'] ) ) {
608			$locations = $headers['location'];
609			$domain = '';
610			$foundRelativeURI = false;
611			$countLocations = count( $locations );
612
613			for ( $i = $countLocations - 1; $i >= 0; $i-- ) {
614				$url = parse_url( $locations[$i] );
615
616				if ( isset( $url['host'] ) ) {
617					$domain = $url['scheme'] . '://' . $url['host'];
618					break; // found correct URI (with host)
619				} else {
620					$foundRelativeURI = true;
621				}
622			}
623
624			if ( !$foundRelativeURI ) {
625				return $locations[$countLocations - 1];
626			}
627			if ( $domain ) {
628				return $domain . $locations[$countLocations - 1];
629			}
630			$url = parse_url( $this->url );
631			if ( isset( $url['host'] ) ) {
632				return $url['scheme'] . '://' . $url['host'] .
633					$locations[$countLocations - 1];
634			}
635		}
636
637		return $this->url;
638	}
639
640	/**
641	 * Returns true if the backend can follow redirects. Overridden by the
642	 * child classes.
643	 * @return bool
644	 */
645	public function canFollowRedirects() {
646		return true;
647	}
648
649	/**
650	 * Set information about the original request. This can be useful for
651	 * endpoints/API modules which act as a proxy for some service, and
652	 * throttling etc. needs to happen in that service.
653	 * Calling this will result in the X-Forwarded-For and X-Original-User-Agent
654	 * headers being set.
655	 * @param WebRequest|array $originalRequest When in array form, it's
656	 *   expected to have the keys 'ip' and 'userAgent'.
657	 * @note IP/user agent is personally identifiable information, and should
658	 *   only be set when the privacy policy of the request target is
659	 *   compatible with that of the MediaWiki installation.
660	 */
661	public function setOriginalRequest( $originalRequest ) {
662		if ( $originalRequest instanceof WebRequest ) {
663			$originalRequest = [
664				'ip' => $originalRequest->getIP(),
665				'userAgent' => $originalRequest->getHeader( 'User-Agent' ),
666			];
667		} elseif (
668			!is_array( $originalRequest )
669			|| array_diff( [ 'ip', 'userAgent' ], array_keys( $originalRequest ) )
670		) {
671			throw new InvalidArgumentException( __METHOD__ . ': $originalRequest must be a '
672				. "WebRequest or an array with 'ip' and 'userAgent' keys" );
673		}
674
675		$this->reqHeaders['X-Forwarded-For'] = $originalRequest['ip'];
676		$this->reqHeaders['X-Original-User-Agent'] = $originalRequest['userAgent'];
677	}
678
679	/**
680	 * Check that the given URI is a valid one.
681	 *
682	 * This hardcodes a small set of protocols only, because we want to
683	 * deterministically reject protocols not supported by all HTTP-transport
684	 * methods.
685	 *
686	 * "file://" specifically must not be allowed, for security reasons
687	 * (see <https://www.mediawiki.org/wiki/Special:Code/MediaWiki/r67684>).
688	 *
689	 * @todo FIXME this is wildly inaccurate and fails to actually check most stuff
690	 *
691	 * @since 1.34
692	 * @param string $uri URI to check for validity
693	 * @return bool
694	 */
695	public static function isValidURI( $uri ) {
696		return (bool)preg_match(
697			'/^https?:\/\/[^\/\s]\S*$/D',
698			$uri
699		);
700	}
701}
702