<?php
/**
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */

use MediaWiki\MediaWikiServices;
use Psr\Log\LoggerAwareInterface;
use Psr\Log\LoggerInterface;
use Psr\Log\NullLogger;

/**
 * This wrapper class will call out to curl (if available) or fallback
 * to regular PHP if necessary for handling internal HTTP requests.
 *
 * Renamed from HttpRequest to MWHttpRequest to avoid conflict with
 * PHP's HTTP extension.
 */
abstract class MWHttpRequest implements LoggerAwareInterface {
	public const SUPPORTS_FILE_POSTS = false;

	/**
	 * Request timeout; replaced in the constructor by the 'timeout' option
	 * or by $wgHTTPTimeout.
	 * @var int|string
	 */
	protected $timeout = 'default';

	/** @var string|null Response body; reset by prepare() and appended to by read() */
	protected $content;
	/** @var bool|null Set to true by prepare() when the request method is HEAD */
	protected $headersOnly = null;
	/** @var array|null Request parameters, from the 'postData' option or setData() */
	protected $postData = null;
	/** @var string|null Proxy to use; resolved by proxySetup() */
	protected $proxy = null;
	/** @var bool When true, no proxy is used at all (takes precedence over $proxy) */
	protected $noProxy = false;
	// The three SSL settings below are only stored by this class
	// (presumably consumed by the concrete transports -- TODO confirm).
	protected $sslVerifyHost = true;
	protected $sslVerifyCert = true;
	protected $caInfo = null;
	/** @var string HTTP method; upper-cased when supplied through the constructor options */
	protected $method = "GET";
	/** @var array Request headers as name => value */
	protected $reqHeaders = [];
	/** @var string|false Request URL as returned by wfExpandUrl() */
	protected $url;
	/** @var array|false Result of wfParseUrl() on the request URL */
	protected $parsedUrl;
	/** @var callable */
	protected $callback;
	// Redirect settings are only stored by this class.
	protected $maxRedirects = 5;
	protected $followRedirects = false;
	/** @var int|string Connection timeout, from the 'connectTimeout' option or $wgHTTPConnectTimeout */
	protected $connectTimeout;

	/**
	 * @var CookieJar
	 */
	protected $cookieJar;

	/** @var string[] Raw response header lines, consumed by parseHeader() */
	protected $headerList = [];
	protected $respVersion = "0.9";
	protected $respStatus = "200 Ok";
	/** @var string[][] */
	protected $respHeaders = [];

	/** @var StatusValue */
	protected $status;

	/**
	 * @var Profiler
	 */
	protected $profiler;

	/**
	 * @var string
	 */
	protected $profileName;

	/**
	 * @var LoggerInterface
	 */
	protected $logger;

	/**
	 * @param string $url Url to use. If protocol-relative, will be expanded to an http:// URL
	 * @param array $options (optional) extra params to pass (see HttpRequestFactory::create())
	 * @codingStandardsIgnoreStart
	 * @phan-param array{timeout?:int|string,connectTimeout?:int|string,postData?:array,proxy?:string,noProxy?:bool,sslVerifyHost?:bool,sslVerifyCert?:bool,caInfo?:string,maxRedirects?:int,followRedirects?:bool,userAgent?:string,logger?:LoggerInterface,username?:string,password?:string,originalRequest?:WebRequest|array{ip:string,userAgent:string},method?:string} $options
	 * @codingStandardsIgnoreEnd
	 * @param string $caller The method making this request, for profiling
	 * @param Profiler|null $profiler An instance of the profiler for profiling, or null
	 * @throws Exception
	 */
	public function __construct(
		$url, array $options = [], $caller = __METHOD__, ?Profiler $profiler = null
	) {
		$this->url = wfExpandUrl( $url, PROTO_HTTP );
		$this->parsedUrl = wfParseUrl( $this->url );

		$this->logger = $options['logger'] ?? new NullLogger();

		if ( !$this->parsedUrl || !self::isValidURI( $this->url ) ) {
			$this->status = StatusValue::newFatal( 'http-invalid-url', $url );
		} else {
			$this->status = StatusValue::newGood( 100 ); // continue
		}

		// NOTE: the comparisons against 'default' are intentionally left loose;
		// making them strict would change how a timeout of 0 is treated on PHP 7.
		if ( isset( $options['timeout'] ) && $options['timeout'] != 'default' ) {
			$this->timeout = $options['timeout'];
		} else {
			// The timeout should always be set by HttpRequestFactory, so this
			// should only happen if the class was directly constructed
			wfDeprecated( __METHOD__ . ' without the timeout option', '1.35' );
			global $wgHTTPTimeout;
			$this->timeout = $wgHTTPTimeout;
		}
		if ( isset( $options['connectTimeout'] ) && $options['connectTimeout'] != 'default' ) {
			$this->connectTimeout = $options['connectTimeout'];
		} else {
			// The timeout should always be set by HttpRequestFactory, so this
			// should only happen if the class was directly constructed
			wfDeprecated( __METHOD__ . ' without the connectTimeout option', '1.35' );
			global $wgHTTPConnectTimeout;
			$this->connectTimeout = $wgHTTPConnectTimeout;
		}
		if ( isset( $options['userAgent'] ) ) {
			$this->setUserAgent( $options['userAgent'] );
		}
		if ( isset( $options['username'] ) && isset( $options['password'] ) ) {
			$this->setHeader(
				'Authorization',
				'Basic ' . base64_encode( $options['username'] . ':' . $options['password'] )
			);
		}
		if ( isset( $options['originalRequest'] ) ) {
			$this->setOriginalRequest( $options['originalRequest'] );
		}

		$this->setHeader( 'X-Request-Id', WebRequest::getRequestId() );

		// Options that map one-to-one onto a member variable of the same name
		$members = [ "postData", "proxy", "noProxy", "sslVerifyHost", "caInfo",
			"method", "followRedirects", "maxRedirects", "sslVerifyCert", "callback" ];

		foreach ( $members as $o ) {
			if ( isset( $options[$o] ) ) {
				// ensure that MWHttpRequest::method is always
				// uppercased. T38137
				if ( $o === 'method' ) {
					$options[$o] = strtoupper( $options[$o] );
				}
				$this->$o = $options[$o];
			}
		}

		if ( $this->noProxy ) {
			$this->proxy = ''; // noProxy takes precedence
		}

		// Profile based on what's calling us
		$this->profiler = $profiler;
		$this->profileName = $caller;
	}

	/**
	 * @param LoggerInterface $logger
	 */
	public function setLogger( LoggerInterface $logger ) {
		$this->logger = $logger;
	}

	/**
	 * Simple function to test if we can make any sort of requests at all, using
	 * cURL or fopen()
	 * @return bool
	 */
	public static function canMakeRequests() {
		return function_exists( 'curl_init' ) || wfIniGetBool( 'allow_url_fopen' );
	}

	/**
	 * Generate a new request object
	 * @deprecated since 1.34, use HttpRequestFactory instead
	 * @param string $url Url to use
	 * @param array|null $options (optional) extra params to pass (see HttpRequestFactory::create())
	 * @param string $caller The method making this request, for profiling
	 * @throws DomainException
	 * @return MWHttpRequest
	 * @see MWHttpRequest::__construct
	 */
	public static function factory( $url, ?array $options = null, $caller = __METHOD__ ) {
		return MediaWikiServices::getInstance()->getHttpRequestFactory()
			->create( $url, $options ?? [], $caller );
	}

	/**
	 * Get the body, or content, of the response to the request
	 *
	 * @return string
	 */
	public function getContent() {
		return $this->content;
	}

	/**
	 * Set the parameters of the request
	 *
	 * @param array $args
	 * @todo overload the args param
	 */
	public function setData( array $args ) {
		$this->postData = $args;
	}

	/**
	 * Take care of setting up the proxy (do nothing if "noProxy" is set)
	 *
	 * @return void
	 */
	protected function proxySetup() {
		// If there is an explicit proxy set and proxies are not disabled, then use it
		if ( $this->proxy && !$this->noProxy ) {
			return;
		}

		// Otherwise, fallback to $wgHTTPProxy if this is not a machine
		// local URL and proxies are not disabled
		if ( self::isLocalURL( $this->url ) || $this->noProxy ) {
			$this->proxy = '';
		} else {
			global $wgHTTPProxy;
			$this->proxy = (string)$wgHTTPProxy;
		}
	}

	/**
	 * Check if the URL can be served by localhost
	 *
	 * Always returns false in command line mode. Otherwise the host and each
	 * of its superdomains is looked up in $wgLocalVirtualHosts.
	 *
	 * @param string $url Full url to check
	 * @return bool
	 */
	private static function isLocalURL( $url ) {
		global $wgCommandLineMode, $wgLocalVirtualHosts;

		if ( $wgCommandLineMode ) {
			return false;
		}

		// Extract host part
		$matches = [];
		if ( preg_match( '!^https?://([\w.-]+)[/:].*$!', $url, $matches ) ) {
			$host = $matches[1];
			// Split up dotwise
			$domainParts = explode( '.', $host );
			// Check if this domain or any superdomain is listed as a local virtual host
			$domainParts = array_reverse( $domainParts );

			$domain = '';
			$countParts = count( $domainParts );
			for ( $i = 0; $i < $countParts; $i++ ) {
				$domainPart = $domainParts[$i];
				if ( $i === 0 ) {
					$domain = $domainPart;
				} else {
					$domain = $domainPart . '.' . $domain;
				}

				// Strict comparison: host names are strings and must not be
				// matched through numeric-string juggling.
				if ( in_array( $domain, $wgLocalVirtualHosts, true ) ) {
					return true;
				}
			}
		}

		return false;
	}

	/**
	 * Set the user agent
	 * @param string $UA
	 */
	public function setUserAgent( $UA ) {
		$this->setHeader( 'User-Agent', $UA );
	}

	/**
	 * Set an arbitrary header
	 * @param string $name
	 * @param string $value
	 */
	public function setHeader( $name, $value ) {
		// I feel like I should normalize the case here...
		$this->reqHeaders[$name] = $value;
	}

	/**
	 * Get an array of the headers
	 *
	 * When a cookie jar is set, the Cookie request header is (re)generated
	 * from it before the list is built.
	 *
	 * @return array List of "Name: value" strings
	 */
	protected function getHeaderList() {
		$list = [];

		if ( $this->cookieJar ) {
			$this->reqHeaders['Cookie'] =
				$this->cookieJar->serializeToHttpRequest(
					$this->parsedUrl['path'],
					$this->parsedUrl['host']
				);
		}

		foreach ( $this->reqHeaders as $name => $value ) {
			$list[] = "$name: $value";
		}

		return $list;
	}

	/**
	 * Set a read callback to accept data read from the HTTP request.
	 * By default, data is appended to an internal buffer which can be
	 * retrieved through $req->getContent().
	 *
	 * To handle data as it comes in -- especially for large files that
	 * would not fit in memory -- you can instead set your own callback,
	 * in the form function($resource, $buffer) where the first parameter
	 * is the low-level resource being read (implementation specific),
	 * and the second parameter is the data buffer.
	 *
	 * You MUST return the number of bytes handled in the buffer; if fewer
	 * bytes are reported handled than were passed to you, the HTTP fetch
	 * will be aborted.
	 *
	 * @param callable|null $callback
	 * @throws InvalidArgumentException
	 */
	public function setCallback( $callback ) {
		$this->doSetCallback( $callback );
	}

	/**
	 * Worker function for setting callbacks. Calls can originate both internally and externally
	 * (via setCallback). Defaults to the internal read callback if $callback is null.
	 *
	 * @param callable|null $callback
	 * @throws InvalidArgumentException
	 */
	protected function doSetCallback( $callback ) {
		if ( $callback === null ) {
			$callback = [ $this, 'read' ];
		} elseif ( !is_callable( $callback ) ) {
			// Record the failure on the request status before bailing out
			$this->status->fatal( 'http-internal-error' );
			throw new InvalidArgumentException( __METHOD__ . ': invalid callback' );
		}
		$this->callback = $callback;
	}

	/**
	 * A generic callback to read the body of the response from a remote
	 * server.
	 *
	 * @param resource $fh
	 * @param string $content
	 * @return int
	 * @internal
	 */
	public function read( $fh, $content ) {
		$this->content .= $content;
		return strlen( $content );
	}

	/**
	 * Take care of whatever is necessary to perform the URI request.
	 *
	 * @return Status
	 * @note currently returns Status for B/C
	 */
	public function execute() {
		throw new LogicException( 'children must override this' );
	}

	/**
	 * Reset the response buffer and fill in the request defaults: the
	 * headers-only flag for HEAD, the proxy, the read callback and the
	 * User-Agent header.
	 */
	protected function prepare() {
		$this->content = "";

		if ( strtoupper( $this->method ) === "HEAD" ) {
			$this->headersOnly = true;
		}

		$this->proxySetup(); // set up any proxy as needed

		if ( !$this->callback ) {
			$this->doSetCallback( null );
		}

		if ( !isset( $this->reqHeaders['User-Agent'] ) ) {
			$http = MediaWikiServices::getInstance()->getHttpRequestFactory();
			$this->setUserAgent( $http->getUserAgent() );
		}
	}

	/**
	 * Parses the headers, including the HTTP status code and any
	 * Set-Cookie headers. This function expects the headers to be
	 * found in an array in the member variable headerList.
	 */
	protected function parseHeader() {
		// Lower-cased name of the most recently parsed header, or null while
		// no header has been seen yet.
		$lastname = null;

		// Failure without (valid) headers gets a response status of zero
		if ( !$this->status->isOK() ) {
			$this->respStatus = '0 Error';
		}

		foreach ( $this->headerList as $header ) {
			if ( preg_match( "#^HTTP/([0-9.]+) (.*)#", $header, $match ) ) {
				$this->respVersion = $match[1];
				$this->respStatus = $match[2];
			} elseif ( preg_match( "#^[ \t]#", $header ) ) {
				// Folded continuation line: append it to the previous header value.
				// A continuation with nothing to continue is malformed input and
				// is skipped rather than counting an undefined entry.
				if ( $lastname === null ) {
					continue;
				}
				$last = count( $this->respHeaders[$lastname] ) - 1;
				$this->respHeaders[$lastname][$last] .= "\r\n$header";
			} elseif ( preg_match( "#^([^:]*):[\t ]*(.*)#", $header, $match ) ) {
				$lastname = strtolower( $match[1] );
				$this->respHeaders[$lastname][] = $match[2];
			}
		}

		$this->parseCookies();
	}

	/**
	 * Sets HTTPRequest status member to a fatal value with the error
	 * message if the returned integer value of the status code was
	 * not successful (1-299) or a redirect (300-399).
	 * See RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
	 * for a list of status codes.
	 */
	protected function setStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$statusCode = (int)$this->respStatus;

		if ( $statusCode > 0 && $statusCode < 400 ) {
			$this->status->setResult( true, $statusCode );
		} else {
			// The reason phrase may be missing (e.g. "404"), so pad the pieces
			// to always have both a code and a (possibly empty) message.
			[ $code, $message ] = array_pad( explode( " ", $this->respStatus, 2 ), 2, '' );
			$this->status->setResult( false, $statusCode );
			$this->status->fatal( "http-bad-status", $code, $message );
		}
	}

	/**
	 * Get the integer value of the HTTP status code (e.g. 200 for "200 Ok")
	 * (see RFC2616, section 10, http://www.w3.org/Protocols/rfc2616/rfc2616-sec10.html
	 * for a list of status codes.)
	 *
	 * @return int
	 */
	public function getStatus() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return (int)$this->respStatus;
	}

	/**
	 * Returns true if the last status code was a redirect.
	 *
	 * Only the status codes 300 through 303 are treated as redirects here.
	 *
	 * @return bool
	 */
	public function isRedirect() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$status = (int)$this->respStatus;

		return $status >= 300 && $status <= 303;
	}

	/**
	 * Returns an associative array of response headers after the
	 * request has been executed. Because some headers
	 * (e.g. Set-Cookie) can appear more than once, each value of
	 * the associative array is an array of the values given.
	 * Header names are always in lowercase.
	 *
	 * @return array
	 */
	public function getResponseHeaders() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->respHeaders;
	}

	/**
	 * Returns the value of the given response header.
	 *
	 * If the header was received more than once, the last value is returned.
	 *
	 * @param string $header case-insensitive
	 * @return string|null
	 */
	public function getResponseHeader( $header ) {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		$name = strtolower( $header );
		if ( isset( $this->respHeaders[$name] ) ) {
			$v = $this->respHeaders[$name];
			return $v[count( $v ) - 1];
		}

		return null;
	}

	/**
	 * Tells the MWHttpRequest object to use this pre-loaded CookieJar.
	 *
	 * To read response cookies from the jar, getCookieJar must be called first.
	 *
	 * @param CookieJar $jar
	 */
	public function setCookieJar( CookieJar $jar ) {
		$this->cookieJar = $jar;
	}

	/**
	 * Returns the cookie jar in use.
	 *
	 * @return CookieJar
	 */
	public function getCookieJar() {
		if ( !$this->respHeaders ) {
			$this->parseHeader();
		}

		return $this->cookieJar;
	}

	/**
	 * Sets a cookie. Used before a request to set up any individual
	 * cookies. Used internally after a request to parse the
	 * Set-Cookie headers.
	 * @see Cookie::set
	 * @param string $name
	 * @param string $value
	 * @param array $attr
	 */
	public function setCookie( $name, $value, array $attr = [] ) {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		// Default the cookie domain to the host of the request URL
		if ( $this->parsedUrl && !isset( $attr['domain'] ) ) {
			$attr['domain'] = $this->parsedUrl['host'];
		}

		$this->cookieJar->setCookie( $name, $value, $attr );
	}

	/**
	 * Parse the cookies in the response headers and store them in the cookie jar.
	 */
	protected function parseCookies() {
		if ( !$this->cookieJar ) {
			$this->cookieJar = new CookieJar;
		}

		if ( isset( $this->respHeaders['set-cookie'] ) ) {
			$url = parse_url( $this->getFinalUrl() );
			// parse_url() may fail or yield no host; pass null explicitly in
			// that case instead of reading an undefined array offset.
			$host = $url['host'] ?? null;
			foreach ( $this->respHeaders['set-cookie'] as $cookie ) {
				$this->cookieJar->parseCookieResponseHeader( $cookie, $host );
			}
		}
	}

	/**
	 * Returns the final URL after all redirections.
	 *
	 * Relative values of the "Location" header are incorrect as
	 * stated in RFC, however they do happen and modern browsers
	 * support them. This function loops backwards through all
	 * locations in order to build the proper absolute URI - Marooned
	 * at wikia-inc.com
	 *
	 * Note that the multiple Location: headers are an artifact of
	 * CURL -- they shouldn't actually get returned this way. Rewrite
	 * this when T31232 is taken care of (high-level redirect
	 * handling rewrite).
	 *
	 * @return string
	 */
	public function getFinalUrl() {
		$headers = $this->getResponseHeaders();

		// return full url (fix for incorrect but handled relative location)
		if ( isset( $headers['location'] ) ) {
			$locations = $headers['location'];
			$domain = '';
			$foundRelativeURI = false;
			$countLocations = count( $locations );
			// Scheme inherited by URLs that carry a host but no scheme
			// (e.g. "//example.org/path"); defaults to http when the request
			// URL itself has none.
			$fallbackScheme = parse_url( (string)$this->url, PHP_URL_SCHEME ) ?: 'http';

			for ( $i = $countLocations - 1; $i >= 0; $i-- ) {
				$url = parse_url( $locations[$i] );

				if ( isset( $url['host'] ) ) {
					$domain = ( $url['scheme'] ?? $fallbackScheme ) . '://' . $url['host'];
					break; // found correct URI (with host)
				} else {
					$foundRelativeURI = true;
				}
			}

			if ( !$foundRelativeURI ) {
				return $locations[$countLocations - 1];
			}
			if ( $domain ) {
				return $domain . $locations[$countLocations - 1];
			}
			// No Location carried a host: resolve against the request URL
			$url = parse_url( $this->url );
			if ( isset( $url['host'] ) ) {
				return ( $url['scheme'] ?? $fallbackScheme ) . '://' . $url['host'] .
					$locations[$countLocations - 1];
			}
		}

		return $this->url;
	}

	/**
	 * Returns true if the backend can follow redirects. Overridden by the
	 * child classes.
	 * @return bool
	 */
	public function canFollowRedirects() {
		return true;
	}

	/**
	 * Set information about the original request. This can be useful for
	 * endpoints/API modules which act as a proxy for some service, and
	 * throttling etc. needs to happen in that service.
	 * Calling this will result in the X-Forwarded-For and X-Original-User-Agent
	 * headers being set.
	 * @param WebRequest|array $originalRequest When in array form, it's
	 *   expected to have the keys 'ip' and 'userAgent'.
	 * @throws InvalidArgumentException If $originalRequest is neither a
	 *   WebRequest nor an array with both expected keys
	 * @note IP/user agent is personally identifiable information, and should
	 *   only be set when the privacy policy of the request target is
	 *   compatible with that of the MediaWiki installation.
	 */
	public function setOriginalRequest( $originalRequest ) {
		if ( $originalRequest instanceof WebRequest ) {
			$originalRequest = [
				'ip' => $originalRequest->getIP(),
				'userAgent' => $originalRequest->getHeader( 'User-Agent' ),
			];
		} elseif (
			!is_array( $originalRequest )
			|| array_diff( [ 'ip', 'userAgent' ], array_keys( $originalRequest ) )
		) {
			throw new InvalidArgumentException( __METHOD__ . ': $originalRequest must be a '
				. "WebRequest or an array with 'ip' and 'userAgent' keys" );
		}

		$this->reqHeaders['X-Forwarded-For'] = $originalRequest['ip'];
		$this->reqHeaders['X-Original-User-Agent'] = $originalRequest['userAgent'];
	}

	/**
	 * Check that the given URI is a valid one.
	 *
	 * This hardcodes a small set of protocols only, because we want to
	 * deterministically reject protocols not supported by all HTTP-transport
	 * methods.
	 *
	 * "file://" specifically must not be allowed, for security reasons
	 * (see <https://www.mediawiki.org/wiki/Special:Code/MediaWiki/r67684>).
	 *
	 * @todo FIXME this is wildly inaccurate and fails to actually check most stuff
	 *
	 * @since 1.34
	 * @param string $uri URI to check for validity
	 * @return bool
	 */
	public static function isValidURI( $uri ) {
		return (bool)preg_match(
			'/^https?:\/\/[^\/\s]\S*$/D',
			$uri
		);
	}
}