<?php

declare(strict_types=1);

namespace Sabre\HTTP;

use DateTime;
use InvalidArgumentException;

/**
 * A collection of useful helpers for parsing or generating various HTTP
 * headers.
 *
 * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
 * @author Evert Pot (http://evertpot.com/)
 * @license http://sabre.io/license/ Modified BSD License
 */

/**
 * Parses a HTTP date-string.
 *
 * This method returns false if the date is invalid.
 *
 * The following formats are supported:
 *    Sun, 06 Nov 1994 08:49:37 GMT    ; IMF-fixdate
 *    Sunday, 06-Nov-94 08:49:37 GMT   ; obsolete RFC 850 format
 *    Sun Nov  6 08:49:37 1994         ; ANSI C's asctime() format
 *
 * See:
 *   http://tools.ietf.org/html/rfc7231#section-7.1.1.1
 *
 * @param string $dateString The raw header value; surrounding spaces are
 *                           stripped before matching.
 *
 * @return bool|DateTime A DateTime on success, false when the string does not
 *                       match one of the supported formats or cannot be
 *                       parsed.
 */
function parseDate(string $dateString)
{
    // Only the format is checked, valid ranges are checked by DateTime below.
    $month = '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)';
    $weekday = '(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)';
    $wkday = '(Mon|Tue|Wed|Thu|Fri|Sat|Sun)';
    $time = '([0-1]\d|2[0-3])(\:[0-5]\d){2}';
    $date3 = $month.' ([12]\d|3[01]| [1-9])';
    $date2 = '(0[1-9]|[12]\d|3[01])\-'.$month.'\-\d{2}';
    // 4-digit year cannot begin with 0 - unix timestamp begins in 1970
    $date1 = '(0[1-9]|[12]\d|3[01]) '.$month.' [1-9]\d{3}';

    // ANSI C's asctime() format
    // 4-digit year cannot begin with 0 - unix timestamp begins in 1970
    $asctime_date = $wkday.' '.$date3.' '.$time.' [1-9]\d{3}';
    // RFC 850, obsoleted by RFC 1036
    $rfc850_date = $weekday.', '.$date2.' '.$time.' GMT';
    // RFC 822, updated by RFC 1123
    $rfc1123_date = $wkday.', '.$date1.' '.$time.' GMT';
    // allowed date formats by RFC 2616
    $HTTP_date = "($rfc1123_date|$rfc850_date|$asctime_date)";

    // allow for space around the string and strip it
    $dateString = trim($dateString, ' ');
    if (!preg_match('/^'.$HTTP_date.'$/', $dateString)) {
        return false;
    }

    // append implicit GMT timezone to ANSI C time format
    if (false === strpos($dateString, ' GMT')) {
        $dateString .= ' GMT';
    }

    try {
        return new DateTime($dateString, new \DateTimeZone('UTC'));
    } catch (\Exception $e) {
        return false;
    }
}

/**
 * Transforms a DateTime object to a valid HTTP/1.1 Date header value.
 *
 * The passed object is not modified; a clone is converted to GMT.
 */
function toDate(DateTime $dateTime): string
{
    // We need to clone it, as we don't want to affect the existing
    // DateTime.
    $dateTime = clone $dateTime;
    $dateTime->setTimezone(new \DateTimeZone('GMT'));

    return $dateTime->format('D, d M Y H:i:s \G\M\T');
}

/**
 * This function can be used to aid with content negotiation.
 *
 * It takes 2 arguments, the $acceptHeaderValue, which usually comes from
 * an Accept header, and $availableOptions, which contains an array of
 * items that the server can support.
 *
 * The result of this function will be the 'best possible option'. If no
 * best possible option could be found, null is returned.
 *
 * When it's null you can according to the spec either return a default, or
 * you can choose to emit 406 Not Acceptable.
 *
 * The method also accepts sending 'null' for the $acceptHeaderValue,
 * implying that no accept header was sent. In that case the first available
 * option is returned (or null when there are no options at all).
 *
 * Malformed entries in $acceptHeaderValue are ignored. Malformed entries in
 * $availableOptions are a programming error and raise an exception.
 *
 * @param string|null $acceptHeaderValue
 * @param string[]    $availableOptions
 *
 * @throws InvalidArgumentException when one of $availableOptions is not a
 *                                  valid mime-type
 *
 * @return string|null
 */
function negotiateContentType($acceptHeaderValue, array $availableOptions)
{
    if (!$acceptHeaderValue) {
        // Grabbing the first in the list. reset() yields false on an empty
        // array, so that case is mapped to the documented null.
        return $availableOptions ? reset($availableOptions) : null;
    }

    // The Accept header is client-controlled: a broken entry must not abort
    // the negotiation, so it is turned into null and skipped further down.
    $proposals = array_map(
        static function (string $rawProposal) {
            try {
                return parseMimeType($rawProposal);
            } catch (InvalidArgumentException $e) {
                return null;
            }
        },
        explode(',', $acceptHeaderValue)
    );

    // Ensuring array keys are reset.
    $availableOptions = array_values($availableOptions);

    $options = array_map(
        'Sabre\HTTP\parseMimeType',
        $availableOptions
    );

    $lastQuality = 0.0;
    $lastSpecificity = 0;
    $lastOptionIndex = 0;
    $lastChoice = null;

    foreach ($proposals as $proposal) {
        // Ignoring broken values.
        if (null === $proposal) {
            continue;
        }

        // parseMimeType() yields int 1 when q is absent and a float when it
        // is present; normalising keeps the strict comparisons below honest
        // (1 === 1.0 is false in PHP).
        $quality = (float) $proposal['quality'];

        // If the quality is lower we don't have to bother comparing.
        if ($quality < $lastQuality) {
            continue;
        }

        foreach ($options as $optionIndex => $option) {
            if ('*' !== $proposal['type'] && $proposal['type'] !== $option['type']) {
                // no match on type.
                continue;
            }
            if ('*' !== $proposal['subType'] && $proposal['subType'] !== $option['subType']) {
                // no match on subtype.
                continue;
            }

            // Any parameters appearing on the options must appear on
            // proposals.
            foreach ($option['parameters'] as $paramName => $paramValue) {
                if (!array_key_exists($paramName, $proposal['parameters'])) {
                    continue 2;
                }
                if ($paramValue !== $proposal['parameters'][$paramName]) {
                    continue 2;
                }
            }

            // If we got here, we have a match on parameters, type and
            // subtype. We need to calculate a score for how specific the
            // match was.
            $specificity =
                ('*' !== $proposal['type'] ? 20 : 0) +
                ('*' !== $proposal['subType'] ? 10 : 0) +
                count($option['parameters']);

            // Does this entry win? Order of precedence: higher quality, then
            // higher specificity, then earlier position in the option list.
            $sameQuality = $quality === $lastQuality;
            if (
                ($quality > $lastQuality) ||
                ($sameQuality && $specificity > $lastSpecificity) ||
                ($sameQuality && $specificity === $lastSpecificity && $optionIndex < $lastOptionIndex)
            ) {
                $lastQuality = $quality;
                $lastSpecificity = $specificity;
                $lastOptionIndex = $optionIndex;
                $lastChoice = $availableOptions[$optionIndex];
            }
        }
    }

    return $lastChoice;
}

/**
 * Parses the Prefer header, as defined in RFC7240.
 *
 * Input can be given as a single header value (string) or multiple headers
 * (array of string).
 *
 * This method will return a key->value array with the various Prefer
 * parameters.
 *
 * Prefer: return=minimal will result in:
 *
 * [ 'return' => 'minimal' ]
 *
 * Prefer: foo, wait=10 will result in:
 *
 * [ 'foo' => true, 'wait' => '10']
 *
 * This method also supports the formats from older drafts of RFC7240, and
 * it will automatically map them to the new values, as the older values
 * are still pretty common.
 *
 * Parameters are currently discarded. There's no known prefer value that
 * uses them.
 *
 * @param string|string[] $input
 *
 * @return array Map of lower-cased preference name to its string value, or
 *               true for preferences without a (non-empty) value.
 */
function parsePrefer($input): array
{
    $token = '[!#$%&\'*+\-.^_`~A-Za-z0-9]+';

    // Work in progress
    $word = '(?: [a-zA-Z0-9]+ | "[a-zA-Z0-9]*" )';

    $regex = <<<REGEX
/
^
(?<name> $token)      # Prefer property name
\s*                   # Optional space
(?: = \s*             # Prefer property value
   (?<value> $word)
)?
(?: \s* ; (?: .*))?   # Prefer parameters (ignored)
$
/x
REGEX;

    $output = [];
    foreach (getHeaderValues($input) as $headerValue) {
        if (!preg_match($regex, $headerValue, $matches)) {
            // Ignore
            continue;
        }

        // Mapping old values to their new counterparts
        switch ($matches['name']) {
            case 'return-asynch':
                $output['respond-async'] = true;
                break;
            case 'return-representation':
                $output['return'] = 'representation';
                break;
            case 'return-minimal':
                $output['return'] = 'minimal';
                break;
            case 'strict':
                $output['handling'] = 'strict';
                break;
            case 'lenient':
                $output['handling'] = 'lenient';
                break;
            default:
                // A bare token, or an empty quoted value (""), is a boolean
                // flag. The comparison is against '' rather than empty() so
                // that the legitimate value "0" (e.g. wait=0) is preserved.
                $preferValue = isset($matches['value']) ? trim($matches['value'], '"') : '';
                $output[strtolower($matches['name'])] = '' === $preferValue ? true : $preferValue;
                break;
        }
    }

    return $output;
}

/**
 * This method splits up headers into all their individual values.
 *
 * A HTTP header may have more than one value, such as this:
 *   Cache-Control: private, no-store
 *
 * Header values are always split with a comma.
 *
 * You can pass either a string, or an array. The resulting value is always
 * an array with each spliced value, trimmed of surrounding whitespace.
 *
 * If the second headers argument is set, this value will simply be merged
 * in. This makes it quicker to merge an old list of values with a new set.
 *
 * @param string|string[] $values
 * @param string|string[] $values2
 */
function getHeaderValues($values, $values2 = null): array
{
    $values = (array) $values;
    if ($values2) {
        $values = array_merge($values, (array) $values2);
    }

    $result = [];
    foreach ($values as $l1) {
        foreach (explode(',', $l1) as $l2) {
            $result[] = trim($l2);
        }
    }

    return $result;
}

/**
 * Parses a mime-type and splits it into:.
 *
 * 1. type
 * 2. subtype
 * 3. quality
 * 4. parameters
 *
 * The returned array has the keys 'type', 'subType', 'quality' and
 * 'parameters'. 'quality' is 1 when no q= parameter is present, otherwise the
 * q value cast to float. 'parameters' maps each parameter name to the full,
 * trimmed "name=value" segment it was read from.
 *
 * @throws InvalidArgumentException when $str does not have the form
 *                                  "type/subtype" (a lone "*" is accepted as
 *                                  shorthand for the wildcard type)
 */
function parseMimeType(string $str): array
{
    $parameters = [];
    // If no q= parameter appears, then quality = 1.
    $quality = 1;

    $parts = explode(';', $str);

    // The first part is the mime-type.
    $mimeType = trim(array_shift($parts));

    if ('*' === $mimeType) {
        $mimeType = '*/*';
    }

    $mimeType = explode('/', $mimeType);
    if (2 !== count($mimeType)) {
        // Illegal value
        throw new InvalidArgumentException('Not a valid mime-type: '.$str);
    }
    list($type, $subType) = $mimeType;

    foreach ($parts as $part) {
        $part = trim($part);
        // Note: a '=' at offset 0 makes strpos() return 0, so such a part is
        // treated as a value-less parameter.
        if (strpos($part, '=')) {
            list($partName, $partValue) =
                explode('=', $part, 2);
        } else {
            $partName = $part;
            $partValue = null;
        }

        // The quality parameter, if it appears, also marks the end of
        // the parameter list. Anything after the q= counts as an
        // 'accept extension' and could introduce new semantics in
        // content-negotiation.
        if ('q' !== $partName) {
            $parameters[$partName] = $part;
        } else {
            $quality = (float) $partValue;
            break; // Stop parsing parts
        }
    }

    return [
        'type' => $type,
        'subType' => $subType,
        'quality' => $quality,
        'parameters' => $parameters,
    ];
}

/**
 * Encodes the path of a url.
 *
 * slashes (/) are treated as path-separators and are left untouched. Every
 * byte outside A-Z a-z 0-9 _ - . ~ ( ) / : @ is percent-encoded using
 * lower-case hex digits.
 */
function encodePath(string $path): string
{
    return preg_replace_callback('/([^A-Za-z0-9_\-\.~\(\)\/:@])/', static function (array $match): string {
        return '%'.sprintf('%02x', ord($match[0]));
    }, $path);
}

/**
 * Encodes a 1 segment of a path.
 *
 * Slashes are considered part of the name, and are encoded as %2f
 */
function encodePathSegment(string $pathSegment): string
{
    return preg_replace_callback('/([^A-Za-z0-9_\-\.~\(\):@])/', static function (array $match): string {
        return '%'.sprintf('%02x', ord($match[0]));
    }, $pathSegment);
}

/**
 * Decodes a url-encoded path.
 *
 * Delegates to decodePathSegment(); the whole path is decoded in one go.
 */
function decodePath(string $path): string
{
    return decodePathSegment($path);
}

/**
 * Decodes a url-encoded path segment.
 *
 * After percent-decoding, the result is converted to UTF-8 when it is
 * detected as ISO-8859-1.
 */
function decodePathSegment(string $path): string
{
    $path = rawurldecode($path);
    $encoding = mb_detect_encoding($path, ['UTF-8', 'ISO-8859-1']);

    if ('ISO-8859-1' === $encoding) {
        // Same conversion utf8_encode() performed, without relying on a
        // function that is deprecated as of PHP 8.2.
        $path = mb_convert_encoding($path, 'UTF-8', 'ISO-8859-1');
    }

    return $path;
}