1<?php
2
3declare(strict_types=1);
4
5namespace Sabre\HTTP;
6
7use DateTime;
8use InvalidArgumentException;
9
10/**
11 * A collection of useful helpers for parsing or generating various HTTP
12 * headers.
13 *
14 * @copyright Copyright (C) fruux GmbH (https://fruux.com/)
15 * @author Evert Pot (http://evertpot.com/)
16 * @license http://sabre.io/license/ Modified BSD License
17 */
18
/**
 * Converts the value of an HTTP date header into a DateTime object.
 *
 * False is returned when the string is not a valid HTTP date.
 *
 * Three notations are recognised:
 *    Sun, 06 Nov 1994 08:49:37 GMT    ; IMF-fixdate
 *    Sunday, 06-Nov-94 08:49:37 GMT   ; obsolete RFC 850 format
 *    Sun Nov  6 08:49:37 1994         ; ANSI C's asctime() format
 *
 * Spaces around the value are ignored. The asctime() notation carries no
 * timezone, so GMT is assumed for it.
 *
 * See:
 *   http://tools.ietf.org/html/rfc7231#section-7.1.1.1
 *
 * @return bool|DateTime the parsed date, or false when the string matches
 *                       none of the notations or cannot be turned into a date
 */
function parseDate(string $dateString)
{
    // The expressions below only validate the shape of the string; whether
    // the values form a real date is left to the DateTime constructor.
    $monthName = '(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)';
    $longDayName = '(Monday|Tuesday|Wednesday|Thursday|Friday|Saturday|Sunday)';
    $shortDayName = '(Mon|Tue|Wed|Thu|Fri|Sat|Sun)';
    $clock = '([0-1]\d|2[0-3])(\:[0-5]\d){2}';

    // Date portion of each notation. A 4-digit year cannot begin with 0 -
    // unix timestamp begins in 1970.
    $fixdateDay = '(0[1-9]|[12]\d|3[01]) '.$monthName.' [1-9]\d{3}';
    $rfc850Day = '(0[1-9]|[12]\d|3[01])\-'.$monthName.'\-\d{2}';
    $asctimeDay = $monthName.' ([12]\d|3[01]| [1-9])';

    $notations = [
        // RFC 822, updated by RFC 1123
        $shortDayName.', '.$fixdateDay.' '.$clock.' GMT',
        // RFC 850, obsoleted by RFC 1036
        $longDayName.', '.$rfc850Day.' '.$clock.' GMT',
        // ANSI C's asctime() format
        $shortDayName.' '.$asctimeDay.' '.$clock.' [1-9]\d{3}',
    ];
    $pattern = '/^('.implode('|', $notations).')$/';

    // Surrounding spaces are tolerated and stripped before matching.
    $candidate = trim($dateString, ' ');
    if (1 !== preg_match($pattern, $candidate)) {
        return false;
    }

    // Only the asctime() notation lacks the timezone; make it explicit.
    if (false === strpos($candidate, ' GMT')) {
        $candidate .= ' GMT';
    }

    try {
        $parsed = new DateTime($candidate, new \DateTimeZone('UTC'));
    } catch (\Exception $e) {
        // Well-formed, but not an actual date.
        return false;
    }

    return $parsed;
}
73
/**
 * Formats a DateTime as an HTTP/1.1 Date header value, for example
 * "Sun, 06 Nov 1994 08:49:37 GMT".
 *
 * The DateTime that is passed in is left untouched.
 */
function toDate(DateTime $dateTime): string
{
    // The timezone switch happens on a copy, so the caller's object keeps
    // the timezone it came in with.
    return (clone $dateTime)
        ->setTimezone(new \DateTimeZone('GMT'))
        ->format('D, d M Y H:i:s \G\M\T');
}
86
/**
 * This function can be used to aid with content negotiation.
 *
 * It takes 2 arguments, the $acceptHeaderValue, which usually comes from
 * an Accept header, and $availableOptions, which contains an array of
 * items that the server can support.
 *
 * The result of this function will be the 'best possible option'. If no
 * best possible option could be found, null is returned.
 *
 * When it's null you can according to the spec either return a default, or
 * you can choose to emit 406 Not Acceptable.
 *
 * The method also accepts sending 'null' for the $acceptHeaderValue,
 * implying that no accept header was sent. In that case the first available
 * option is returned (or null when there are no options at all).
 *
 * Candidates are ranked by, in order:
 *   1. the quality (q=) of the matching Accept entry, higher wins;
 *   2. how specific that Accept entry is, more specific wins;
 *   3. the position in $availableOptions, earlier wins.
 *
 * Accept entries for which parseMimeType() throws an
 * InvalidArgumentException are ignored. Entries with a quality of 0 or less
 * never select an option.
 *
 * @param string|null $acceptHeaderValue
 * @param string[]    $availableOptions  mime-types the server can produce
 *
 * @return string|null
 */
function negotiateContentType($acceptHeaderValue, array $availableOptions)
{
    // Ensuring array keys are reset.
    $availableOptions = array_values($availableOptions);

    if (!$acceptHeaderValue) {
        // Grabbing the first in the list. An empty list results in null,
        // not in the false that reset() would have produced.
        return $availableOptions[0] ?? null;
    }

    $proposals = [];
    foreach (explode(',', $acceptHeaderValue) as $rawProposal) {
        try {
            $proposals[] = parseMimeType($rawProposal);
        } catch (InvalidArgumentException $e) {
            // Ignoring broken values.
        }
    }

    $options = array_map(
        'Sabre\HTTP\parseMimeType',
        $availableOptions
    );

    $lastQuality = 0.0;
    $lastSpecificity = 0;
    $lastOptionIndex = 0;
    $lastChoice = null;

    foreach ($proposals as $proposal) {
        // The parsed quality is an int when it was implied and a float when
        // q= was given. It is normalised here, because the strict
        // comparisons below would otherwise treat 1 and 1.0 as different
        // values and skip the tie-breaking rules.
        $quality = (float) $proposal['quality'];

        // q=0 (or less) can never select an option.
        if ($quality <= 0.0) {
            continue;
        }

        // If the quality is lower we don't have to bother comparing.
        if ($quality < $lastQuality) {
            continue;
        }

        foreach ($options as $optionIndex => $option) {
            if ('*' !== $proposal['type'] && $proposal['type'] !== $option['type']) {
                // no match on type.
                continue;
            }
            if ('*' !== $proposal['subType'] && $proposal['subType'] !== $option['subType']) {
                // no match on subtype.
                continue;
            }

            // Any parameters appearing on the options must appear on
            // proposals.
            foreach ($option['parameters'] as $paramName => $paramValue) {
                if (!array_key_exists($paramName, $proposal['parameters'])) {
                    continue 2;
                }
                if ($paramValue !== $proposal['parameters'][$paramName]) {
                    continue 2;
                }
            }

            // If we got here, we have a match on parameters, type and
            // subtype. We need to calculate a score for how specific the
            // match was.
            $specificity =
                ('*' !== $proposal['type'] ? 20 : 0) +
                ('*' !== $proposal['subType'] ? 10 : 0) +
                count($option['parameters']);

            // Does this entry win?
            if (
                ($quality > $lastQuality) ||
                ($quality === $lastQuality && $specificity > $lastSpecificity) ||
                ($quality === $lastQuality && $specificity === $lastSpecificity && $optionIndex < $lastOptionIndex)
            ) {
                $lastQuality = $quality;
                $lastSpecificity = $specificity;
                $lastOptionIndex = $optionIndex;
                $lastChoice = $availableOptions[$optionIndex];
            }
        }
    }

    return $lastChoice;
}
188
/**
 * Parses the Prefer header, as defined in RFC7240.
 *
 * Input can be given as a single header value (string) or multiple headers
 * (array of string).
 *
 * This method will return a key->value array with the various Prefer
 * parameters.
 *
 * Prefer: return=minimal will result in:
 *
 * [ 'return' => 'minimal' ]
 *
 * Prefer: foo, wait=10 will result in:
 *
 * [ 'foo' => true, 'wait' => '10']
 *
 * A preference without a value, or with a zero-length value (foo=""), is
 * reported as true. Any other value is reported as a string with the
 * surrounding quotes removed, so wait=0 results in [ 'wait' => '0' ].
 *
 * This method also supports the formats from older drafts of RFC7240, and
 * it will automatically map them to the new values, as the older values
 * are still pretty common.
 *
 * Parameters are currently discarded. There's no known prefer value that
 * uses them.
 *
 * Values that do not match the expected syntax are silently ignored.
 *
 * @param string|string[] $input
 *
 * @return array<string, string|bool>
 */
function parsePrefer($input): array
{
    $token = '[!#$%&\'*+\-.^_`~A-Za-z0-9]+';

    // Work in progress
    $word = '(?: [a-zA-Z0-9]+ | "[a-zA-Z0-9]*" )';

    $regex = <<<REGEX
/
^
(?<name> $token)      # Prefer property name
\s*                   # Optional space
(?: = \s*             # Prefer property value
   (?<value> $word)
)?
(?: \s* ; (?: .*))?   # Prefer parameters (ignored)
$
/x
REGEX;

    $output = [];
    foreach (getHeaderValues($input) as $preference) {
        if (!preg_match($regex, $preference, $matches)) {
            // Ignore
            continue;
        }

        // Mapping old values to their new counterparts
        switch ($matches['name']) {
            case 'return-asynch':
                $output['respond-async'] = true;
                break;
            case 'return-representation':
                $output['return'] = 'representation';
                break;
            case 'return-minimal':
                $output['return'] = 'minimal';
                break;
            case 'strict':
                $output['handling'] = 'strict';
                break;
            case 'lenient':
                $output['handling'] = 'lenient';
                break;
            default:
                // The 'value' group is absent from $matches when the
                // preference was given without "=value".
                $preferenceValue = isset($matches['value']) ? trim($matches['value'], '"') : '';

                // Only a missing or zero-length value collapses to true. A
                // plain empty() check would also swallow the value "0".
                $output[strtolower($matches['name'])] = '' === $preferenceValue ? true : $preferenceValue;
                break;
        }
    }

    return $output;
}
272
/**
 * Breaks one or more header lines up into their individual values.
 *
 * A single HTTP header may carry more than one value, such as this:
 *   Cache-Control: private, no-store
 *
 * Values are always separated by a comma, and each value is returned with
 * the surrounding whitespace removed.
 *
 * Both arguments may be a string or an array of strings. When the second
 * argument is given (and not empty) its values are appended after those of
 * the first, which makes it easy to combine an existing list of values with
 * a new one.
 *
 * @param string|string[] $values
 * @param string|string[] $values2
 *
 * @return string[] flat list with every individual value
 */
function getHeaderValues($values, $values2 = null): array
{
    $headerLines = (array) $values;
    if ($values2) {
        $headerLines = array_merge($headerLines, (array) $values2);
    }

    $result = [];
    foreach ($headerLines as $headerLine) {
        // explode() always yields at least one element, so there is always
        // something to push.
        $pieces = array_map('trim', explode(',', $headerLine));
        array_push($result, ...$pieces);
    }

    return $result;
}
306
/**
 * Parses a mime-type, optionally followed by ;-separated parameters, and
 * splits it into:
 *
 * 1. type
 * 2. subType
 * 3. quality
 * 4. parameters
 *
 * A bare "*" is treated as "*\/*". The quality is 1 unless a q= parameter is
 * present, in which case it is the float value of that parameter. Parsing of
 * parameters stops at q=; anything after it is not returned.
 *
 * @return array{type: string, subType: string, quality: int|float, parameters: array<string, string>}
 *
 * @throws InvalidArgumentException when the value before the first ';' does
 *                                  not consist of exactly a type and a
 *                                  subtype separated by a slash
 */
function parseMimeType(string $str): array
{
    $parameters = [];
    // If no q= parameter appears, then quality = 1.
    $quality = 1;

    $parts = explode(';', $str);

    // The first part is the mime-type.
    $mimeType = trim(array_shift($parts));

    if ('*' === $mimeType) {
        $mimeType = '*/*';
    }

    $mimeType = explode('/', $mimeType);
    if (2 !== count($mimeType)) {
        // Illegal value
        throw new InvalidArgumentException('Not a valid mime-type: '.$str);
    }
    list($type, $subType) = $mimeType;

    foreach ($parts as $part) {
        $part = trim($part);

        // A part only counts as name=value when the '=' is not the very
        // first character; otherwise the whole part is the name.
        $separatorPosition = strpos($part, '=');
        if (false !== $separatorPosition && $separatorPosition > 0) {
            list($partName, $partValue) =
                explode('=', $part, 2);
        } else {
            $partName = $part;
            $partValue = null;
        }

        // The quality parameter, if it appears, also marks the end of
        // the parameter list. Anything after the q= counts as an
        // 'accept extension' and could introduce new semantics in
        // content-negotiation.
        if ('q' !== $partName) {
            // NOTE(review): the stored value is the complete "name=value"
            // text, not just the value. This is kept as-is because the
            // returned parameters are part of this function's output.
            $parameters[$partName] = $part;
        } else {
            $quality = (float) $partValue;
            break; // Stop parsing parts
        }
    }

    return [
        'type' => $type,
        'subType' => $subType,
        'quality' => $quality,
        'parameters' => $parameters,
    ];
}
368
/**
 * Percent-encodes a url path.
 *
 * Slashes (/) are treated as path-separators and are left alone, as are
 * letters, digits and the characters _ - . ~ ( ) : @. Every other byte is
 * replaced by a % followed by its two-digit lowercase hexadecimal value.
 */
function encodePath(string $path): string
{
    $percentEncode = static function (array $hit) {
        return '%'.sprintf('%02x', ord($hit[0]));
    };

    return preg_replace_callback('/([^A-Za-z0-9_\-\.~\(\)\/:@])/', $percentEncode, $path);
}
380
/**
 * Percent-encodes a single segment of a url path.
 *
 * Slashes are considered part of the name, and are encoded as %2f. Letters,
 * digits and the characters _ - . ~ ( ) : @ are left alone; every other byte
 * is replaced by a % followed by its two-digit lowercase hexadecimal value.
 */
function encodePathSegment(string $pathSegment): string
{
    $percentEncode = static function (array $hit) {
        return '%'.sprintf('%02x', ord($hit[0]));
    };

    return preg_replace_callback('/([^A-Za-z0-9_\-\.~\(\):@])/', $percentEncode, $pathSegment);
}
392
/**
 * Decodes a url-encoded path.
 *
 * A full path is decoded in exactly the same way as a single path segment,
 * so the work is handed to decodePathSegment().
 */
function decodePath(string $path): string
{
    $decodedPath = decodePathSegment($path);

    return $decodedPath;
}
400
/**
 * Decodes a url-encoded path segment.
 *
 * Percent-escapes are decoded first. If the decoded bytes are not
 * well-formed UTF-8 they are assumed to be ISO-8859-1 and converted, so the
 * result is always UTF-8.
 */
function decodePathSegment(string $path): string
{
    $path = rawurldecode($path);

    // Any byte sequence is valid ISO-8859-1, so "not well-formed UTF-8" is
    // the only check needed to decide that a conversion is required.
    // mb_convert_encoding() is used because utf8_encode() is deprecated as
    // of PHP 8.2.
    if (!mb_check_encoding($path, 'UTF-8')) {
        $path = mb_convert_encoding($path, 'UTF-8', 'ISO-8859-1');
    }

    return $path;
}
416