1 /*
2  * Copyright (C) 2006 Alexey Proskuryakov (ap@webkit.org)
3  * Copyright (C) 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4  * Copyright (C) 2009 Torch Mobile Inc. http://www.torchmobile.com/
5  * Copyright (C) 2009 Google Inc. All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  *
11  * 1.  Redistributions of source code must retain the above copyright
12  *     notice, this list of conditions and the following disclaimer.
13  * 2.  Redistributions in binary form must reproduce the above copyright
14  *     notice, this list of conditions and the following disclaimer in the
15  *     documentation and/or other materials provided with the distribution.
16  * 3.  Neither the name of Apple Computer, Inc. ("Apple") nor the names of
17  *     its contributors may be used to endorse or promote products derived
18  *     from this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY APPLE AND ITS CONTRIBUTORS "AS IS" AND ANY
21  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
22  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
23  * DISCLAIMED. IN NO EVENT SHALL APPLE OR ITS CONTRIBUTORS BE LIABLE FOR ANY
24  * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
25  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
26  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
27  * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
29  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30  */
31 
32 #include "config.h"
33 #include "HTTPParsers.h"
34 #include "ResourceResponseBase.h"
35 
36 #include "PlatformString.h"
37 #include <wtf/text/CString.h>
38 #include <wtf/DateMath.h>
39 
40 using namespace WTF;
41 
42 namespace WebCore {
43 
44 // true if there is more to parse
skipWhiteSpace(const String & str,unsigned & pos,bool fromHttpEquivMeta)45 static inline bool skipWhiteSpace(const String& str, unsigned& pos, bool fromHttpEquivMeta)
46 {
47     unsigned len = str.length();
48 
49     if (fromHttpEquivMeta) {
50         while (pos != len && str[pos] <= ' ')
51             ++pos;
52     } else {
53         while (pos != len && (str[pos] == '\t' || str[pos] == ' '))
54             ++pos;
55     }
56 
57     return pos != len;
58 }
59 
60 // Returns true if the function can match the whole token (case insensitive).
61 // Note: Might return pos == str.length()
skipToken(const String & str,unsigned & pos,const char * token)62 static inline bool skipToken(const String& str, unsigned& pos, const char* token)
63 {
64     unsigned len = str.length();
65 
66     while (pos != len && *token) {
67         if (toASCIILower(str[pos]) != *token++)
68             return false;
69         ++pos;
70     }
71 
72     return true;
73 }
74 
contentDispositionType(const String & contentDisposition)75 ContentDispositionType contentDispositionType(const String& contentDisposition)
76 {
77     if (contentDisposition.isEmpty())
78         return ContentDispositionNone;
79 
80     // Some broken sites just send
81     // Content-Disposition: ; filename="file"
82     // screen those out here.
83     if (contentDisposition.startsWith(";"))
84         return ContentDispositionNone;
85 
86     if (contentDisposition.startsWith("inline", false))
87         return ContentDispositionInline;
88 
89     // Some broken sites just send
90     // Content-Disposition: filename="file"
91     // without a disposition token... screen those out.
92     if (contentDisposition.startsWith("filename", false))
93         return ContentDispositionNone;
94 
95     // Also in use is Content-Disposition: name="file"
96     if (contentDisposition.startsWith("name", false))
97         return ContentDispositionNone;
98 
99     // We have a content-disposition of "attachment" or unknown.
100     // RFC 2183, section 2.8 says that an unknown disposition
101     // value should be treated as "attachment"
102     return ContentDispositionAttachment;
103 }
104 
parseHTTPRefresh(const String & refresh,bool fromHttpEquivMeta,double & delay,String & url)105 bool parseHTTPRefresh(const String& refresh, bool fromHttpEquivMeta, double& delay, String& url)
106 {
107     unsigned len = refresh.length();
108     unsigned pos = 0;
109 
110     if (!skipWhiteSpace(refresh, pos, fromHttpEquivMeta))
111         return false;
112 
113     while (pos != len && refresh[pos] != ',' && refresh[pos] != ';')
114         ++pos;
115 
116     if (pos == len) { // no URL
117         url = String();
118         bool ok;
119         delay = refresh.stripWhiteSpace().toDouble(&ok);
120         return ok;
121     } else {
122         bool ok;
123         delay = refresh.left(pos).stripWhiteSpace().toDouble(&ok);
124         if (!ok)
125             return false;
126 
127         ++pos;
128         skipWhiteSpace(refresh, pos, fromHttpEquivMeta);
129         unsigned urlStartPos = pos;
130         if (refresh.find("url", urlStartPos, false) == urlStartPos) {
131             urlStartPos += 3;
132             skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
133             if (refresh[urlStartPos] == '=') {
134                 ++urlStartPos;
135                 skipWhiteSpace(refresh, urlStartPos, fromHttpEquivMeta);
136             } else
137                 urlStartPos = pos;  // e.g. "Refresh: 0; url.html"
138         }
139 
140         unsigned urlEndPos = len;
141 
142         if (refresh[urlStartPos] == '"' || refresh[urlStartPos] == '\'') {
143             UChar quotationMark = refresh[urlStartPos];
144             urlStartPos++;
145             while (urlEndPos > urlStartPos) {
146                 urlEndPos--;
147                 if (refresh[urlEndPos] == quotationMark)
148                     break;
149             }
150 
151             // https://bugs.webkit.org/show_bug.cgi?id=27868
152             // Sometimes there is no closing quote for the end of the URL even though there was an opening quote.
153             // If we looped over the entire alleged URL string back to the opening quote, just go ahead and use everything
154             // after the opening quote instead.
155             if (urlEndPos == urlStartPos)
156                 urlEndPos = len;
157         }
158 
159         url = refresh.substring(urlStartPos, urlEndPos - urlStartPos).stripWhiteSpace();
160         return true;
161     }
162 }
163 
parseDate(const String & value)164 double parseDate(const String& value)
165 {
166     return parseDateFromNullTerminatedCharacters(value.utf8().data());
167 }
168 
filenameFromHTTPContentDisposition(const String & value)169 String filenameFromHTTPContentDisposition(const String& value)
170 {
171     Vector<String> keyValuePairs;
172     value.split(';', keyValuePairs);
173 
174     unsigned length = keyValuePairs.size();
175     for (unsigned i = 0; i < length; i++) {
176         size_t valueStartPos = keyValuePairs[i].find('=');
177         if (valueStartPos == notFound)
178             continue;
179 
180         String key = keyValuePairs[i].left(valueStartPos).stripWhiteSpace();
181 
182         if (key.isEmpty() || key != "filename")
183             continue;
184 
185         String value = keyValuePairs[i].substring(valueStartPos + 1).stripWhiteSpace();
186 
187         // Remove quotes if there are any
188         if (value[0] == '\"')
189             value = value.substring(1, value.length() - 2);
190 
191         return value;
192     }
193 
194     return String();
195 }
196 
extractMIMETypeFromMediaType(const String & mediaType)197 String extractMIMETypeFromMediaType(const String& mediaType)
198 {
199     Vector<UChar, 64> mimeType;
200     unsigned length = mediaType.length();
201     mimeType.reserveCapacity(length);
202     for (unsigned i = 0; i < length; i++) {
203         UChar c = mediaType[i];
204 
205         if (c == ';')
206             break;
207 
208         // While RFC 2616 does not allow it, other browsers allow multiple values in the HTTP media
209         // type header field, Content-Type. In such cases, the media type string passed here may contain
210         // the multiple values separated by commas. For now, this code ignores text after the first comma,
211         // which prevents it from simply failing to parse such types altogether. Later for better
212         // compatibility we could consider using the first or last valid MIME type instead.
213         // See https://bugs.webkit.org/show_bug.cgi?id=25352 for more discussion.
214         if (c == ',')
215             break;
216 
217         // FIXME: The following is not correct. RFC 2616 allows linear white space before and
218         // after the MIME type, but not within the MIME type itself. And linear white space
219         // includes only a few specific ASCII characters; a small subset of isSpaceOrNewline.
220         // See https://bugs.webkit.org/show_bug.cgi?id=8644 for a bug tracking part of this.
221         if (isSpaceOrNewline(c))
222             continue;
223 
224         mimeType.append(c);
225     }
226 
227     if (mimeType.size() == length)
228         return mediaType;
229     return String(mimeType.data(), mimeType.size());
230 }
231 
extractCharsetFromMediaType(const String & mediaType)232 String extractCharsetFromMediaType(const String& mediaType)
233 {
234     unsigned int pos, len;
235     findCharsetInMediaType(mediaType, pos, len);
236     return mediaType.substring(pos, len);
237 }
238 
findCharsetInMediaType(const String & mediaType,unsigned int & charsetPos,unsigned int & charsetLen,unsigned int start)239 void findCharsetInMediaType(const String& mediaType, unsigned int& charsetPos, unsigned int& charsetLen, unsigned int start)
240 {
241     charsetPos = start;
242     charsetLen = 0;
243 
244     size_t pos = start;
245     unsigned length = mediaType.length();
246 
247     while (pos < length) {
248         pos = mediaType.find("charset", pos, false);
249         if (pos == notFound || pos == 0) {
250             charsetLen = 0;
251             return;
252         }
253 
254         // is what we found a beginning of a word?
255         if (mediaType[pos-1] > ' ' && mediaType[pos-1] != ';') {
256             pos += 7;
257             continue;
258         }
259 
260         pos += 7;
261 
262         // skip whitespace
263         while (pos != length && mediaType[pos] <= ' ')
264             ++pos;
265 
266         if (mediaType[pos++] != '=') // this "charset" substring wasn't a parameter name, but there may be others
267             continue;
268 
269         while (pos != length && (mediaType[pos] <= ' ' || mediaType[pos] == '"' || mediaType[pos] == '\''))
270             ++pos;
271 
272         // we don't handle spaces within quoted parameter values, because charset names cannot have any
273         unsigned endpos = pos;
274         while (pos != length && mediaType[endpos] > ' ' && mediaType[endpos] != '"' && mediaType[endpos] != '\'' && mediaType[endpos] != ';')
275             ++endpos;
276 
277         charsetPos = pos;
278         charsetLen = endpos - pos;
279         return;
280     }
281 }
282 
parseXSSProtectionHeader(const String & header)283 XSSProtectionDisposition parseXSSProtectionHeader(const String& header)
284 {
285     String stippedHeader = header.stripWhiteSpace();
286 
287     if (stippedHeader.isEmpty())
288         return XSSProtectionEnabled;
289 
290     if (stippedHeader[0] == '0')
291         return XSSProtectionDisabled;
292 
293     unsigned length = header.length();
294     unsigned pos = 0;
295     if (stippedHeader[pos++] == '1'
296         && skipWhiteSpace(stippedHeader, pos, false)
297         && stippedHeader[pos++] == ';'
298         && skipWhiteSpace(stippedHeader, pos, false)
299         && skipToken(stippedHeader, pos, "mode")
300         && skipWhiteSpace(stippedHeader, pos, false)
301         && stippedHeader[pos++] == '='
302         && skipWhiteSpace(stippedHeader, pos, false)
303         && skipToken(stippedHeader, pos, "block")
304         && pos == length)
305         return XSSProtectionBlockEnabled;
306 
307     return XSSProtectionEnabled;
308 }
309 
extractReasonPhraseFromHTTPStatusLine(const String & statusLine)310 String extractReasonPhraseFromHTTPStatusLine(const String& statusLine)
311 {
312     size_t spacePos = statusLine.find(' ');
313     // Remove status code from the status line.
314     spacePos = statusLine.find(' ', spacePos + 1);
315     return statusLine.substring(spacePos + 1);
316 }
317 
parseRange(const String & range,long long & rangeOffset,long long & rangeEnd,long long & rangeSuffixLength)318 bool parseRange(const String& range, long long& rangeOffset, long long& rangeEnd, long long& rangeSuffixLength)
319 {
320     // The format of "Range" header is defined in RFC 2616 Section 14.35.1.
321     // http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.35.1
322     // We don't support multiple range requests.
323 
324     rangeOffset = rangeEnd = rangeSuffixLength = -1;
325 
326     // The "bytes" unit identifier should be present.
327     static const char bytesStart[] = "bytes=";
328     if (!range.startsWith(bytesStart, false))
329         return false;
330     String byteRange = range.substring(sizeof(bytesStart) - 1);
331 
332     // The '-' character needs to be present.
333     int index = byteRange.find('-');
334     if (index == -1)
335         return false;
336 
337     // If the '-' character is at the beginning, the suffix length, which specifies the last N bytes, is provided.
338     // Example:
339     //     -500
340     if (!index) {
341         String suffixLengthString = byteRange.substring(index + 1).stripWhiteSpace();
342         bool ok;
343         long long value = suffixLengthString.toInt64Strict(&ok);
344         if (ok)
345             rangeSuffixLength = value;
346         return true;
347     }
348 
349     // Otherwise, the first-byte-position and the last-byte-position are provied.
350     // Examples:
351     //     0-499
352     //     500-
353     String firstBytePosStr = byteRange.left(index).stripWhiteSpace();
354     bool ok;
355     long long firstBytePos = firstBytePosStr.toInt64Strict(&ok);
356     if (!ok)
357         return false;
358 
359     String lastBytePosStr = byteRange.substring(index + 1).stripWhiteSpace();
360     long long lastBytePos = -1;
361     if (!lastBytePosStr.isEmpty()) {
362         lastBytePos = lastBytePosStr.toInt64Strict(&ok);
363         if (!ok)
364             return false;
365     }
366 
367     if (firstBytePos < 0 || !(lastBytePos == -1 || lastBytePos >= firstBytePos))
368         return false;
369 
370     rangeOffset = firstBytePos;
371     rangeEnd = lastBytePos;
372     return true;
373 }
374 
375 }
376