1 /*
2  * Copyright (C) 2008 Apple Inc. All Rights Reserved.
3  *
4  * Redistribution and use in source and binary forms, with or without
5  * modification, are permitted provided that the following conditions
6  * are met:
7  * 1. Redistributions of source code must retain the above copyright
8  *    notice, this list of conditions and the following disclaimer.
9  * 2. Redistributions in binary form must reproduce the above copyright
10  *    notice, this list of conditions and the following disclaimer in the
11  *    documentation and/or other materials provided with the distribution.
12  *
13  * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21  * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24  */
25 
26 #include "config.h"
27 #include "ManifestParser.h"
28 
29 #if ENABLE(OFFLINE_WEB_APPLICATIONS)
30 
31 #include "KURL.h"
32 #include "TextResourceDecoder.h"
33 #include <wtf/unicode/CharacterNames.h>
34 
35 using namespace std;
36 
37 namespace WebCore {
38 
// Section of the manifest that the line currently being parsed belongs to.
enum Mode {
    Explicit,        // Initial section, and the one selected by a "CACHE:" line.
    Fallback,        // Selected by a "FALLBACK:" line.
    OnlineWhitelist, // Selected by a "NETWORK:" line.
    Unknown          // Selected by any other line ending in ':'; its entries are skipped.
};
40 
parseManifest(const KURL & manifestURL,const char * data,int length,Manifest & manifest)41 bool parseManifest(const KURL& manifestURL, const char* data, int length, Manifest& manifest)
42 {
43     ASSERT(manifest.explicitURLs.isEmpty());
44     ASSERT(manifest.onlineWhitelistedURLs.isEmpty());
45     ASSERT(manifest.fallbackURLs.isEmpty());
46     manifest.allowAllNetworkRequests = false;
47 
48     Mode mode = Explicit;
49 
50     RefPtr<TextResourceDecoder> decoder = TextResourceDecoder::create("text/cache-manifest", "UTF-8");
51     String s = decoder->decode(data, length);
52     s += decoder->flush();
53 
54     // Look for the magic signature: "^\xFEFF?CACHE MANIFEST[ \t]?" (the BOM is removed by TextResourceDecoder).
55     // Example: "CACHE MANIFEST #comment" is a valid signature.
56     // Example: "CACHE MANIFEST;V2" is not.
57     if (!s.startsWith("CACHE MANIFEST"))
58         return false;
59 
60     const UChar* end = s.characters() + s.length();
61     const UChar* p = s.characters() + 14; // "CACHE MANIFEST" is 14 characters.
62 
63     if (p < end && *p != ' ' && *p != '\t' && *p != '\n' && *p != '\r')
64         return false;
65 
66     // Skip to the end of the line.
67     while (p < end && *p != '\r' && *p != '\n')
68         p++;
69 
70     while (1) {
71         // Skip whitespace
72         while (p < end && (*p == '\n' || *p == '\r' || *p == ' ' || *p == '\t'))
73             p++;
74 
75         if (p == end)
76             break;
77 
78         const UChar* lineStart = p;
79 
80         // Find the end of the line
81         while (p < end && *p != '\r' && *p != '\n')
82             p++;
83 
84         // Check if we have a comment
85         if (*lineStart == '#')
86             continue;
87 
88         // Get rid of trailing whitespace
89         const UChar* tmp = p - 1;
90         while (tmp > lineStart && (*tmp == ' ' || *tmp == '\t'))
91             tmp--;
92 
93         String line(lineStart, tmp - lineStart + 1);
94 
95         if (line == "CACHE:")
96             mode = Explicit;
97         else if (line == "FALLBACK:")
98             mode = Fallback;
99         else if (line == "NETWORK:")
100             mode = OnlineWhitelist;
101         else if (line.endsWith(":"))
102             mode = Unknown;
103         else if (mode == Unknown)
104             continue;
105         else if (mode == Explicit || mode == OnlineWhitelist) {
106             const UChar* p = line.characters();
107             const UChar* lineEnd = p + line.length();
108 
109             // Look for whitespace separating the URL from subsequent ignored tokens.
110             while (p < lineEnd && *p != '\t' && *p != ' ')
111                 p++;
112 
113             if (mode == OnlineWhitelist && p - line.characters() == 1 && *line.characters() == '*') {
114                 // Wildcard was found.
115                 manifest.allowAllNetworkRequests = true;
116                 continue;
117             }
118 
119             KURL url(manifestURL, String(line.characters(), p - line.characters()));
120 
121             if (!url.isValid())
122                 continue;
123 
124             if (url.hasFragmentIdentifier())
125                 url.removeFragmentIdentifier();
126 
127             if (!equalIgnoringCase(url.protocol(), manifestURL.protocol()))
128                 continue;
129 
130             if (mode == Explicit && manifestURL.protocolIs("https") && !protocolHostAndPortAreEqual(manifestURL, url))
131                 continue;
132 
133             if (mode == Explicit)
134                 manifest.explicitURLs.add(url.string());
135             else
136                 manifest.onlineWhitelistedURLs.append(url);
137 
138         } else if (mode == Fallback) {
139             const UChar* p = line.characters();
140             const UChar* lineEnd = p + line.length();
141 
142             // Look for whitespace separating the two URLs
143             while (p < lineEnd && *p != '\t' && *p != ' ')
144                 p++;
145 
146             if (p == lineEnd) {
147                 // There was no whitespace separating the URLs.
148                 continue;
149             }
150 
151             KURL namespaceURL(manifestURL, String(line.characters(), p - line.characters()));
152             if (!namespaceURL.isValid())
153                 continue;
154             if (namespaceURL.hasFragmentIdentifier())
155                 namespaceURL.removeFragmentIdentifier();
156 
157             if (!protocolHostAndPortAreEqual(manifestURL, namespaceURL))
158                 continue;
159 
160             // Skip whitespace separating fallback namespace from URL.
161             while (p < lineEnd && (*p == '\t' || *p == ' '))
162                 p++;
163 
164             // Look for whitespace separating the URL from subsequent ignored tokens.
165             const UChar* fallbackStart = p;
166             while (p < lineEnd && *p != '\t' && *p != ' ')
167                 p++;
168 
169             KURL fallbackURL(manifestURL, String(fallbackStart, p - fallbackStart));
170             if (!fallbackURL.isValid())
171                 continue;
172             if (fallbackURL.hasFragmentIdentifier())
173                 fallbackURL.removeFragmentIdentifier();
174 
175             if (!protocolHostAndPortAreEqual(manifestURL, fallbackURL))
176                 continue;
177 
178             manifest.fallbackURLs.append(make_pair(namespaceURL, fallbackURL));
179         } else
180             ASSERT_NOT_REACHED();
181     }
182 
183     return true;
184 }
185 
186 }
187 
188 #endif // ENABLE(OFFLINE_WEB_APPLICATIONS)
189