1 #include "node_url.h"
2 #include "node_internals.h"
3 #include "base_object-inl.h"
4 #include "node_i18n.h"
5
6 #include <string>
7 #include <vector>
8 #include <stdio.h>
9 #include <cmath>
10
11 namespace node {
12
13 using v8::Array;
14 using v8::Context;
15 using v8::Function;
16 using v8::FunctionCallbackInfo;
17 using v8::HandleScope;
18 using v8::Int32;
19 using v8::Integer;
20 using v8::Isolate;
21 using v8::Local;
22 using v8::MaybeLocal;
23 using v8::NewStringType;
24 using v8::Null;
25 using v8::Object;
26 using v8::String;
27 using v8::TryCatch;
28 using v8::Undefined;
29 using v8::Value;
30
Utf8String(Isolate * isolate,const std::string & str)31 inline Local<String> Utf8String(Isolate* isolate, const std::string& str) {
32 return String::NewFromUtf8(isolate,
33 str.data(),
34 NewStringType::kNormal,
35 str.length()).ToLocalChecked();
36 }
37
38 namespace url {
39
40 namespace {
41
// Sentinel standing in for the "EOF code point" of the URL Standard
// (https://url.spec.whatwg.org/#eof-code-point).
constexpr char kEOL = -1;

// U+FFFD REPLACEMENT CHARACTER. Used in ToUSVString().
constexpr char16_t kUnicodeReplacementCharacter = 0xFFFD;
47
// https://url.spec.whatwg.org/#concept-host
//
// Holds the outcome of parsing a host. The active member of `value_` is
// selected by `type_`; a default-constructed URLHost is in the H_FAILED
// state and stays there until one of the Parse*() methods succeeds.
class URLHost {
 public:
  ~URLHost();

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been parsed successfully.
  inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; }
  std::string ToString() const;
  // Like ToString(), but avoids a copy in exchange for invalidating `*this`.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  // Storage shared by all host kinds. The std::string member is only alive
  // while `type_` is H_DOMAIN or H_OPAQUE; its lifetime is managed by hand
  // through Reset() and the Set*() helpers, which is why the union's own
  // constructor and destructor do nothing with it.
  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member if it is the active one and returns the
  // object to the H_FAILED state.
  inline void Reset() {
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Assigning to the string member with = would be brittle: it presumes the
  // storage already holds a string that needs no cleanup, which is a
  // standard library implementation detail. Instead, whatever is currently
  // stored is released first and the new string is constructed in place.
  // Not forcing a string into a union at all would be the cleaner fix.
  inline void AdoptString(HostType new_type, std::string&& string) {
    Reset();
    type_ = new_type;
    new(&value_.domain_or_opaque) std::string(std::move(string));
  }

  inline void SetOpaque(std::string&& string) {
    AdoptString(HostType::H_OPAQUE, std::move(string));
  }

  inline void SetDomain(std::string&& string) {
    AdoptString(HostType::H_DOMAIN, std::move(string));
  }
};

// Releases the string member, if any, before the union storage goes away.
URLHost::~URLHost() {
  Reset();
}
124
// X-macro listing the enumerators of `url_cb_args` in positional order.
// NOTE(review): presumably these are the argument slots handed to the URL
// parse callback; the call site is outside this excerpt.
#define ARGS(V)                                                               \
  V(ARG_FLAGS)                                                                \
  V(ARG_PROTOCOL)                                                             \
  V(ARG_USERNAME)                                                             \
  V(ARG_PASSWORD)                                                             \
  V(ARG_HOST)                                                                 \
  V(ARG_PORT)                                                                 \
  V(ARG_PATH)                                                                 \
  V(ARG_QUERY)                                                                \
  V(ARG_FRAGMENT)                                                             \
  V(ARG_COUNT)  // This one has to be last.

// X-macro listing the enumerators of `url_error_cb_args`.
#define ERR_ARGS(V)                                                           \
  V(ERR_ARG_FLAGS)                                                            \
  V(ERR_ARG_INPUT)

// ARG_FLAGS is 0; ARG_COUNT, being last, equals the number of real entries.
enum url_cb_args {
#define DECLARE_ENUMERATOR(name) name,
  ARGS(DECLARE_ENUMERATOR)
#undef DECLARE_ENUMERATOR
};

enum url_error_cb_args {
#define DECLARE_ENUMERATOR(name) name,
  ERR_ARGS(DECLARE_ENUMERATOR)
#undef DECLARE_ENUMERATOR
};
152
// Defines `template <typename T> bool name(const T ch)` that returns `expr`
// evaluated on `ch`. `bits` is the minimum width the character type must
// have; narrower types are rejected at compile time.
#define CHAR_TEST(bits, name, expr)                                           \
  template <typename T>                                                       \
  inline bool name(const T ch) {                                              \
    static_assert(sizeof(ch) >= (bits) / 8,                                   \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }

// Defines two overloads of `name`: one taking the characters `ch1` and `ch2`
// and returning `expr`, and one taking a std::basic_string that applies the
// test to its first two characters (false if the string is shorter than 2).
#define TWO_CHAR_STRING_TEST(bits, name, expr)                                \
  template <typename T>                                                       \
  inline bool name(const T ch1, const T ch2) {                                \
    static_assert(sizeof(ch1) >= (bits) / 8,                                  \
                  "Character must be at least " #bits " bits wide");          \
    return (expr);                                                            \
  }                                                                           \
  template <typename T>                                                       \
  inline bool name(const std::basic_string<T>& str) {                         \
    static_assert(sizeof(str[0]) >= (bits) / 8,                               \
                  "Character must be at least " #bits " bits wide");          \
    return str.length() >= 2 && name(str[0], str[1]);                         \
  }

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
CHAR_TEST(8, IsASCIITabOrNewline, (ch == '\t' || ch == '\n' || ch == '\r'))

// https://infra.spec.whatwg.org/#c0-control-or-space
CHAR_TEST(8, IsC0ControlOrSpace, (ch >= '\0' && ch <= ' '))

// https://infra.spec.whatwg.org/#ascii-digit
CHAR_TEST(8, IsASCIIDigit, (ch >= '0' && ch <= '9'))

// https://infra.spec.whatwg.org/#ascii-hex-digit
CHAR_TEST(8, IsASCIIHexDigit, (IsASCIIDigit(ch) ||
                               (ch >= 'A' && ch <= 'F') ||
                               (ch >= 'a' && ch <= 'f')))

// https://infra.spec.whatwg.org/#ascii-alpha
CHAR_TEST(8, IsASCIIAlpha, ((ch >= 'A' && ch <= 'Z') ||
                            (ch >= 'a' && ch <= 'z')))

// https://infra.spec.whatwg.org/#ascii-alphanumeric
CHAR_TEST(8, IsASCIIAlphanumeric, (IsASCIIDigit(ch) || IsASCIIAlpha(ch)))

// https://infra.spec.whatwg.org/#ascii-lowercase
// Setting bit 0x20 maps 'A'..'Z' onto 'a'..'z'; every other value is
// returned unchanged.
template <typename T>
inline T ASCIILowercase(T ch) {
  return IsASCIIAlpha(ch) ? (ch | 0x20) : ch;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
CHAR_TEST(8, IsForbiddenHostCodePoint,
          ch == '\0' || ch == '\t' || ch == '\n' || ch == '\r' ||
          ch == ' ' || ch == '#' || ch == '%' || ch == '/' ||
          ch == ':' || ch == '?' || ch == '@' || ch == '[' ||
          ch == '\\' || ch == ']')

// https://url.spec.whatwg.org/#windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && (ch2 == ':' || ch2 == '|')))

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
TWO_CHAR_STRING_TEST(8, IsNormalizedWindowsDriveLetter,
                     (IsASCIIAlpha(ch1) && ch2 == ':'))

// If a UTF-16 character is a low/trailing surrogate.
CHAR_TEST(16, IsUnicodeTrail, (ch & 0xFC00) == 0xDC00)

// If a UTF-16 character is a surrogate.
CHAR_TEST(16, IsUnicodeSurrogate, (ch & 0xF800) == 0xD800)

// If a UTF-16 surrogate is a low/trailing one. Only meaningful for values
// that are already known to be surrogates.
CHAR_TEST(16, IsUnicodeSurrogateTrail, (ch & 0x400) != 0)

#undef CHAR_TEST
#undef TWO_CHAR_STRING_TEST
228
// Percent-encoded spelling ("%XX", upper-case hex digits) of every byte
// value, indexed by the byte. Both the strings and the table itself are
// read-only.
const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
263
// Bitmap over all 256 byte values: byte c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Each element below covers eight consecutive bytes.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  // 0x00-0x1F: every C0 control.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x3F: none.
  0x00, 0x00, 0x00, 0x00,
  // 0x40-0x5F: none.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x77: none.
  0x00, 0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
330
// Bitmap over all 256 byte values: byte c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Each element below covers eight consecutive bytes.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  // 0x00-0x1F: every C0 control.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: space (0x20) and '"' (0x22).
  0x05,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C) and '>' (0x3E).
  0x50,
  // 0x40-0x5F: none.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x67: '`' (0x60).
  0x01,
  // 0x68-0x77: none.
  0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
397
398
// Bitmap over all 256 byte values: byte c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Each element below covers eight consecutive bytes.
const uint8_t PATH_ENCODE_SET[32] = {
  // 0x00-0x1F: every C0 control.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: space (0x20), '"' (0x22) and '#' (0x23).
  0x0D,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C), '>' (0x3E) and '?' (0x3F).
  0xD0,
  // 0x40-0x5F: none.
  0x00, 0x00, 0x00, 0x00,
  // 0x60-0x67: '`' (0x60).
  0x01,
  // 0x68-0x77: none.
  0x00, 0x00,
  // 0x78-0x7F: '{' (0x7B), '}' (0x7D) and 0x7F (DEL).
  0xA8,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
465
// Bitmap over all 256 byte values: byte c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Each element below covers eight consecutive bytes.
const uint8_t USERINFO_ENCODE_SET[32] = {
  // 0x00-0x1F: every C0 control.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: space (0x20), '"' (0x22) and '#' (0x23).
  0x0D,
  // 0x28-0x2F: '/' (0x2F).
  0x80,
  // 0x30-0x37: none.
  0x00,
  // 0x38-0x3F: ':' ';' '<' '=' '>' '?' (0x3A-0x3F).
  0xFC,
  // 0x40-0x47: '@' (0x40).
  0x01,
  // 0x48-0x57: none.
  0x00, 0x00,
  // 0x58-0x5F: '[' '\' ']' '^' (0x5B-0x5E).
  0x78,
  // 0x60-0x67: '`' (0x60).
  0x01,
  // 0x68-0x77: none.
  0x00, 0x00,
  // 0x78-0x7F: '{' '|' '}' (0x7B-0x7D) and 0x7F (DEL).
  0xB8,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
532
// Bitmap over all 256 byte values: byte c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Each element below covers eight consecutive bytes.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
  // 0x00-0x1F: every C0 control.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: space (0x20), '"' (0x22) and '#' (0x23).
  0x0D,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C) and '>' (0x3E).
  0x50,
  // 0x40-0x77: none.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
599
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
// Bitmap over all 256 byte values: byte c is in the set when bit (c & 7) of
// element (c >> 3) is 1. Each element below covers eight consecutive bytes.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  // 0x00-0x1F: every C0 control.
  0xFF, 0xFF, 0xFF, 0xFF,
  // 0x20-0x27: space (0x20), '"' (0x22), '#' (0x23) and '\'' (0x27).
  0x8D,
  // 0x28-0x37: none.
  0x00, 0x00,
  // 0x38-0x3F: '<' (0x3C) and '>' (0x3E).
  0x50,
  // 0x40-0x77: none.
  0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
  // 0x78-0x7F: 0x7F (DEL).
  0x80,
  // 0x80-0xFF: every byte.
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
  0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
667
// Reports whether bit number `i` is set in the bitmap `a`, where bit i lives
// in element i / 8 at position i % 8 (least significant bit first).
inline bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t element = a[i / 8];
  const int mask = 1 << (i % 8);
  return (element & mask) != 0;
}
671
672 // Appends ch to str. If ch position in encode_set is set, the ch will
673 // be percent-encoded then appended.
AppendOrEscape(std::string * str,const unsigned char ch,const uint8_t encode_set[])674 inline void AppendOrEscape(std::string* str,
675 const unsigned char ch,
676 const uint8_t encode_set[]) {
677 if (BitAt(encode_set, ch))
678 *str += hex[ch];
679 else
680 *str += ch;
681 }
682
// Converts one ASCII hex digit to its numeric value (0-15). Any other
// character yields static_cast<unsigned>(-1).
template <typename T>
inline unsigned hex2bin(const T ch) {
  const bool is_decimal = ch >= '0' && ch <= '9';
  const bool is_upper = ch >= 'A' && ch <= 'F';
  const bool is_lower = ch >= 'a' && ch <= 'f';

  if (is_decimal) {
    return ch - '0';
  } else if (is_upper) {
    return 10 + (ch - 'A');
  } else if (is_lower) {
    return 10 + (ch - 'a');
  } else {
    return static_cast<unsigned>(-1);
  }
}
693
PercentDecode(const char * input,size_t len)694 inline std::string PercentDecode(const char* input, size_t len) {
695 std::string dest;
696 if (len == 0)
697 return dest;
698 dest.reserve(len);
699 const char* pointer = input;
700 const char* end = input + len;
701
702 while (pointer < end) {
703 const char ch = pointer[0];
704 const size_t remaining = end - pointer - 1;
705 if (ch != '%' || remaining < 2 ||
706 (ch == '%' &&
707 (!IsASCIIHexDigit(pointer[1]) ||
708 !IsASCIIHexDigit(pointer[2])))) {
709 dest += ch;
710 pointer++;
711 continue;
712 } else {
713 unsigned a = hex2bin(pointer[1]);
714 unsigned b = hex2bin(pointer[2]);
715 char c = static_cast<char>(a * 16 + b);
716 dest += c;
717 pointer += 3;
718 }
719 }
720 return dest;
721 }
722
// X-macro over the special schemes: V(scheme including the trailing ':',
// port number paired with it). "file:" is paired with -1.
#define SPECIALS(V)                                                           \
  V("ftp:", 21)                                                               \
  V("file:", -1)                                                              \
  V("gopher:", 70)                                                            \
  V("http:", 80)                                                              \
  V("https:", 443)                                                            \
  V("ws:", 80)                                                                \
  V("wss:", 443)
731
IsSpecial(const std::string & scheme)732 inline bool IsSpecial(const std::string& scheme) {
733 #define XX(name, _) if (scheme == name) return true;
734 SPECIALS(XX);
735 #undef XX
736 return false;
737 }
738
739 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
StartsWithWindowsDriveLetter(const char * p,const char * end)740 inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
741 const size_t length = end - p;
742 return length >= 2 &&
743 IsWindowsDriveLetter(p[0], p[1]) &&
744 (length == 2 ||
745 p[2] == '/' ||
746 p[2] == '\\' ||
747 p[2] == '?' ||
748 p[2] == '#');
749 }
750
NormalizePort(const std::string & scheme,int p)751 inline int NormalizePort(const std::string& scheme, int p) {
752 #define XX(name, port) if (scheme == name && p == port) return -1;
753 SPECIALS(XX);
754 #undef XX
755 return p;
756 }
757
#if defined(NODE_HAVE_I18N_SUPPORT)
// Runs i18n::ToUnicode() on `input`. On success the converted text is stored
// in `*output` and true is returned; a negative result from the i18n call
// yields false and leaves `*output` untouched.
inline bool ToUnicode(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToUnicode(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}

// Runs i18n::ToASCII() on `input`, with the same success/failure contract
// as ToUnicode() above.
inline bool ToASCII(const std::string& input, std::string* output) {
  MaybeStackBuffer<char> converted;
  const bool ok =
      i18n::ToASCII(&converted, input.c_str(), input.length()) >= 0;
  if (ok)
    output->assign(*converted, converted.length());
  return ok;
}
#else
// Intentional non-ops if ICU is not present: the input is copied to the
// output as-is and the call always succeeds.
inline bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}

inline bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
#endif
786
// Parses input[0..length) as the textual form of an IPv6 address. Only hex
// digits, ':' and (for an embedded IPv4 tail) '.' and decimal digits are
// accepted; any other character makes the parse fail.
//
// On success the eight 16-bit pieces are stored in value_.ipv6 and type_
// becomes H_IPV6. On failure the function simply returns: type_ stays
// H_FAILED, and value_.ipv6 may hold partially written pieces.
//
// End of input is represented by the kEOL sentinel in `ch`, so an input byte
// whose value equals kEOL is indistinguishable from the end of the input.
void URLHost::ParseIPv6Host(const char* input, size_t length) {
  CHECK_EQ(type_, HostType::H_FAILED);
  unsigned size = arraysize(value_.ipv6);
  // Start from all-zero pieces; the "::" expansion below relies on this.
  for (unsigned n = 0; n < size; n++)
    value_.ipv6[n] = 0;
  // Next piece to be written.
  uint16_t* piece_pointer = &value_.ipv6[0];
  uint16_t* const buffer_end = piece_pointer + size;
  // Piece position recorded when a "::" is seen; nullptr until then.
  uint16_t* compress_pointer = nullptr;
  const char* pointer = input;
  const char* end = pointer + length;
  unsigned value, len, numbers_seen;
  char ch = pointer < end ? pointer[0] : kEOL;
  // A leading ':' is only valid as the first half of "::".
  if (ch == ':') {
    if (length < 2 || pointer[1] != ':')
      return;
    pointer += 2;
    ch = pointer < end ? pointer[0] : kEOL;
    piece_pointer++;
    compress_pointer = piece_pointer;
  }
  while (ch != kEOL) {
    // More than eight pieces.
    if (piece_pointer >= buffer_end)
      return;
    // A ':' at the start of a piece is the second colon of a "::"; only one
    // such compression is allowed.
    if (ch == ':') {
      if (compress_pointer != nullptr)
        return;
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      piece_pointer++;
      compress_pointer = piece_pointer;
      continue;
    }
    // Read up to four hex digits into `value`.
    value = 0;
    len = 0;
    while (len < 4 && IsASCIIHexDigit(ch)) {
      value = value * 0x10 + hex2bin(ch);
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      len++;
    }
    switch (ch) {
      case '.':
        // The digits just read were the start of a dotted-decimal IPv4
        // tail: rewind and re-read them as decimal.
        if (len == 0)
          return;
        pointer -= len;
        ch = pointer < end ? pointer[0] : kEOL;
        // The IPv4 tail occupies two pieces.
        if (piece_pointer > buffer_end - 2)
          return;
        numbers_seen = 0;
        while (ch != kEOL) {
          // 0xffffffff marks "no digit consumed yet" for this number.
          value = 0xffffffff;
          // Every number after the first must be preceded by a '.', and
          // there may be at most four numbers.
          if (numbers_seen > 0) {
            if (ch == '.' && numbers_seen < 4) {
              pointer++;
              ch = pointer < end ? pointer[0] : kEOL;
            } else {
              return;
            }
          }
          if (!IsASCIIDigit(ch))
            return;
          while (IsASCIIDigit(ch)) {
            unsigned number = ch - '0';
            if (value == 0xffffffff) {
              value = number;
            } else if (value == 0) {
              // A further digit after a leading zero is rejected.
              return;
            } else {
              value = value * 10 + number;
            }
            if (value > 255)
              return;
            pointer++;
            ch = pointer < end ? pointer[0] : kEOL;
          }
          // Two consecutive numbers are packed into one 16-bit piece.
          *piece_pointer = *piece_pointer * 0x100 + value;
          numbers_seen++;
          if (numbers_seen == 2 || numbers_seen == 4)
            piece_pointer++;
        }
        if (numbers_seen != 4)
          return;
        // The inner loop only ends at end of input, so this leaves the
        // outer loop as well.
        continue;
      case ':':
        pointer++;
        ch = pointer < end ? pointer[0] : kEOL;
        // A single ':' may not be the last character.
        if (ch == kEOL)
          return;
        break;
      case kEOL:
        break;
      default:
        return;
    }
    *piece_pointer = value;
    piece_pointer++;
  }

  if (compress_pointer != nullptr) {
    // Move the pieces parsed after the "::" to the end of the buffer by
    // swapping them with the (zero) pieces there, which leaves the zeros in
    // the compressed gap.
    unsigned swaps = piece_pointer - compress_pointer;
    piece_pointer = buffer_end - 1;
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
      uint16_t temp = *piece_pointer;
      uint16_t* swap_piece = compress_pointer + swaps - 1;
      *piece_pointer = *swap_piece;
      *swap_piece = temp;
      piece_pointer--;
      swaps--;
    }
  } else if (compress_pointer == nullptr &&
             piece_pointer != buffer_end) {
    // Without a "::" exactly eight pieces are required.
    return;
  }
  type_ = HostType::H_IPV6;
}
902
// Parses one part of a dotted IPv4 host, given as the character range
// [start, end). See https://url.spec.whatwg.org/#ipv4-number-parser.
//
// Radix selection:
//   - a leading "0x" / "0X" selects hexadecimal;
//   - otherwise a leading '0' followed by at least one more character
//     selects octal;
//   - otherwise the number is decimal.
//
// Returns:
//   - the parsed value (>= 0);
//   - 0 when nothing is left after the radix prefix (e.g. "" or "0x");
//   - -1 when any character is not a digit of the selected radix.
// Values that do not fit in int64_t saturate at the largest int64_t value.
//
// Only characters inside [start, end) are read, so the range does not have
// to be NUL-terminated or followed by a non-digit character.
inline int64_t ParseNumber(const char* start, const char* end) {
  unsigned R = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    R = 16;
  }
  if (end - start == 0) {
    return 0;
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
    start++;
    R = 8;
  }

  // Largest int64_t value, spelled without relying on extra headers.
  const int64_t kMax = static_cast<int64_t>(~static_cast<uint64_t>(0) >> 1);
  const int64_t radix = static_cast<int64_t>(R);
  int64_t result = 0;
  bool overflowed = false;

  for (const char* p = start; p < end; p++) {
    const char ch = p[0];
    int64_t digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = ch - 'a' + 10;
    else if (ch >= 'A' && ch <= 'F')
      digit = ch - 'A' + 10;
    else
      return -1;
    if (digit >= radix)
      return -1;

    // After an overflow keep scanning so that a later invalid character is
    // still reported as -1, but leave the saturated result untouched.
    if (overflowed)
      continue;
    if (result > (kMax - digit) / radix) {
      overflowed = true;
      result = kMax;
    } else {
      result = result * radix + digit;
    }
  }
  return result;
}
937
ParseIPv4Host(const char * input,size_t length,bool * is_ipv4)938 void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
939 CHECK_EQ(type_, HostType::H_FAILED);
940 *is_ipv4 = false;
941 const char* pointer = input;
942 const char* mark = input;
943 const char* end = pointer + length;
944 int parts = 0;
945 uint32_t val = 0;
946 uint64_t numbers[4];
947 int tooBigNumbers = 0;
948 if (length == 0)
949 return;
950
951 while (pointer <= end) {
952 const char ch = pointer < end ? pointer[0] : kEOL;
953 const int remaining = end - pointer - 1;
954 if (ch == '.' || ch == kEOL) {
955 if (++parts > static_cast<int>(arraysize(numbers)))
956 return;
957 if (pointer == mark)
958 return;
959 int64_t n = ParseNumber(mark, pointer);
960 if (n < 0)
961 return;
962
963 if (n > 255) {
964 tooBigNumbers++;
965 }
966 numbers[parts - 1] = n;
967 mark = pointer + 1;
968 if (ch == '.' && remaining == 0)
969 break;
970 }
971 pointer++;
972 }
973 CHECK_GT(parts, 0);
974 *is_ipv4 = true;
975
976 // If any but the last item in numbers is greater than 255, return failure.
977 // If the last item in numbers is greater than or equal to
978 // 256^(5 - the number of items in numbers), return failure.
979 if (tooBigNumbers > 1 ||
980 (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
981 numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
982 return;
983 }
984
985 type_ = HostType::H_IPV4;
986 val = numbers[parts - 1];
987 for (int n = 0; n < parts - 1; n++) {
988 double b = 3 - n;
989 val += numbers[n] * pow(256, b);
990 }
991
992 value_.ipv4 = val;
993 }
994
ParseOpaqueHost(const char * input,size_t length)995 void URLHost::ParseOpaqueHost(const char* input, size_t length) {
996 CHECK_EQ(type_, HostType::H_FAILED);
997 std::string output;
998 output.reserve(length);
999 for (size_t i = 0; i < length; i++) {
1000 const char ch = input[i];
1001 if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1002 return;
1003 } else {
1004 AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1005 }
1006 }
1007
1008 SetOpaque(std::move(output));
1009 }
1010
ParseHost(const char * input,size_t length,bool is_special,bool unicode)1011 void URLHost::ParseHost(const char* input,
1012 size_t length,
1013 bool is_special,
1014 bool unicode) {
1015 CHECK_EQ(type_, HostType::H_FAILED);
1016 const char* pointer = input;
1017
1018 if (length == 0)
1019 return;
1020
1021 if (pointer[0] == '[') {
1022 if (pointer[length - 1] != ']')
1023 return;
1024 return ParseIPv6Host(++pointer, length - 2);
1025 }
1026
1027 if (!is_special)
1028 return ParseOpaqueHost(input, length);
1029
1030 // First, we have to percent decode
1031 std::string decoded = PercentDecode(input, length);
1032
1033 // Then we have to punycode toASCII
1034 if (!ToASCII(decoded, &decoded))
1035 return;
1036
1037 // If any of the following characters are still present, we have to fail
1038 for (size_t n = 0; n < decoded.size(); n++) {
1039 const char ch = decoded[n];
1040 if (IsForbiddenHostCodePoint(ch)) {
1041 return;
1042 }
1043 }
1044
1045 // Check to see if it's an IPv4 IP address
1046 bool is_ipv4;
1047 ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1048 if (is_ipv4)
1049 return;
1050
1051 // If the unicode flag is set, run the result through punycode ToUnicode
1052 if (unicode && !ToUnicode(decoded, &decoded))
1053 return;
1054
1055 // It's not an IPv4 or IPv6 address, it must be a domain
1056 SetDomain(std::move(decoded));
1057 }
1058
// Finds the first longest run of two or more consecutive zero elements in
// values[0, len). Used to pick the position of the "::" compression when
// serializing an IPv6 address.
// Returns a pointer to the first element of that run, or nullptr when no run
// of at least two zeros exists.
template <typename T>
inline T* FindLongestZeroSequence(T* values, size_t len) {
  T* best_start = nullptr;
  unsigned best_len = 1;  // A run must be strictly longer than one element.

  T* run_start = nullptr;
  unsigned run_len = 0;

  for (size_t i = 0; i < len; i++) {
    T* piece = values + i;
    if (*piece != 0) {
      // A run just ended (or none was open); keep it if it beats the best.
      if (run_len > best_len) {
        best_len = run_len;
        best_start = run_start;
      }
      run_len = 0;
      run_start = nullptr;
      continue;
    }
    if (run_start == nullptr)
      run_start = piece;
    run_len++;
  }

  // A run that reaches the end of the array has not been compared yet.
  return run_len > best_len ? run_start : best_start;
}
1089
ToStringMove()1090 std::string URLHost::ToStringMove() {
1091 std::string return_value;
1092 switch (type_) {
1093 case HostType::H_DOMAIN:
1094 case HostType::H_OPAQUE:
1095 return_value = std::move(value_.domain_or_opaque);
1096 break;
1097 default:
1098 return_value = ToString();
1099 break;
1100 }
1101 Reset();
1102 return return_value;
1103 }
1104
ToString() const1105 std::string URLHost::ToString() const {
1106 std::string dest;
1107 switch (type_) {
1108 case HostType::H_DOMAIN:
1109 case HostType::H_OPAQUE:
1110 return value_.domain_or_opaque;
1111 break;
1112 case HostType::H_IPV4: {
1113 dest.reserve(15);
1114 uint32_t value = value_.ipv4;
1115 for (int n = 0; n < 4; n++) {
1116 char buf[4];
1117 snprintf(buf, sizeof(buf), "%d", value % 256);
1118 dest.insert(0, buf);
1119 if (n < 3)
1120 dest.insert(0, 1, '.');
1121 value /= 256;
1122 }
1123 break;
1124 }
1125 case HostType::H_IPV6: {
1126 dest.reserve(41);
1127 dest += '[';
1128 const uint16_t* start = &value_.ipv6[0];
1129 const uint16_t* compress_pointer =
1130 FindLongestZeroSequence(start, 8);
1131 bool ignore0 = false;
1132 for (int n = 0; n <= 7; n++) {
1133 const uint16_t* piece = &value_.ipv6[n];
1134 if (ignore0 && *piece == 0)
1135 continue;
1136 else if (ignore0)
1137 ignore0 = false;
1138 if (compress_pointer == piece) {
1139 dest += n == 0 ? "::" : ":";
1140 ignore0 = true;
1141 continue;
1142 }
1143 char buf[5];
1144 snprintf(buf, sizeof(buf), "%x", *piece);
1145 dest += buf;
1146 if (n < 7)
1147 dest += ':';
1148 }
1149 dest += ']';
1150 break;
1151 }
1152 case HostType::H_FAILED:
1153 break;
1154 }
1155 return dest;
1156 }
1157
ParseHost(const std::string & input,std::string * output,bool is_special,bool unicode=false)1158 bool ParseHost(const std::string& input,
1159 std::string* output,
1160 bool is_special,
1161 bool unicode = false) {
1162 if (input.length() == 0) {
1163 output->clear();
1164 return true;
1165 }
1166 URLHost host;
1167 host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1168 if (host.ParsingFailed())
1169 return false;
1170 *output = host.ToStringMove();
1171 return true;
1172 }
1173
FromJSStringArray(Environment * env,Local<Array> array)1174 inline std::vector<std::string> FromJSStringArray(Environment* env,
1175 Local<Array> array) {
1176 std::vector<std::string> vec;
1177 const int32_t len = array->Length();
1178 if (len == 0)
1179 return vec; // nothing to copy
1180 vec.reserve(len);
1181 for (int32_t n = 0; n < len; n++) {
1182 Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1183 if (val->IsString()) {
1184 Utf8Value value(env->isolate(), val.As<String>());
1185 vec.emplace_back(*value, value.length());
1186 }
1187 }
1188 return vec;
1189 }
1190
ToJSStringArray(Environment * env,const std::vector<std::string> & vec)1191 inline Local<Array> ToJSStringArray(Environment* env,
1192 const std::vector<std::string>& vec) {
1193 Isolate* isolate = env->isolate();
1194 Local<Array> array = Array::New(isolate, vec.size());
1195 for (size_t n = 0; n < vec.size(); n++)
1196 array->Set(env->context(), n, Utf8String(isolate, vec[n])).FromJust();
1197 return array;
1198 }
1199
HarvestBase(Environment * env,Local<Object> base_obj)1200 inline url_data HarvestBase(Environment* env, Local<Object> base_obj) {
1201 url_data base;
1202 Local<Context> context = env->context();
1203 Local<Value> flags =
1204 base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1205 if (flags->IsInt32())
1206 base.flags = flags->Int32Value(context).FromJust();
1207
1208 Local<Value> scheme =
1209 base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1210 base.scheme = Utf8Value(env->isolate(), scheme).out();
1211
1212 auto GetStr = [&](std::string url_data::*member,
1213 int flag,
1214 Local<String> name,
1215 bool empty_as_present) {
1216 Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
1217 if (value->IsString()) {
1218 Utf8Value utf8value(env->isolate(), value.As<String>());
1219 (base.*member).assign(*utf8value, utf8value.length());
1220 if (empty_as_present || value.As<String>()->Length() != 0) {
1221 base.flags |= flag;
1222 }
1223 }
1224 };
1225 GetStr(&url_data::username,
1226 URL_FLAGS_HAS_USERNAME,
1227 env->username_string(),
1228 false);
1229 GetStr(&url_data::password,
1230 URL_FLAGS_HAS_PASSWORD,
1231 env->password_string(),
1232 false);
1233 GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
1234 GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
1235 GetStr(&url_data::fragment,
1236 URL_FLAGS_HAS_FRAGMENT,
1237 env->fragment_string(),
1238 true);
1239
1240 Local<Value> port =
1241 base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1242 if (port->IsInt32())
1243 base.port = port.As<Int32>()->Value();
1244
1245 Local<Value>
1246 path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
1247 if (path->IsArray()) {
1248 base.flags |= URL_FLAGS_HAS_PATH;
1249 base.path = FromJSStringArray(env, path.As<Array>());
1250 }
1251 return base;
1252 }
1253
HarvestContext(Environment * env,Local<Object> context_obj)1254 inline url_data HarvestContext(Environment* env, Local<Object> context_obj) {
1255 url_data context;
1256 Local<Value> flags =
1257 context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1258 if (flags->IsInt32()) {
1259 static const int32_t copy_flags_mask =
1260 URL_FLAGS_SPECIAL |
1261 URL_FLAGS_CANNOT_BE_BASE |
1262 URL_FLAGS_HAS_USERNAME |
1263 URL_FLAGS_HAS_PASSWORD |
1264 URL_FLAGS_HAS_HOST;
1265 context.flags |= flags.As<Int32>()->Value() & copy_flags_mask;
1266 }
1267 Local<Value> scheme =
1268 context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1269 if (scheme->IsString()) {
1270 Utf8Value value(env->isolate(), scheme);
1271 context.scheme.assign(*value, value.length());
1272 }
1273 Local<Value> port =
1274 context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1275 if (port->IsInt32())
1276 context.port = port.As<Int32>()->Value();
1277 if (context.flags & URL_FLAGS_HAS_USERNAME) {
1278 Local<Value> username =
1279 context_obj->Get(env->context(),
1280 env->username_string()).ToLocalChecked();
1281 CHECK(username->IsString());
1282 Utf8Value value(env->isolate(), username);
1283 context.username.assign(*value, value.length());
1284 }
1285 if (context.flags & URL_FLAGS_HAS_PASSWORD) {
1286 Local<Value> password =
1287 context_obj->Get(env->context(),
1288 env->password_string()).ToLocalChecked();
1289 CHECK(password->IsString());
1290 Utf8Value value(env->isolate(), password);
1291 context.password.assign(*value, value.length());
1292 }
1293 Local<Value> host =
1294 context_obj->Get(env->context(),
1295 env->host_string()).ToLocalChecked();
1296 if (host->IsString()) {
1297 Utf8Value value(env->isolate(), host);
1298 context.host.assign(*value, value.length());
1299 }
1300 return context;
1301 }
1302
1303 // Single dot segment can be ".", "%2e", or "%2E"
IsSingleDotSegment(const std::string & str)1304 inline bool IsSingleDotSegment(const std::string& str) {
1305 switch (str.size()) {
1306 case 1:
1307 return str == ".";
1308 case 3:
1309 return str[0] == '%' &&
1310 str[1] == '2' &&
1311 ASCIILowercase(str[2]) == 'e';
1312 default:
1313 return false;
1314 }
1315 }
1316
1317 // Double dot segment can be:
1318 // "..", ".%2e", ".%2E", "%2e.", "%2E.",
1319 // "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
IsDoubleDotSegment(const std::string & str)1320 inline bool IsDoubleDotSegment(const std::string& str) {
1321 switch (str.size()) {
1322 case 2:
1323 return str == "..";
1324 case 4:
1325 if (str[0] != '.' && str[0] != '%')
1326 return false;
1327 return ((str[0] == '.' &&
1328 str[1] == '%' &&
1329 str[2] == '2' &&
1330 ASCIILowercase(str[3]) == 'e') ||
1331 (str[0] == '%' &&
1332 str[1] == '2' &&
1333 ASCIILowercase(str[2]) == 'e' &&
1334 str[3] == '.'));
1335 case 6:
1336 return (str[0] == '%' &&
1337 str[1] == '2' &&
1338 ASCIILowercase(str[2]) == 'e' &&
1339 str[3] == '%' &&
1340 str[4] == '2' &&
1341 ASCIILowercase(str[5]) == 'e');
1342 default:
1343 return false;
1344 }
1345 }
1346
ShortenUrlPath(struct url_data * url)1347 inline void ShortenUrlPath(struct url_data* url) {
1348 if (url->path.empty()) return;
1349 if (url->path.size() == 1 && url->scheme == "file:" &&
1350 IsNormalizedWindowsDriveLetter(url->path[0])) return;
1351 url->path.pop_back();
1352 }
1353
1354 } // anonymous namespace
1355
Parse(const char * input,size_t len,enum url_parse_state state_override,struct url_data * url,bool has_url,const struct url_data * base,bool has_base)1356 void URL::Parse(const char* input,
1357 size_t len,
1358 enum url_parse_state state_override,
1359 struct url_data* url,
1360 bool has_url,
1361 const struct url_data* base,
1362 bool has_base) {
1363 const char* p = input;
1364 const char* end = input + len;
1365
1366 if (!has_url) {
1367 for (const char* ptr = p; ptr < end; ptr++) {
1368 if (IsC0ControlOrSpace(*ptr))
1369 p++;
1370 else
1371 break;
1372 }
1373 for (const char* ptr = end - 1; ptr >= p; ptr--) {
1374 if (IsC0ControlOrSpace(*ptr))
1375 end--;
1376 else
1377 break;
1378 }
1379 input = p;
1380 len = end - p;
1381 }
1382
1383 // The spec says we should strip out any ASCII tabs or newlines.
1384 // In those cases, we create another std::string instance with the filtered
1385 // contents, but in the general case we avoid the overhead.
1386 std::string whitespace_stripped;
1387 for (const char* ptr = p; ptr < end; ptr++) {
1388 if (!IsASCIITabOrNewline(*ptr))
1389 continue;
1390 // Hit tab or newline. Allocate storage, copy what we have until now,
1391 // and then iterate and filter all similar characters out.
1392 whitespace_stripped.reserve(len - 1);
1393 whitespace_stripped.assign(p, ptr - p);
1394 // 'ptr + 1' skips the current char, which we know to be tab or newline.
1395 for (ptr = ptr + 1; ptr < end; ptr++) {
1396 if (!IsASCIITabOrNewline(*ptr))
1397 whitespace_stripped += *ptr;
1398 }
1399
1400 // Update variables like they should have looked like if the string
1401 // had been stripped of whitespace to begin with.
1402 input = whitespace_stripped.c_str();
1403 len = whitespace_stripped.size();
1404 p = input;
1405 end = input + len;
1406 break;
1407 }
1408
1409 bool atflag = false; // Set when @ has been seen.
1410 bool square_bracket_flag = false; // Set inside of [...]
1411 bool password_token_seen_flag = false; // Set after a : after an username.
1412
1413 std::string buffer;
1414
1415 // Set the initial parse state.
1416 const bool has_state_override = state_override != kUnknownState;
1417 enum url_parse_state state = has_state_override ? state_override :
1418 kSchemeStart;
1419
1420 if (state < kSchemeStart || state > kFragment) {
1421 url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1422 return;
1423 }
1424
1425 while (p <= end) {
1426 const char ch = p < end ? p[0] : kEOL;
1427 bool special = (url->flags & URL_FLAGS_SPECIAL);
1428 bool cannot_be_base;
1429 const bool special_back_slash = (special && ch == '\\');
1430
1431 switch (state) {
1432 case kSchemeStart:
1433 if (IsASCIIAlpha(ch)) {
1434 buffer += ASCIILowercase(ch);
1435 state = kScheme;
1436 } else if (!has_state_override) {
1437 state = kNoScheme;
1438 continue;
1439 } else {
1440 url->flags |= URL_FLAGS_FAILED;
1441 return;
1442 }
1443 break;
1444 case kScheme:
1445 if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1446 buffer += ASCIILowercase(ch);
1447 } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1448 if (has_state_override && buffer.size() == 0) {
1449 url->flags |= URL_FLAGS_TERMINATED;
1450 return;
1451 }
1452 buffer += ':';
1453
1454 bool new_is_special = IsSpecial(buffer);
1455
1456 if (has_state_override) {
1457 if ((special != new_is_special) ||
1458 ((buffer == "file:") &&
1459 ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1460 (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1461 (url->port != -1)))) {
1462 url->flags |= URL_FLAGS_TERMINATED;
1463 return;
1464 }
1465
1466 // File scheme && (host == empty or null) check left to JS-land
1467 // as it can be done before even entering C++ binding.
1468 }
1469
1470 url->scheme = std::move(buffer);
1471 url->port = NormalizePort(url->scheme, url->port);
1472 if (new_is_special) {
1473 url->flags |= URL_FLAGS_SPECIAL;
1474 special = true;
1475 } else {
1476 url->flags &= ~URL_FLAGS_SPECIAL;
1477 special = false;
1478 }
1479 buffer.clear();
1480 if (has_state_override)
1481 return;
1482 if (url->scheme == "file:") {
1483 state = kFile;
1484 } else if (special &&
1485 has_base &&
1486 url->scheme == base->scheme) {
1487 state = kSpecialRelativeOrAuthority;
1488 } else if (special) {
1489 state = kSpecialAuthoritySlashes;
1490 } else if (p[1] == '/') {
1491 state = kPathOrAuthority;
1492 p++;
1493 } else {
1494 url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1495 url->flags |= URL_FLAGS_HAS_PATH;
1496 url->path.emplace_back("");
1497 state = kCannotBeBase;
1498 }
1499 } else if (!has_state_override) {
1500 buffer.clear();
1501 state = kNoScheme;
1502 p = input;
1503 continue;
1504 } else {
1505 url->flags |= URL_FLAGS_FAILED;
1506 return;
1507 }
1508 break;
1509 case kNoScheme:
1510 cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1511 if (!has_base || (cannot_be_base && ch != '#')) {
1512 url->flags |= URL_FLAGS_FAILED;
1513 return;
1514 } else if (cannot_be_base && ch == '#') {
1515 url->scheme = base->scheme;
1516 if (IsSpecial(url->scheme)) {
1517 url->flags |= URL_FLAGS_SPECIAL;
1518 special = true;
1519 } else {
1520 url->flags &= ~URL_FLAGS_SPECIAL;
1521 special = false;
1522 }
1523 if (base->flags & URL_FLAGS_HAS_PATH) {
1524 url->flags |= URL_FLAGS_HAS_PATH;
1525 url->path = base->path;
1526 }
1527 if (base->flags & URL_FLAGS_HAS_QUERY) {
1528 url->flags |= URL_FLAGS_HAS_QUERY;
1529 url->query = base->query;
1530 }
1531 if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1532 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1533 url->fragment = base->fragment;
1534 }
1535 url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1536 state = kFragment;
1537 } else if (has_base &&
1538 base->scheme != "file:") {
1539 state = kRelative;
1540 continue;
1541 } else {
1542 url->scheme = "file:";
1543 url->flags |= URL_FLAGS_SPECIAL;
1544 special = true;
1545 state = kFile;
1546 continue;
1547 }
1548 break;
1549 case kSpecialRelativeOrAuthority:
1550 if (ch == '/' && p[1] == '/') {
1551 state = kSpecialAuthorityIgnoreSlashes;
1552 p++;
1553 } else {
1554 state = kRelative;
1555 continue;
1556 }
1557 break;
1558 case kPathOrAuthority:
1559 if (ch == '/') {
1560 state = kAuthority;
1561 } else {
1562 state = kPath;
1563 continue;
1564 }
1565 break;
1566 case kRelative:
1567 url->scheme = base->scheme;
1568 if (IsSpecial(url->scheme)) {
1569 url->flags |= URL_FLAGS_SPECIAL;
1570 special = true;
1571 } else {
1572 url->flags &= ~URL_FLAGS_SPECIAL;
1573 special = false;
1574 }
1575 switch (ch) {
1576 case kEOL:
1577 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1578 url->flags |= URL_FLAGS_HAS_USERNAME;
1579 url->username = base->username;
1580 }
1581 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1582 url->flags |= URL_FLAGS_HAS_PASSWORD;
1583 url->password = base->password;
1584 }
1585 if (base->flags & URL_FLAGS_HAS_HOST) {
1586 url->flags |= URL_FLAGS_HAS_HOST;
1587 url->host = base->host;
1588 }
1589 if (base->flags & URL_FLAGS_HAS_QUERY) {
1590 url->flags |= URL_FLAGS_HAS_QUERY;
1591 url->query = base->query;
1592 }
1593 if (base->flags & URL_FLAGS_HAS_PATH) {
1594 url->flags |= URL_FLAGS_HAS_PATH;
1595 url->path = base->path;
1596 }
1597 url->port = base->port;
1598 break;
1599 case '/':
1600 state = kRelativeSlash;
1601 break;
1602 case '?':
1603 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1604 url->flags |= URL_FLAGS_HAS_USERNAME;
1605 url->username = base->username;
1606 }
1607 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1608 url->flags |= URL_FLAGS_HAS_PASSWORD;
1609 url->password = base->password;
1610 }
1611 if (base->flags & URL_FLAGS_HAS_HOST) {
1612 url->flags |= URL_FLAGS_HAS_HOST;
1613 url->host = base->host;
1614 }
1615 if (base->flags & URL_FLAGS_HAS_PATH) {
1616 url->flags |= URL_FLAGS_HAS_PATH;
1617 url->path = base->path;
1618 }
1619 url->port = base->port;
1620 state = kQuery;
1621 break;
1622 case '#':
1623 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1624 url->flags |= URL_FLAGS_HAS_USERNAME;
1625 url->username = base->username;
1626 }
1627 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1628 url->flags |= URL_FLAGS_HAS_PASSWORD;
1629 url->password = base->password;
1630 }
1631 if (base->flags & URL_FLAGS_HAS_HOST) {
1632 url->flags |= URL_FLAGS_HAS_HOST;
1633 url->host = base->host;
1634 }
1635 if (base->flags & URL_FLAGS_HAS_QUERY) {
1636 url->flags |= URL_FLAGS_HAS_QUERY;
1637 url->query = base->query;
1638 }
1639 if (base->flags & URL_FLAGS_HAS_PATH) {
1640 url->flags |= URL_FLAGS_HAS_PATH;
1641 url->path = base->path;
1642 }
1643 url->port = base->port;
1644 state = kFragment;
1645 break;
1646 default:
1647 if (special_back_slash) {
1648 state = kRelativeSlash;
1649 } else {
1650 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1651 url->flags |= URL_FLAGS_HAS_USERNAME;
1652 url->username = base->username;
1653 }
1654 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1655 url->flags |= URL_FLAGS_HAS_PASSWORD;
1656 url->password = base->password;
1657 }
1658 if (base->flags & URL_FLAGS_HAS_HOST) {
1659 url->flags |= URL_FLAGS_HAS_HOST;
1660 url->host = base->host;
1661 }
1662 if (base->flags & URL_FLAGS_HAS_PATH) {
1663 url->flags |= URL_FLAGS_HAS_PATH;
1664 url->path = base->path;
1665 ShortenUrlPath(url);
1666 }
1667 url->port = base->port;
1668 state = kPath;
1669 continue;
1670 }
1671 }
1672 break;
1673 case kRelativeSlash:
1674 if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1675 state = kSpecialAuthorityIgnoreSlashes;
1676 } else if (ch == '/') {
1677 state = kAuthority;
1678 } else {
1679 if (base->flags & URL_FLAGS_HAS_USERNAME) {
1680 url->flags |= URL_FLAGS_HAS_USERNAME;
1681 url->username = base->username;
1682 }
1683 if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1684 url->flags |= URL_FLAGS_HAS_PASSWORD;
1685 url->password = base->password;
1686 }
1687 if (base->flags & URL_FLAGS_HAS_HOST) {
1688 url->flags |= URL_FLAGS_HAS_HOST;
1689 url->host = base->host;
1690 }
1691 url->port = base->port;
1692 state = kPath;
1693 continue;
1694 }
1695 break;
1696 case kSpecialAuthoritySlashes:
1697 state = kSpecialAuthorityIgnoreSlashes;
1698 if (ch == '/' && p[1] == '/') {
1699 p++;
1700 } else {
1701 continue;
1702 }
1703 break;
1704 case kSpecialAuthorityIgnoreSlashes:
1705 if (ch != '/' && ch != '\\') {
1706 state = kAuthority;
1707 continue;
1708 }
1709 break;
1710 case kAuthority:
1711 if (ch == '@') {
1712 if (atflag) {
1713 buffer.reserve(buffer.size() + 3);
1714 buffer.insert(0, "%40");
1715 }
1716 atflag = true;
1717 const size_t blen = buffer.size();
1718 if (blen > 0 && buffer[0] != ':') {
1719 url->flags |= URL_FLAGS_HAS_USERNAME;
1720 }
1721 for (size_t n = 0; n < blen; n++) {
1722 const char bch = buffer[n];
1723 if (bch == ':') {
1724 url->flags |= URL_FLAGS_HAS_PASSWORD;
1725 if (!password_token_seen_flag) {
1726 password_token_seen_flag = true;
1727 continue;
1728 }
1729 }
1730 if (password_token_seen_flag) {
1731 AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1732 } else {
1733 AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1734 }
1735 }
1736 buffer.clear();
1737 } else if (ch == kEOL ||
1738 ch == '/' ||
1739 ch == '?' ||
1740 ch == '#' ||
1741 special_back_slash) {
1742 if (atflag && buffer.size() == 0) {
1743 url->flags |= URL_FLAGS_FAILED;
1744 return;
1745 }
1746 p -= buffer.size() + 1;
1747 buffer.clear();
1748 state = kHost;
1749 } else {
1750 buffer += ch;
1751 }
1752 break;
1753 case kHost:
1754 case kHostname:
1755 if (has_state_override && url->scheme == "file:") {
1756 state = kFileHost;
1757 continue;
1758 } else if (ch == ':' && !square_bracket_flag) {
1759 if (buffer.size() == 0) {
1760 url->flags |= URL_FLAGS_FAILED;
1761 return;
1762 }
1763 url->flags |= URL_FLAGS_HAS_HOST;
1764 if (!ParseHost(buffer, &url->host, special)) {
1765 url->flags |= URL_FLAGS_FAILED;
1766 return;
1767 }
1768 buffer.clear();
1769 state = kPort;
1770 if (state_override == kHostname) {
1771 return;
1772 }
1773 } else if (ch == kEOL ||
1774 ch == '/' ||
1775 ch == '?' ||
1776 ch == '#' ||
1777 special_back_slash) {
1778 p--;
1779 if (special && buffer.size() == 0) {
1780 url->flags |= URL_FLAGS_FAILED;
1781 return;
1782 }
1783 if (has_state_override &&
1784 buffer.size() == 0 &&
1785 ((url->username.size() > 0 || url->password.size() > 0) ||
1786 url->port != -1)) {
1787 url->flags |= URL_FLAGS_TERMINATED;
1788 return;
1789 }
1790 url->flags |= URL_FLAGS_HAS_HOST;
1791 if (!ParseHost(buffer, &url->host, special)) {
1792 url->flags |= URL_FLAGS_FAILED;
1793 return;
1794 }
1795 buffer.clear();
1796 state = kPathStart;
1797 if (has_state_override) {
1798 return;
1799 }
1800 } else {
1801 if (ch == '[')
1802 square_bracket_flag = true;
1803 if (ch == ']')
1804 square_bracket_flag = false;
1805 buffer += ch;
1806 }
1807 break;
1808 case kPort:
1809 if (IsASCIIDigit(ch)) {
1810 buffer += ch;
1811 } else if (has_state_override ||
1812 ch == kEOL ||
1813 ch == '/' ||
1814 ch == '?' ||
1815 ch == '#' ||
1816 special_back_slash) {
1817 if (buffer.size() > 0) {
1818 unsigned port = 0;
1819 // the condition port <= 0xffff prevents integer overflow
1820 for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1821 port = port * 10 + buffer[i] - '0';
1822 if (port > 0xffff) {
1823 // TODO(TimothyGu): This hack is currently needed for the host
1824 // setter since it needs access to hostname if it is valid, and
1825 // if the FAILED flag is set the entire response to JS layer
1826 // will be empty.
1827 if (state_override == kHost)
1828 url->port = -1;
1829 else
1830 url->flags |= URL_FLAGS_FAILED;
1831 return;
1832 }
1833 // the port is valid
1834 url->port = NormalizePort(url->scheme, static_cast<int>(port));
1835 if (url->port == -1)
1836 url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1837 buffer.clear();
1838 } else if (has_state_override) {
1839 // TODO(TimothyGu): Similar case as above.
1840 if (state_override == kHost)
1841 url->port = -1;
1842 else
1843 url->flags |= URL_FLAGS_TERMINATED;
1844 return;
1845 }
1846 state = kPathStart;
1847 continue;
1848 } else {
1849 url->flags |= URL_FLAGS_FAILED;
1850 return;
1851 }
1852 break;
1853 case kFile:
1854 url->scheme = "file:";
1855 if (ch == '/' || ch == '\\') {
1856 state = kFileSlash;
1857 } else if (has_base && base->scheme == "file:") {
1858 switch (ch) {
1859 case kEOL:
1860 if (base->flags & URL_FLAGS_HAS_HOST) {
1861 url->flags |= URL_FLAGS_HAS_HOST;
1862 url->host = base->host;
1863 }
1864 if (base->flags & URL_FLAGS_HAS_PATH) {
1865 url->flags |= URL_FLAGS_HAS_PATH;
1866 url->path = base->path;
1867 }
1868 if (base->flags & URL_FLAGS_HAS_QUERY) {
1869 url->flags |= URL_FLAGS_HAS_QUERY;
1870 url->query = base->query;
1871 }
1872 break;
1873 case '?':
1874 if (base->flags & URL_FLAGS_HAS_HOST) {
1875 url->flags |= URL_FLAGS_HAS_HOST;
1876 url->host = base->host;
1877 }
1878 if (base->flags & URL_FLAGS_HAS_PATH) {
1879 url->flags |= URL_FLAGS_HAS_PATH;
1880 url->path = base->path;
1881 }
1882 url->flags |= URL_FLAGS_HAS_QUERY;
1883 url->query.clear();
1884 state = kQuery;
1885 break;
1886 case '#':
1887 if (base->flags & URL_FLAGS_HAS_HOST) {
1888 url->flags |= URL_FLAGS_HAS_HOST;
1889 url->host = base->host;
1890 }
1891 if (base->flags & URL_FLAGS_HAS_PATH) {
1892 url->flags |= URL_FLAGS_HAS_PATH;
1893 url->path = base->path;
1894 }
1895 if (base->flags & URL_FLAGS_HAS_QUERY) {
1896 url->flags |= URL_FLAGS_HAS_QUERY;
1897 url->query = base->query;
1898 }
1899 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1900 url->fragment.clear();
1901 state = kFragment;
1902 break;
1903 default:
1904 if (!StartsWithWindowsDriveLetter(p, end)) {
1905 if (base->flags & URL_FLAGS_HAS_HOST) {
1906 url->flags |= URL_FLAGS_HAS_HOST;
1907 url->host = base->host;
1908 }
1909 if (base->flags & URL_FLAGS_HAS_PATH) {
1910 url->flags |= URL_FLAGS_HAS_PATH;
1911 url->path = base->path;
1912 }
1913 ShortenUrlPath(url);
1914 }
1915 state = kPath;
1916 continue;
1917 }
1918 } else {
1919 state = kPath;
1920 continue;
1921 }
1922 break;
1923 case kFileSlash:
1924 if (ch == '/' || ch == '\\') {
1925 state = kFileHost;
1926 } else {
1927 if (has_base &&
1928 base->scheme == "file:" &&
1929 !StartsWithWindowsDriveLetter(p, end)) {
1930 if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1931 url->flags |= URL_FLAGS_HAS_PATH;
1932 url->path.push_back(base->path[0]);
1933 } else {
1934 if (base->flags & URL_FLAGS_HAS_HOST) {
1935 url->flags |= URL_FLAGS_HAS_HOST;
1936 url->host = base->host;
1937 } else {
1938 url->flags &= ~URL_FLAGS_HAS_HOST;
1939 url->host.clear();
1940 }
1941 }
1942 }
1943 state = kPath;
1944 continue;
1945 }
1946 break;
1947 case kFileHost:
1948 if (ch == kEOL ||
1949 ch == '/' ||
1950 ch == '\\' ||
1951 ch == '?' ||
1952 ch == '#') {
1953 if (!has_state_override &&
1954 buffer.size() == 2 &&
1955 IsWindowsDriveLetter(buffer)) {
1956 state = kPath;
1957 } else if (buffer.size() == 0) {
1958 url->flags |= URL_FLAGS_HAS_HOST;
1959 url->host.clear();
1960 if (has_state_override)
1961 return;
1962 state = kPathStart;
1963 } else {
1964 std::string host;
1965 if (!ParseHost(buffer, &host, special)) {
1966 url->flags |= URL_FLAGS_FAILED;
1967 return;
1968 }
1969 if (host == "localhost")
1970 host.clear();
1971 url->flags |= URL_FLAGS_HAS_HOST;
1972 url->host = host;
1973 if (has_state_override)
1974 return;
1975 buffer.clear();
1976 state = kPathStart;
1977 }
1978 continue;
1979 } else {
1980 buffer += ch;
1981 }
1982 break;
1983 case kPathStart:
1984 if (IsSpecial(url->scheme)) {
1985 state = kPath;
1986 if (ch != '/' && ch != '\\') {
1987 continue;
1988 }
1989 } else if (!has_state_override && ch == '?') {
1990 url->flags |= URL_FLAGS_HAS_QUERY;
1991 url->query.clear();
1992 state = kQuery;
1993 } else if (!has_state_override && ch == '#') {
1994 url->flags |= URL_FLAGS_HAS_FRAGMENT;
1995 url->fragment.clear();
1996 state = kFragment;
1997 } else if (ch != kEOL) {
1998 state = kPath;
1999 if (ch != '/') {
2000 continue;
2001 }
2002 }
2003 break;
2004 case kPath:
2005 if (ch == kEOL ||
2006 ch == '/' ||
2007 special_back_slash ||
2008 (!has_state_override && (ch == '?' || ch == '#'))) {
2009 if (IsDoubleDotSegment(buffer)) {
2010 ShortenUrlPath(url);
2011 if (ch != '/' && !special_back_slash) {
2012 url->flags |= URL_FLAGS_HAS_PATH;
2013 url->path.emplace_back("");
2014 }
2015 } else if (IsSingleDotSegment(buffer) &&
2016 ch != '/' && !special_back_slash) {
2017 url->flags |= URL_FLAGS_HAS_PATH;
2018 url->path.emplace_back("");
2019 } else if (!IsSingleDotSegment(buffer)) {
2020 if (url->scheme == "file:" &&
2021 url->path.empty() &&
2022 buffer.size() == 2 &&
2023 IsWindowsDriveLetter(buffer)) {
2024 if ((url->flags & URL_FLAGS_HAS_HOST) &&
2025 !url->host.empty()) {
2026 url->host.clear();
2027 url->flags |= URL_FLAGS_HAS_HOST;
2028 }
2029 buffer[1] = ':';
2030 }
2031 url->flags |= URL_FLAGS_HAS_PATH;
2032 url->path.emplace_back(std::move(buffer));
2033 }
2034 buffer.clear();
2035 if (url->scheme == "file:" &&
2036 (ch == kEOL ||
2037 ch == '?' ||
2038 ch == '#')) {
2039 while (url->path.size() > 1 && url->path[0].length() == 0) {
2040 url->path.erase(url->path.begin());
2041 }
2042 }
2043 if (ch == '?') {
2044 url->flags |= URL_FLAGS_HAS_QUERY;
2045 state = kQuery;
2046 } else if (ch == '#') {
2047 state = kFragment;
2048 }
2049 } else {
2050 AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2051 }
2052 break;
2053 case kCannotBeBase:
2054 switch (ch) {
2055 case '?':
2056 state = kQuery;
2057 break;
2058 case '#':
2059 state = kFragment;
2060 break;
2061 default:
2062 if (url->path.size() == 0)
2063 url->path.push_back("");
2064 if (url->path.size() > 0 && ch != kEOL)
2065 AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2066 }
2067 break;
2068 case kQuery:
2069 if (ch == kEOL || (!has_state_override && ch == '#')) {
2070 url->flags |= URL_FLAGS_HAS_QUERY;
2071 url->query = std::move(buffer);
2072 buffer.clear();
2073 if (ch == '#')
2074 state = kFragment;
2075 } else {
2076 AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2077 QUERY_ENCODE_SET_NONSPECIAL);
2078 }
2079 break;
2080 case kFragment:
2081 switch (ch) {
2082 case kEOL:
2083 url->flags |= URL_FLAGS_HAS_FRAGMENT;
2084 url->fragment = std::move(buffer);
2085 break;
2086 case 0:
2087 break;
2088 default:
2089 AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2090 }
2091 break;
2092 default:
2093 url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2094 return;
2095 }
2096
2097 p++;
2098 }
2099 } // NOLINT(readability/fn_size)
2100
SetArgs(Environment * env,Local<Value> argv[ARG_COUNT],const struct url_data & url)2101 static inline void SetArgs(Environment* env,
2102 Local<Value> argv[ARG_COUNT],
2103 const struct url_data& url) {
2104 Isolate* isolate = env->isolate();
2105 argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2106 argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str());
2107 if (url.flags & URL_FLAGS_HAS_USERNAME)
2108 argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2109 if (url.flags & URL_FLAGS_HAS_PASSWORD)
2110 argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2111 if (url.flags & URL_FLAGS_HAS_HOST)
2112 argv[ARG_HOST] = Utf8String(isolate, url.host);
2113 if (url.flags & URL_FLAGS_HAS_QUERY)
2114 argv[ARG_QUERY] = Utf8String(isolate, url.query);
2115 if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2116 argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2117 if (url.port > -1)
2118 argv[ARG_PORT] = Integer::New(isolate, url.port);
2119 if (url.flags & URL_FLAGS_HAS_PATH)
2120 argv[ARG_PATH] = ToJSStringArray(env, url.path);
2121 }
2122
Parse(Environment * env,Local<Value> recv,const char * input,const size_t len,enum url_parse_state state_override,Local<Value> base_obj,Local<Value> context_obj,Local<Function> cb,Local<Value> error_cb)2123 static void Parse(Environment* env,
2124 Local<Value> recv,
2125 const char* input,
2126 const size_t len,
2127 enum url_parse_state state_override,
2128 Local<Value> base_obj,
2129 Local<Value> context_obj,
2130 Local<Function> cb,
2131 Local<Value> error_cb) {
2132 Isolate* isolate = env->isolate();
2133 Local<Context> context = env->context();
2134 HandleScope handle_scope(isolate);
2135 Context::Scope context_scope(context);
2136
2137 const bool has_context = context_obj->IsObject();
2138 const bool has_base = base_obj->IsObject();
2139
2140 url_data base;
2141 url_data url;
2142 if (has_context)
2143 url = HarvestContext(env, context_obj.As<Object>());
2144 if (has_base)
2145 base = HarvestBase(env, base_obj.As<Object>());
2146
2147 URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2148 if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2149 ((state_override != kUnknownState) &&
2150 (url.flags & URL_FLAGS_TERMINATED)))
2151 return;
2152
2153 // Define the return value placeholders
2154 const Local<Value> undef = Undefined(isolate);
2155 const Local<Value> null = Null(isolate);
2156 if (!(url.flags & URL_FLAGS_FAILED)) {
2157 Local<Value> argv[] = {
2158 undef,
2159 undef,
2160 undef,
2161 undef,
2162 null, // host defaults to null
2163 null, // port defaults to null
2164 undef,
2165 null, // query defaults to null
2166 null, // fragment defaults to null
2167 };
2168 SetArgs(env, argv, url);
2169 cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2170 } else if (error_cb->IsFunction()) {
2171 Local<Value> argv[2] = { undef, undef };
2172 argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2173 argv[ERR_ARG_INPUT] =
2174 String::NewFromUtf8(env->isolate(),
2175 input,
2176 NewStringType::kNormal).ToLocalChecked();
2177 error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2178 .FromMaybe(Local<Value>());
2179 }
2180 }
2181
Parse(const FunctionCallbackInfo<Value> & args)2182 static void Parse(const FunctionCallbackInfo<Value>& args) {
2183 Environment* env = Environment::GetCurrent(args);
2184 CHECK_GE(args.Length(), 5);
2185 CHECK(args[0]->IsString()); // input
2186 CHECK(args[2]->IsUndefined() || // base context
2187 args[2]->IsNull() ||
2188 args[2]->IsObject());
2189 CHECK(args[3]->IsUndefined() || // context
2190 args[3]->IsNull() ||
2191 args[3]->IsObject());
2192 CHECK(args[4]->IsFunction()); // complete callback
2193 CHECK(args[5]->IsUndefined() || args[5]->IsFunction()); // error callback
2194
2195 Utf8Value input(env->isolate(), args[0]);
2196 enum url_parse_state state_override = kUnknownState;
2197 if (args[1]->IsNumber()) {
2198 state_override = static_cast<enum url_parse_state>(
2199 args[1]->Uint32Value(env->context()).FromJust());
2200 }
2201
2202 Parse(env, args.This(),
2203 *input, input.length(),
2204 state_override,
2205 args[2],
2206 args[3],
2207 args[4].As<Function>(),
2208 args[5]);
2209 }
2210
EncodeAuthSet(const FunctionCallbackInfo<Value> & args)2211 static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2212 Environment* env = Environment::GetCurrent(args);
2213 CHECK_GE(args.Length(), 1);
2214 CHECK(args[0]->IsString());
2215 Utf8Value value(env->isolate(), args[0]);
2216 std::string output;
2217 const size_t len = value.length();
2218 output.reserve(len);
2219 for (size_t n = 0; n < len; n++) {
2220 const char ch = (*value)[n];
2221 AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2222 }
2223 args.GetReturnValue().Set(
2224 String::NewFromUtf8(env->isolate(),
2225 output.c_str(),
2226 NewStringType::kNormal).ToLocalChecked());
2227 }
2228
ToUSVString(const FunctionCallbackInfo<Value> & args)2229 static void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2230 Environment* env = Environment::GetCurrent(args);
2231 CHECK_GE(args.Length(), 2);
2232 CHECK(args[0]->IsString());
2233 CHECK(args[1]->IsNumber());
2234
2235 TwoByteValue value(env->isolate(), args[0]);
2236 const size_t n = value.length();
2237
2238 const int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2239 CHECK_GE(start, 0);
2240
2241 for (size_t i = start; i < n; i++) {
2242 char16_t c = value[i];
2243 if (!IsUnicodeSurrogate(c)) {
2244 continue;
2245 } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) {
2246 value[i] = kUnicodeReplacementCharacter;
2247 } else {
2248 char16_t d = value[i + 1];
2249 if (IsUnicodeTrail(d)) {
2250 i++;
2251 } else {
2252 value[i] = kUnicodeReplacementCharacter;
2253 }
2254 }
2255 }
2256
2257 args.GetReturnValue().Set(
2258 String::NewFromTwoByte(env->isolate(),
2259 *value,
2260 NewStringType::kNormal,
2261 n).ToLocalChecked());
2262 }
2263
DomainToASCII(const FunctionCallbackInfo<Value> & args)2264 static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2265 Environment* env = Environment::GetCurrent(args);
2266 CHECK_GE(args.Length(), 1);
2267 CHECK(args[0]->IsString());
2268 Utf8Value value(env->isolate(), args[0]);
2269
2270 URLHost host;
2271 // Assuming the host is used for a special scheme.
2272 host.ParseHost(*value, value.length(), true);
2273 if (host.ParsingFailed()) {
2274 args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2275 return;
2276 }
2277 std::string out = host.ToStringMove();
2278 args.GetReturnValue().Set(
2279 String::NewFromUtf8(env->isolate(),
2280 out.c_str(),
2281 NewStringType::kNormal).ToLocalChecked());
2282 }
2283
DomainToUnicode(const FunctionCallbackInfo<Value> & args)2284 static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2285 Environment* env = Environment::GetCurrent(args);
2286 CHECK_GE(args.Length(), 1);
2287 CHECK(args[0]->IsString());
2288 Utf8Value value(env->isolate(), args[0]);
2289
2290 URLHost host;
2291 // Assuming the host is used for a special scheme.
2292 host.ParseHost(*value, value.length(), true, true);
2293 if (host.ParsingFailed()) {
2294 args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2295 return;
2296 }
2297 std::string out = host.ToStringMove();
2298 args.GetReturnValue().Set(
2299 String::NewFromUtf8(env->isolate(),
2300 out.c_str(),
2301 NewStringType::kNormal).ToLocalChecked());
2302 }
2303
ToFilePath() const2304 std::string URL::ToFilePath() const {
2305 if (context_.scheme != "file:") {
2306 return "";
2307 }
2308
2309 #ifdef _WIN32
2310 const char* slash = "\\";
2311 auto is_slash = [] (char ch) {
2312 return ch == '/' || ch == '\\';
2313 };
2314 #else
2315 const char* slash = "/";
2316 auto is_slash = [] (char ch) {
2317 return ch == '/';
2318 };
2319 if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2320 context_.host.length() > 0) {
2321 return "";
2322 }
2323 #endif
2324 std::string decoded_path;
2325 for (const std::string& part : context_.path) {
2326 std::string decoded = PercentDecode(part.c_str(), part.length());
2327 for (char& ch : decoded) {
2328 if (is_slash(ch)) {
2329 return "";
2330 }
2331 }
2332 decoded_path += slash + decoded;
2333 }
2334
2335 #ifdef _WIN32
2336 // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2337
2338 // If hostname is set, then we have a UNC path. Pass the hostname through
2339 // ToUnicode just in case it is an IDN using punycode encoding. We do not
2340 // need to worry about percent encoding because the URL parser will have
2341 // already taken care of that for us. Note that this only causes IDNs with an
2342 // appropriate `xn--` prefix to be decoded.
2343 if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2344 context_.host.length() > 0) {
2345 std::string unicode_host;
2346 if (!ToUnicode(context_.host, &unicode_host)) {
2347 return "";
2348 }
2349 return "\\\\" + unicode_host + decoded_path;
2350 }
2351 // Otherwise, it's a local path that requires a drive letter.
2352 if (decoded_path.length() < 3) {
2353 return "";
2354 }
2355 if (decoded_path[2] != ':' ||
2356 !IsASCIIAlpha(decoded_path[1])) {
2357 return "";
2358 }
2359 // Strip out the leading '\'.
2360 return decoded_path.substr(1);
2361 #else
2362 return decoded_path;
2363 #endif
2364 }
2365
FromFilePath(const std::string & file_path)2366 URL URL::FromFilePath(const std::string& file_path) {
2367 URL url("file://");
2368 std::string escaped_file_path;
2369 for (size_t i = 0; i < file_path.length(); ++i) {
2370 escaped_file_path += file_path[i];
2371 if (file_path[i] == '%')
2372 escaped_file_path += "25";
2373 }
2374 URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart,
2375 &url.context_, true, nullptr, false);
2376 return url;
2377 }
2378
2379 // This function works by calling out to a JS function that creates and
2380 // returns the JS URL object. Be mindful of the JS<->Native boundary
2381 // crossing that is required.
ToObject(Environment * env) const2382 const Local<Value> URL::ToObject(Environment* env) const {
2383 Isolate* isolate = env->isolate();
2384 Local<Context> context = env->context();
2385 Context::Scope context_scope(context);
2386
2387 const Local<Value> undef = Undefined(isolate);
2388 const Local<Value> null = Null(isolate);
2389
2390 if (context_.flags & URL_FLAGS_FAILED)
2391 return Local<Value>();
2392
2393 Local<Value> argv[] = {
2394 undef,
2395 undef,
2396 undef,
2397 undef,
2398 null, // host defaults to null
2399 null, // port defaults to null
2400 undef,
2401 null, // query defaults to null
2402 null, // fragment defaults to null
2403 };
2404 SetArgs(env, argv, context_);
2405
2406 MaybeLocal<Value> ret;
2407 {
2408 FatalTryCatch try_catch(env);
2409
2410 // The SetURLConstructor method must have been called already to
2411 // set the constructor function used below. SetURLConstructor is
2412 // called automatically when the internal/url.js module is loaded
2413 // during the internal/bootstrap/node.js processing.
2414 ret = env->url_constructor_function()
2415 ->Call(env->context(), undef, arraysize(argv), argv);
2416 }
2417
2418 return ret.ToLocalChecked();
2419 }
2420
SetURLConstructor(const FunctionCallbackInfo<Value> & args)2421 static void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2422 Environment* env = Environment::GetCurrent(args);
2423 CHECK_EQ(args.Length(), 1);
2424 CHECK(args[0]->IsFunction());
2425 env->set_url_constructor_function(args[0].As<Function>());
2426 }
2427
Initialize(Local<Object> target,Local<Value> unused,Local<Context> context,void * priv)2428 static void Initialize(Local<Object> target,
2429 Local<Value> unused,
2430 Local<Context> context,
2431 void* priv) {
2432 Environment* env = Environment::GetCurrent(context);
2433 env->SetMethod(target, "parse", Parse);
2434 env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
2435 env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
2436 env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
2437 env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
2438 env->SetMethod(target, "setURLConstructor", SetURLConstructor);
2439
2440 #define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
2441 FLAGS(XX)
2442 #undef XX
2443
2444 #define XX(name) NODE_DEFINE_CONSTANT(target, name);
2445 PARSESTATES(XX)
2446 #undef XX
2447 }
2448 } // namespace url
2449 } // namespace node
2450
2451 NODE_BUILTIN_MODULE_CONTEXT_AWARE(url, node::url::Initialize)
2452