1 #include "node_url.h"
2 #include "node_internals.h"
3 #include "base_object-inl.h"
4 #include "node_i18n.h"
5 
6 #include <string>
7 #include <vector>
8 #include <stdio.h>
9 #include <cmath>
10 
11 namespace node {
12 
13 using v8::Array;
14 using v8::Context;
15 using v8::Function;
16 using v8::FunctionCallbackInfo;
17 using v8::HandleScope;
18 using v8::Int32;
19 using v8::Integer;
20 using v8::Isolate;
21 using v8::Local;
22 using v8::MaybeLocal;
23 using v8::NewStringType;
24 using v8::Null;
25 using v8::Object;
26 using v8::String;
27 using v8::TryCatch;
28 using v8::Undefined;
29 using v8::Value;
30 
Utf8String(Isolate * isolate,const std::string & str)31 inline Local<String> Utf8String(Isolate* isolate, const std::string& str) {
32   return String::NewFromUtf8(isolate,
33                              str.data(),
34                              NewStringType::kNormal,
35                              str.length()).ToLocalChecked();
36 }
37 
38 namespace url {
39 
40 namespace {
41 
// Sentinel standing in for the EOF code point:
// https://url.spec.whatwg.org/#eof-code-point
constexpr char kEOL = -1;

// U+FFFD REPLACEMENT CHARACTER. Used in ToUSVString().
constexpr char16_t kUnicodeReplacementCharacter = u'\uFFFD';
47 
// Holder for a parsed URL host.
// https://url.spec.whatwg.org/#concept-host
//
// The payload lives in a union; `type_` records which kind of host is stored.
// Only the H_DOMAIN and H_OPAQUE states own a live std::string, and that
// string is constructed and destroyed by hand (see Reset() and the Set*()
// helpers below).
class URLHost {
 public:
  // Releases the string payload, if one is currently live.
  ~URLHost() { Reset(); }

  void ParseIPv4Host(const char* input, size_t length, bool* is_ipv4);
  void ParseIPv6Host(const char* input, size_t length);
  void ParseOpaqueHost(const char* input, size_t length);
  void ParseHost(const char* input,
                 size_t length,
                 bool is_special,
                 bool unicode = false);

  // True while no host has been stored (the initial state, and the state
  // Reset() leaves behind).
  inline bool ParsingFailed() const { return type_ == HostType::H_FAILED; }

  std::string ToString() const;
  // Same result as ToString(), but skips a copy; `*this` is invalidated.
  std::string ToStringMove();

 private:
  enum class HostType {
    H_FAILED,
    H_DOMAIN,
    H_IPV4,
    H_IPV6,
    H_OPAQUE,
  };

  union Value {
    std::string domain_or_opaque;
    uint32_t ipv4;
    uint16_t ipv6[8];

    // Intentionally empty: the owning URLHost destroys the string member
    // itself, based on `type_`.
    ~Value() {}
    Value() : ipv4(0) {}
  };

  Value value_;
  HostType type_ = HostType::H_FAILED;

  // Destroys the string member when it is the live one and returns the host
  // to the H_FAILED state.
  inline void Reset() {
    const bool holds_string =
        type_ == HostType::H_DOMAIN || type_ == HostType::H_OPAQUE;
    if (holds_string) {
      using string = std::string;
      value_.domain_or_opaque.~string();
    }
    type_ = HostType::H_FAILED;
  }

  // Plain assignment to the string member of the union would be fragile: it
  // presumes the member already holds a constructed string whose old contents
  // can be released safely. That used to work only because ParseIPv6Host()
  // happens to zero-fill `value_`, which leans on standard library internals.
  // Instead, the previous payload is torn down and the new string is
  // placement-constructed. (Not keeping strings inside a union at all might be
  // the better long-term answer.)
  inline void EmplaceString(HostType kind, std::string&& text) {
    Reset();
    type_ = kind;
    new (&value_.domain_or_opaque) std::string(std::move(text));
  }

  // Stores `string` as an opaque host.
  inline void SetOpaque(std::string&& string) {
    EmplaceString(HostType::H_OPAQUE, std::move(string));
  }

  // Stores `string` as a domain.
  inline void SetDomain(std::string&& string) {
    EmplaceString(HostType::H_DOMAIN, std::move(string));
  }
};
124 
// X-macro listing the enumerators of `url_cb_args` in index order.
// ARG_COUNT has to stay the final entry so that its value equals the number
// of entries that precede it.
#define ARGS(V)                                                               \
  V(ARG_FLAGS)                                                                \
  V(ARG_PROTOCOL)                                                             \
  V(ARG_USERNAME)                                                             \
  V(ARG_PASSWORD)                                                             \
  V(ARG_HOST)                                                                 \
  V(ARG_PORT)                                                                 \
  V(ARG_PATH)                                                                 \
  V(ARG_QUERY)                                                                \
  V(ARG_FRAGMENT)                                                             \
  V(ARG_COUNT)  /* This one has to be last. */

// X-macro listing the enumerators of `url_error_cb_args` in index order.
#define ERR_ARGS(V)                                                           \
  V(ERR_ARG_FLAGS)                                                            \
  V(ERR_ARG_INPUT)

// Expands one list entry into an enumerator followed by a comma.
#define URL_ARG_ENUMERATOR(name) name,

enum url_cb_args {
  ARGS(URL_ARG_ENUMERATOR)
};

enum url_error_cb_args {
  ERR_ARGS(URL_ARG_ENUMERATOR)
};

#undef URL_ARG_ENUMERATOR
152 
// Character classification helpers.
//
// Every predicate is a template over the code unit type and contains a
// static_assert that rejects types too narrow for the values being tested
// (at least 8 bits for the ASCII tests, at least 16 bits for the UTF-16
// surrogate tests).

// https://infra.spec.whatwg.org/#ascii-tab-or-newline
template <typename T>
inline bool IsASCIITabOrNewline(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  switch (ch) {
    case '\t':
    case '\n':
    case '\r':
      return true;
    default:
      return false;
  }
}

// https://infra.spec.whatwg.org/#c0-control-or-space
template <typename T>
inline bool IsC0ControlOrSpace(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return ch >= '\0' && ch <= ' ';
}

// https://infra.spec.whatwg.org/#ascii-digit
template <typename T>
inline bool IsASCIIDigit(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return ch >= '0' && ch <= '9';
}

// https://infra.spec.whatwg.org/#ascii-hex-digit
template <typename T>
inline bool IsASCIIHexDigit(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  const bool upper_hex = ch >= 'A' && ch <= 'F';
  const bool lower_hex = ch >= 'a' && ch <= 'f';
  return IsASCIIDigit(ch) || upper_hex || lower_hex;
}

// https://infra.spec.whatwg.org/#ascii-alpha
template <typename T>
inline bool IsASCIIAlpha(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  const bool upper = ch >= 'A' && ch <= 'Z';
  const bool lower = ch >= 'a' && ch <= 'z';
  return upper || lower;
}

// https://infra.spec.whatwg.org/#ascii-alphanumeric
template <typename T>
inline bool IsASCIIAlphanumeric(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  return IsASCIIDigit(ch) || IsASCIIAlpha(ch);
}

// https://infra.spec.whatwg.org/#ascii-lowercase
// ASCII letters get bit 0x20 set (which lowercases 'A'-'Z' and leaves
// 'a'-'z' as they are); every other value is returned untouched.
template <typename T>
inline T ASCIILowercase(T ch) {
  if (IsASCIIAlpha(ch))
    return static_cast<T>(ch | 0x20);
  return ch;
}

// https://url.spec.whatwg.org/#forbidden-host-code-point
template <typename T>
inline bool IsForbiddenHostCodePoint(const T ch) {
  static_assert(sizeof(ch) >= 1, "Character must be wider than 8 bits");
  switch (ch) {
    case '\0':
    case '\t':
    case '\n':
    case '\r':
    case ' ':
    case '#':
    case '%':
    case '/':
    case ':':
    case '?':
    case '@':
    case '[':
    case '\\':
    case ']':
      return true;
    default:
      return false;
  }
}

// https://url.spec.whatwg.org/#windows-drive-letter
// An ASCII letter followed by ':' or '|'.
template <typename T>
inline bool IsWindowsDriveLetter(const T ch1, const T ch2) {
  static_assert(sizeof(ch1) >= 1, "Character must be wider than 8 bits");
  if (!IsASCIIAlpha(ch1))
    return false;
  return ch2 == ':' || ch2 == '|';
}

// String form: applies the two-character test to the first two characters;
// strings shorter than two characters never match.
template <typename T>
inline bool IsWindowsDriveLetter(const std::basic_string<T>& str) {
  static_assert(sizeof(str[0]) >= 1, "Character must be wider than 8 bits");
  if (str.length() < 2)
    return false;
  return IsWindowsDriveLetter(str[0], str[1]);
}

// https://url.spec.whatwg.org/#normalized-windows-drive-letter
// An ASCII letter followed by ':'.
template <typename T>
inline bool IsNormalizedWindowsDriveLetter(const T ch1, const T ch2) {
  static_assert(sizeof(ch1) >= 1, "Character must be wider than 8 bits");
  if (!IsASCIIAlpha(ch1))
    return false;
  return ch2 == ':';
}

// String form: applies the two-character test to the first two characters;
// strings shorter than two characters never match.
template <typename T>
inline bool IsNormalizedWindowsDriveLetter(const std::basic_string<T>& str) {
  static_assert(sizeof(str[0]) >= 1, "Character must be wider than 8 bits");
  if (str.length() < 2)
    return false;
  return IsNormalizedWindowsDriveLetter(str[0], str[1]);
}

// If a UTF-16 character is a low/trailing surrogate.
template <typename T>
inline bool IsUnicodeTrail(const T ch) {
  static_assert(sizeof(ch) >= 2, "Character must be wider than 16 bits");
  return (ch & 0xFC00) == 0xDC00;
}

// If a UTF-16 character is a surrogate.
template <typename T>
inline bool IsUnicodeSurrogate(const T ch) {
  static_assert(sizeof(ch) >= 2, "Character must be wider than 16 bits");
  return (ch & 0xF800) == 0xD800;
}

// If a UTF-16 surrogate is a low/trailing one. Only bit 0x400 is inspected,
// so the answer is meaningful only for values already known to be surrogates.
template <typename T>
inline bool IsUnicodeSurrogateTrail(const T ch) {
  static_assert(sizeof(ch) >= 2, "Character must be wider than 16 bits");
  return (ch & 0x400) != 0;
}
228 
// Lookup table from a byte value (0x00-0xFF) to its percent-encoded form:
// '%' followed by two uppercase hexadecimal digits. Index with the byte as an
// unsigned value.
// Both the strings and the pointers stored in the table are const, so an
// entry cannot be reassigned by accident.
const char* const hex[256] = {
  "%00", "%01", "%02", "%03", "%04", "%05", "%06", "%07",
  "%08", "%09", "%0A", "%0B", "%0C", "%0D", "%0E", "%0F",
  "%10", "%11", "%12", "%13", "%14", "%15", "%16", "%17",
  "%18", "%19", "%1A", "%1B", "%1C", "%1D", "%1E", "%1F",
  "%20", "%21", "%22", "%23", "%24", "%25", "%26", "%27",
  "%28", "%29", "%2A", "%2B", "%2C", "%2D", "%2E", "%2F",
  "%30", "%31", "%32", "%33", "%34", "%35", "%36", "%37",
  "%38", "%39", "%3A", "%3B", "%3C", "%3D", "%3E", "%3F",
  "%40", "%41", "%42", "%43", "%44", "%45", "%46", "%47",
  "%48", "%49", "%4A", "%4B", "%4C", "%4D", "%4E", "%4F",
  "%50", "%51", "%52", "%53", "%54", "%55", "%56", "%57",
  "%58", "%59", "%5A", "%5B", "%5C", "%5D", "%5E", "%5F",
  "%60", "%61", "%62", "%63", "%64", "%65", "%66", "%67",
  "%68", "%69", "%6A", "%6B", "%6C", "%6D", "%6E", "%6F",
  "%70", "%71", "%72", "%73", "%74", "%75", "%76", "%77",
  "%78", "%79", "%7A", "%7B", "%7C", "%7D", "%7E", "%7F",
  "%80", "%81", "%82", "%83", "%84", "%85", "%86", "%87",
  "%88", "%89", "%8A", "%8B", "%8C", "%8D", "%8E", "%8F",
  "%90", "%91", "%92", "%93", "%94", "%95", "%96", "%97",
  "%98", "%99", "%9A", "%9B", "%9C", "%9D", "%9E", "%9F",
  "%A0", "%A1", "%A2", "%A3", "%A4", "%A5", "%A6", "%A7",
  "%A8", "%A9", "%AA", "%AB", "%AC", "%AD", "%AE", "%AF",
  "%B0", "%B1", "%B2", "%B3", "%B4", "%B5", "%B6", "%B7",
  "%B8", "%B9", "%BA", "%BB", "%BC", "%BD", "%BE", "%BF",
  "%C0", "%C1", "%C2", "%C3", "%C4", "%C5", "%C6", "%C7",
  "%C8", "%C9", "%CA", "%CB", "%CC", "%CD", "%CE", "%CF",
  "%D0", "%D1", "%D2", "%D3", "%D4", "%D5", "%D6", "%D7",
  "%D8", "%D9", "%DA", "%DB", "%DC", "%DD", "%DE", "%DF",
  "%E0", "%E1", "%E2", "%E3", "%E4", "%E5", "%E6", "%E7",
  "%E8", "%E9", "%EA", "%EB", "%EC", "%ED", "%EE", "%EF",
  "%F0", "%F1", "%F2", "%F3", "%F4", "%F5", "%F6", "%F7",
  "%F8", "%F9", "%FA", "%FB", "%FC", "%FD", "%FE", "%FF"
};
263 
// Bitmap over the byte values 0x00-0xFF, one bit per value: value `c` is
// represented by bit (c & 7) of element (c >> 3). Each row below therefore
// covers eight consecutive values, lowest value in the least significant bit.
// Bits set here: 0x00-0x1F and 0x7F-0xFF.
const uint8_t C0_CONTROL_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x00,  // 20-27
  0x00,  // 28-2F
  0x00,  // 30-37
  0x00,  // 38-3F
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
330 
// Bitmap over the byte values 0x00-0xFF, one bit per value: value `c` is
// represented by bit (c & 7) of element (c >> 3). Each row below therefore
// covers eight consecutive values, lowest value in the least significant bit.
// Bits set here: 0x00-0x1F, 0x7F-0xFF, and the characters named per row.
const uint8_t FRAGMENT_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x05,  // 20-27: 20 (space), 22 (double quote)
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: 3C '<', 3E '>'
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x01,  // 60-67: 60 (backtick)
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
397 
398 
// Bitmap over the byte values 0x00-0xFF, one bit per value: value `c` is
// represented by bit (c & 7) of element (c >> 3). Each row below therefore
// covers eight consecutive values, lowest value in the least significant bit.
// Bits set here: 0x00-0x1F, 0x7F-0xFF, and the characters named per row.
const uint8_t PATH_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: 20 (space), 22 (double quote), 23 '#'
  0x00,  // 28-2F
  0x00,  // 30-37
  0xD0,  // 38-3F: 3C '<', 3E '>', 3F '?'
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x01,  // 60-67: 60 (backtick)
  0x00,  // 68-6F
  0x00,  // 70-77
  0xA8,  // 78-7F: 7B '{', 7D '}', 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
465 
// Bitmap over the byte values 0x00-0xFF, one bit per value: value `c` is
// represented by bit (c & 7) of element (c >> 3). Each row below therefore
// covers eight consecutive values, lowest value in the least significant bit.
// Bits set here: 0x00-0x1F, 0x7F-0xFF, and the characters named per row.
const uint8_t USERINFO_ENCODE_SET[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: 20 (space), 22 (double quote), 23 '#'
  0x80,  // 28-2F: 2F '/'
  0x00,  // 30-37
  0xFC,  // 38-3F: 3A ':', 3B ';', 3C '<', 3D '=', 3E '>', 3F '?'
  0x01,  // 40-47: 40 '@'
  0x00,  // 48-4F
  0x00,  // 50-57
  0x78,  // 58-5F: 5B '[', 5C (backslash), 5D ']', 5E '^'
  0x01,  // 60-67: 60 (backtick)
  0x00,  // 68-6F
  0x00,  // 70-77
  0xB8,  // 78-7F: 7B '{', 7C '|', 7D '}', 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
532 
// Bitmap over the byte values 0x00-0xFF, one bit per value: value `c` is
// represented by bit (c & 7) of element (c >> 3). Each row below therefore
// covers eight consecutive values, lowest value in the least significant bit.
// Bits set here: 0x00-0x1F, 0x7F-0xFF, and the characters named per row.
const uint8_t QUERY_ENCODE_SET_NONSPECIAL[32] = {
  0xFF,  // 00-07
  0xFF,  // 08-0F
  0xFF,  // 10-17
  0xFF,  // 18-1F
  0x0D,  // 20-27: 20 (space), 22 (double quote), 23 '#'
  0x00,  // 28-2F
  0x00,  // 30-37
  0x50,  // 38-3F: 3C '<', 3E '>'
  0x00,  // 40-47
  0x00,  // 48-4F
  0x00,  // 50-57
  0x00,  // 58-5F
  0x00,  // 60-67
  0x00,  // 68-6F
  0x00,  // 70-77
  0x80,  // 78-7F: 7F
  0xFF,  // 80-87
  0xFF,  // 88-8F
  0xFF,  // 90-97
  0xFF,  // 98-9F
  0xFF,  // A0-A7
  0xFF,  // A8-AF
  0xFF,  // B0-B7
  0xFF,  // B8-BF
  0xFF,  // C0-C7
  0xFF,  // C8-CF
  0xFF,  // D0-D7
  0xFF,  // D8-DF
  0xFF,  // E0-E7
  0xFF,  // E8-EF
  0xFF,  // F0-F7
  0xFF   // F8-FF
};
599 
// Same as QUERY_ENCODE_SET_NONSPECIAL, but with 0x27 (') encoded.
//
// 256-entry bitmap packed into 32 bytes, one row per byte. Bit (c & 7) of
// byte (c >> 3) is set when byte value c has to be percent-encoded; this is
// the layout BitAt() reads. Each row below covers eight consecutive code
// points, lowest code point in the lowest bit.
//
// Bits set in this table: 0x00-0x1F, 0x20 (space), 0x22 ("), 0x23 (#),
// 0x27 ('), 0x3C (<), 0x3E (>), 0x7F and 0x80-0xFF.
const uint8_t QUERY_ENCODE_SET_SPECIAL[32] = {
  // 00     01     02     03     04     05     06     07
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 08     09     0A     0B     0C     0D     0E     0F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 10     11     12     13     14     15     16     17
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 18     19     1A     1B     1C     1D     1E     1F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 20     21     22     23     24     25     26     27
    0x01 | 0x00 | 0x04 | 0x08 | 0x00 | 0x00 | 0x00 | 0x80,
  // 28     29     2A     2B     2C     2D     2E     2F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 30     31     32     33     34     35     36     37
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 38     39     3A     3B     3C     3D     3E     3F
    0x00 | 0x00 | 0x00 | 0x00 | 0x10 | 0x00 | 0x40 | 0x00,
  // 40     41     42     43     44     45     46     47
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 48     49     4A     4B     4C     4D     4E     4F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 50     51     52     53     54     55     56     57
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 58     59     5A     5B     5C     5D     5E     5F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 60     61     62     63     64     65     66     67
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 68     69     6A     6B     6C     6D     6E     6F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 70     71     72     73     74     75     76     77
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00,
  // 78     79     7A     7B     7C     7D     7E     7F
    0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x00 | 0x80,
  // 80     81     82     83     84     85     86     87
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 88     89     8A     8B     8C     8D     8E     8F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 90     91     92     93     94     95     96     97
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // 98     99     9A     9B     9C     9D     9E     9F
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A0     A1     A2     A3     A4     A5     A6     A7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // A8     A9     AA     AB     AC     AD     AE     AF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B0     B1     B2     B3     B4     B5     B6     B7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // B8     B9     BA     BB     BC     BD     BE     BF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C0     C1     C2     C3     C4     C5     C6     C7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // C8     C9     CA     CB     CC     CD     CE     CF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D0     D1     D2     D3     D4     D5     D6     D7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // D8     D9     DA     DB     DC     DD     DE     DF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E0     E1     E2     E3     E4     E5     E6     E7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // E8     E9     EA     EB     EC     ED     EE     EF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F0     F1     F2     F3     F4     F5     F6     F7
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80,
  // F8     F9     FA     FB     FC     FD     FE     FF
    0x01 | 0x02 | 0x04 | 0x08 | 0x10 | 0x20 | 0x40 | 0x80
};
667 
// Tests bit `i` of the packed bitmap `a`: byte i / 8, bit i % 8 (lowest bit
// first). `a` must hold at least i / 8 + 1 bytes.
inline bool BitAt(const uint8_t a[], const uint8_t i) {
  const uint8_t byte = a[i / 8];
  const unsigned mask = 1u << (i % 8);
  return (byte & mask) != 0;
}
671 
672 // Appends ch to str. If ch position in encode_set is set, the ch will
673 // be percent-encoded then appended.
AppendOrEscape(std::string * str,const unsigned char ch,const uint8_t encode_set[])674 inline void AppendOrEscape(std::string* str,
675                            const unsigned char ch,
676                            const uint8_t encode_set[]) {
677   if (BitAt(encode_set, ch))
678     *str += hex[ch];
679   else
680     *str += ch;
681 }
682 
// Maps an ASCII hexadecimal digit (either case) to its numeric value 0-15.
// Any other input yields static_cast<unsigned>(-1).
template <typename T>
inline unsigned hex2bin(const T ch) {
  if ('0' <= ch && ch <= '9')
    return ch - '0';
  if ('a' <= ch && ch <= 'f')
    return ch - 'a' + 10;
  if ('A' <= ch && ch <= 'F')
    return ch - 'A' + 10;
  return ~0u;
}
693 
PercentDecode(const char * input,size_t len)694 inline std::string PercentDecode(const char* input, size_t len) {
695   std::string dest;
696   if (len == 0)
697     return dest;
698   dest.reserve(len);
699   const char* pointer = input;
700   const char* end = input + len;
701 
702   while (pointer < end) {
703     const char ch = pointer[0];
704     const size_t remaining = end - pointer - 1;
705     if (ch != '%' || remaining < 2 ||
706         (ch == '%' &&
707          (!IsASCIIHexDigit(pointer[1]) ||
708           !IsASCIIHexDigit(pointer[2])))) {
709       dest += ch;
710       pointer++;
711       continue;
712     } else {
713       unsigned a = hex2bin(pointer[1]);
714       unsigned b = hex2bin(pointer[2]);
715       char c = static_cast<char>(a * 16 + b);
716       dest += c;
717       pointer += 3;
718     }
719   }
720   return dest;
721 }
722 
// X-macro listing the schemes that IsSpecial() reports as special. Each entry
// is XX(scheme, port): `scheme` is the scheme string including its trailing
// ':' and `port` is the port number that NormalizePort() collapses to -1 for
// that scheme ("file:" is listed with -1). Define XX before expanding
// SPECIALS and #undef it afterwards.
#define SPECIALS(XX)                                                          \
  XX("ftp:", 21)                                                              \
  XX("file:", -1)                                                             \
  XX("gopher:", 70)                                                           \
  XX("http:", 80)                                                             \
  XX("https:", 443)                                                           \
  XX("ws:", 80)                                                               \
  XX("wss:", 443)
731 
IsSpecial(const std::string & scheme)732 inline bool IsSpecial(const std::string& scheme) {
733 #define XX(name, _) if (scheme == name) return true;
734   SPECIALS(XX);
735 #undef XX
736   return false;
737 }
738 
739 // https://url.spec.whatwg.org/#start-with-a-windows-drive-letter
StartsWithWindowsDriveLetter(const char * p,const char * end)740 inline bool StartsWithWindowsDriveLetter(const char* p, const char* end) {
741   const size_t length = end - p;
742   return length >= 2 &&
743     IsWindowsDriveLetter(p[0], p[1]) &&
744     (length == 2 ||
745       p[2] == '/' ||
746       p[2] == '\\' ||
747       p[2] == '?' ||
748       p[2] == '#');
749 }
750 
NormalizePort(const std::string & scheme,int p)751 inline int NormalizePort(const std::string& scheme, int p) {
752 #define XX(name, port) if (scheme == name && p == port) return -1;
753   SPECIALS(XX);
754 #undef XX
755   return p;
756 }
757 
758 #if defined(NODE_HAVE_I18N_SUPPORT)
ToUnicode(const std::string & input,std::string * output)759 inline bool ToUnicode(const std::string& input, std::string* output) {
760   MaybeStackBuffer<char> buf;
761   if (i18n::ToUnicode(&buf, input.c_str(), input.length()) < 0)
762     return false;
763   output->assign(*buf, buf.length());
764   return true;
765 }
766 
ToASCII(const std::string & input,std::string * output)767 inline bool ToASCII(const std::string& input, std::string* output) {
768   MaybeStackBuffer<char> buf;
769   if (i18n::ToASCII(&buf, input.c_str(), input.length()) < 0)
770     return false;
771   output->assign(*buf, buf.length());
772   return true;
773 }
774 #else
// Intentional no-ops if ICU is not present.
// Fallback ToUnicode(): copies `input` to `*output` verbatim and always
// reports success.
inline bool ToUnicode(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
780 
// Fallback ToASCII(): copies `input` to `*output` verbatim and always
// reports success.
inline bool ToASCII(const std::string& input, std::string* output) {
  output->assign(input);
  return true;
}
785 #endif
786 
// Parses the `length` bytes at `input` as the textual form of an IPv6 address
// (https://url.spec.whatwg.org/#concept-ipv6-parser). The member ParseHost()
// calls this with the surrounding '[' and ']' already removed.
//
// On success the 16-bit pieces are stored in value_.ipv6 and type_ becomes
// H_IPV6. On any syntax error the function simply returns, leaving type_ as
// H_FAILED (value_.ipv6 may have been partially written by then).
//
// NOTE(review): kEOL is the char value -1, so on platforms where char is
// signed an input byte 0xFF compares equal to kEOL and ends the scan early.
void URLHost::ParseIPv6Host(const char* input, size_t length) {
  CHECK_EQ(type_, HostType::H_FAILED);
  unsigned size = arraysize(value_.ipv6);
  for (unsigned n = 0; n < size; n++)
    value_.ipv6[n] = 0;
  // piece_pointer: next piece to fill. compress_pointer: position recorded
  // when "::" is seen (nullptr until then).
  uint16_t* piece_pointer = &value_.ipv6[0];
  uint16_t* const buffer_end = piece_pointer + size;
  uint16_t* compress_pointer = nullptr;
  const char* pointer = input;
  const char* end = pointer + length;
  unsigned value, len, numbers_seen;
  char ch = pointer < end ? pointer[0] : kEOL;
  // A leading ':' is only valid as the first half of "::".
  if (ch == ':') {
    if (length < 2 || pointer[1] != ':')
      return;
    pointer += 2;
    ch = pointer < end ? pointer[0] : kEOL;
    piece_pointer++;
    compress_pointer = piece_pointer;
  }
  while (ch != kEOL) {
    // No room left for another piece.
    if (piece_pointer >= buffer_end)
      return;
    if (ch == ':') {
      // Second ':' of a "::" (the first was consumed by `case ':'` below).
      // Only one compression is allowed per address.
      if (compress_pointer != nullptr)
        return;
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      piece_pointer++;
      compress_pointer = piece_pointer;
      continue;
    }
    // Read up to four hex digits into `value`.
    value = 0;
    len = 0;
    while (len < 4 && IsASCIIHexDigit(ch)) {
      value = value * 0x10 + hex2bin(ch);
      pointer++;
      ch = pointer < end ? pointer[0] : kEOL;
      len++;
    }
    switch (ch) {
      case '.':
        // Embedded dotted-decimal IPv4 tail. The digits just consumed were
        // decimal, not hex, so rewind and re-read them.
        if (len == 0)
          return;
        pointer -= len;
        ch = pointer < end ? pointer[0] : kEOL;
        // The four numbers occupy two pieces; both must still be free.
        if (piece_pointer > buffer_end - 2)
          return;
        numbers_seen = 0;
        while (ch != kEOL) {
          // 0xffffffff marks "no digit read yet" for the current number.
          value = 0xffffffff;
          if (numbers_seen > 0) {
            // Numbers after the first must be introduced by '.', and there
            // may be at most four of them.
            if (ch == '.' && numbers_seen < 4) {
              pointer++;
              ch = pointer < end ? pointer[0] : kEOL;
            } else {
              return;
            }
          }
          if (!IsASCIIDigit(ch))
            return;
          while (IsASCIIDigit(ch)) {
            unsigned number = ch - '0';
            if (value == 0xffffffff) {
              value = number;
            } else if (value == 0) {
              // A further digit after a leading zero is rejected.
              return;
            } else {
              value = value * 10 + number;
            }
            if (value > 255)
              return;
            pointer++;
            ch = pointer < end ? pointer[0] : kEOL;
          }
          // Two numbers are packed into each 16-bit piece, high byte first.
          *piece_pointer = *piece_pointer * 0x100 + value;
          numbers_seen++;
          if (numbers_seen == 2 || numbers_seen == 4)
            piece_pointer++;
        }
        if (numbers_seen != 4)
          return;
        // ch is kEOL here, so this ends the outer loop.
        continue;
      case ':':
        pointer++;
        ch = pointer < end ? pointer[0] : kEOL;
        // A single ':' must be followed by more input.
        if (ch == kEOL)
          return;
        break;
      case kEOL:
        break;
      default:
        return;
    }
    *piece_pointer = value;
    piece_pointer++;
  }

  if (compress_pointer != nullptr) {
    // Move the pieces written after the "::" to the end of the array by
    // swapping them, last piece first, with the still-zero trailing slots.
    unsigned swaps = piece_pointer - compress_pointer;
    piece_pointer = buffer_end - 1;
    while (piece_pointer != &value_.ipv6[0] && swaps > 0) {
      uint16_t temp = *piece_pointer;
      uint16_t* swap_piece = compress_pointer + swaps - 1;
      *piece_pointer = *swap_piece;
      *swap_piece = temp;
       piece_pointer--;
       swaps--;
    }
  } else if (compress_pointer == nullptr &&
             piece_pointer != buffer_end) {
    // Without "::" every piece must have been supplied explicitly.
    return;
  }
  type_ = HostType::H_IPV6;
}
902 
// Parses the characters in [start, end) as one part of an IPv4 address
// (https://url.spec.whatwg.org/#ipv4-number-parser).
//
// A "0x"/"0X" prefix selects hexadecimal; otherwise a leading '0' followed by
// at least one more character selects octal; anything else is decimal.
//
// Returns:
//   0          for an empty range, or a bare "0x"/"0X" prefix;
//   -1         if any character is not a valid digit for the chosen radix;
//   INT64_MAX  if the value does not fit in int64_t (saturates);
//   otherwise the parsed, non-negative value.
//
// Only bytes inside [start, end) are read; the range does not need to be
// followed by a NUL or any other terminator.
inline int64_t ParseNumber(const char* start, const char* end) {
  unsigned R = 10;
  if (end - start >= 2 && start[0] == '0' && (start[1] | 0x20) == 'x') {
    start += 2;
    R = 16;
  }
  if (end - start == 0) {
    return 0;
  } else if (R == 10 && end - start > 1 && start[0] == '0') {
    start++;
    R = 8;
  }

  const int64_t radix = R;
  int64_t result = 0;
  for (const char* p = start; p < end; p++) {
    const char ch = *p;
    int64_t digit;
    if (ch >= '0' && ch <= '9')
      digit = ch - '0';
    else if (ch >= 'a' && ch <= 'f')
      digit = 10 + (ch - 'a');
    else if (ch >= 'A' && ch <= 'F')
      digit = 10 + (ch - 'A');
    else
      return -1;
    if (digit >= radix)
      return -1;

    // Saturate instead of overflowing, but keep scanning so that an invalid
    // character later in the range is still reported as -1.
    if (result > (INT64_MAX - digit) / radix)
      result = INT64_MAX;
    else
      result = result * radix + digit;
  }
  return result;
}
937 
ParseIPv4Host(const char * input,size_t length,bool * is_ipv4)938 void URLHost::ParseIPv4Host(const char* input, size_t length, bool* is_ipv4) {
939   CHECK_EQ(type_, HostType::H_FAILED);
940   *is_ipv4 = false;
941   const char* pointer = input;
942   const char* mark = input;
943   const char* end = pointer + length;
944   int parts = 0;
945   uint32_t val = 0;
946   uint64_t numbers[4];
947   int tooBigNumbers = 0;
948   if (length == 0)
949     return;
950 
951   while (pointer <= end) {
952     const char ch = pointer < end ? pointer[0] : kEOL;
953     const int remaining = end - pointer - 1;
954     if (ch == '.' || ch == kEOL) {
955       if (++parts > static_cast<int>(arraysize(numbers)))
956         return;
957       if (pointer == mark)
958         return;
959       int64_t n = ParseNumber(mark, pointer);
960       if (n < 0)
961         return;
962 
963       if (n > 255) {
964         tooBigNumbers++;
965       }
966       numbers[parts - 1] = n;
967       mark = pointer + 1;
968       if (ch == '.' && remaining == 0)
969         break;
970     }
971     pointer++;
972   }
973   CHECK_GT(parts, 0);
974   *is_ipv4 = true;
975 
976   // If any but the last item in numbers is greater than 255, return failure.
977   // If the last item in numbers is greater than or equal to
978   // 256^(5 - the number of items in numbers), return failure.
979   if (tooBigNumbers > 1 ||
980       (tooBigNumbers == 1 && numbers[parts - 1] <= 255) ||
981       numbers[parts - 1] >= pow(256, static_cast<double>(5 - parts))) {
982     return;
983   }
984 
985   type_ = HostType::H_IPV4;
986   val = numbers[parts - 1];
987   for (int n = 0; n < parts - 1; n++) {
988     double b = 3 - n;
989     val += numbers[n] * pow(256, b);
990   }
991 
992   value_.ipv4 = val;
993 }
994 
ParseOpaqueHost(const char * input,size_t length)995 void URLHost::ParseOpaqueHost(const char* input, size_t length) {
996   CHECK_EQ(type_, HostType::H_FAILED);
997   std::string output;
998   output.reserve(length);
999   for (size_t i = 0; i < length; i++) {
1000     const char ch = input[i];
1001     if (ch != '%' && IsForbiddenHostCodePoint(ch)) {
1002       return;
1003     } else {
1004       AppendOrEscape(&output, ch, C0_CONTROL_ENCODE_SET);
1005     }
1006   }
1007 
1008   SetOpaque(std::move(output));
1009 }
1010 
ParseHost(const char * input,size_t length,bool is_special,bool unicode)1011 void URLHost::ParseHost(const char* input,
1012                         size_t length,
1013                         bool is_special,
1014                         bool unicode) {
1015   CHECK_EQ(type_, HostType::H_FAILED);
1016   const char* pointer = input;
1017 
1018   if (length == 0)
1019     return;
1020 
1021   if (pointer[0] == '[') {
1022     if (pointer[length - 1] != ']')
1023       return;
1024     return ParseIPv6Host(++pointer, length - 2);
1025   }
1026 
1027   if (!is_special)
1028     return ParseOpaqueHost(input, length);
1029 
1030   // First, we have to percent decode
1031   std::string decoded = PercentDecode(input, length);
1032 
1033   // Then we have to punycode toASCII
1034   if (!ToASCII(decoded, &decoded))
1035     return;
1036 
1037   // If any of the following characters are still present, we have to fail
1038   for (size_t n = 0; n < decoded.size(); n++) {
1039     const char ch = decoded[n];
1040     if (IsForbiddenHostCodePoint(ch)) {
1041       return;
1042     }
1043   }
1044 
1045   // Check to see if it's an IPv4 IP address
1046   bool is_ipv4;
1047   ParseIPv4Host(decoded.c_str(), decoded.length(), &is_ipv4);
1048   if (is_ipv4)
1049     return;
1050 
1051   // If the unicode flag is set, run the result through punycode ToUnicode
1052   if (unicode && !ToUnicode(decoded, &decoded))
1053     return;
1054 
1055   // It's not an IPv4 or IPv6 address, it must be a domain
1056   SetDomain(std::move(decoded));
1057 }
1058 
// Finds the longest run of zero-valued elements in values[0..len), for use
// with the "::" compression when serializing an IPv6 address.
// Returns a pointer to the first element of that run, or nullptr when no run
// is at least two elements long. Of several runs with the same length, the
// earliest one wins.
template <typename T>
inline T* FindLongestZeroSequence(T* values, size_t len) {
  T* best = nullptr;
  unsigned best_len = 1;  // A run has to beat this, i.e. be >= 2 long.
  unsigned run_len = 0;

  for (size_t i = 0; i < len; i++) {
    if (values[i] == 0) {
      run_len++;
      continue;
    }
    // A non-zero element closes the run that ended just before index i.
    if (run_len > best_len) {
      best_len = run_len;
      best = values + (i - run_len);
    }
    run_len = 0;
  }
  // A run may extend to the very end of the array.
  if (run_len > best_len)
    best = values + (len - run_len);
  return best;
}
1089 
ToStringMove()1090 std::string URLHost::ToStringMove() {
1091   std::string return_value;
1092   switch (type_) {
1093     case HostType::H_DOMAIN:
1094     case HostType::H_OPAQUE:
1095       return_value = std::move(value_.domain_or_opaque);
1096       break;
1097     default:
1098       return_value = ToString();
1099       break;
1100   }
1101   Reset();
1102   return return_value;
1103 }
1104 
ToString() const1105 std::string URLHost::ToString() const {
1106   std::string dest;
1107   switch (type_) {
1108     case HostType::H_DOMAIN:
1109     case HostType::H_OPAQUE:
1110       return value_.domain_or_opaque;
1111       break;
1112     case HostType::H_IPV4: {
1113       dest.reserve(15);
1114       uint32_t value = value_.ipv4;
1115       for (int n = 0; n < 4; n++) {
1116         char buf[4];
1117         snprintf(buf, sizeof(buf), "%d", value % 256);
1118         dest.insert(0, buf);
1119         if (n < 3)
1120           dest.insert(0, 1, '.');
1121         value /= 256;
1122       }
1123       break;
1124     }
1125     case HostType::H_IPV6: {
1126       dest.reserve(41);
1127       dest += '[';
1128       const uint16_t* start = &value_.ipv6[0];
1129       const uint16_t* compress_pointer =
1130           FindLongestZeroSequence(start, 8);
1131       bool ignore0 = false;
1132       for (int n = 0; n <= 7; n++) {
1133         const uint16_t* piece = &value_.ipv6[n];
1134         if (ignore0 && *piece == 0)
1135           continue;
1136         else if (ignore0)
1137           ignore0 = false;
1138         if (compress_pointer == piece) {
1139           dest += n == 0 ? "::" : ":";
1140           ignore0 = true;
1141           continue;
1142         }
1143         char buf[5];
1144         snprintf(buf, sizeof(buf), "%x", *piece);
1145         dest += buf;
1146         if (n < 7)
1147           dest += ':';
1148       }
1149       dest += ']';
1150       break;
1151     }
1152     case HostType::H_FAILED:
1153       break;
1154   }
1155   return dest;
1156 }
1157 
ParseHost(const std::string & input,std::string * output,bool is_special,bool unicode=false)1158 bool ParseHost(const std::string& input,
1159                std::string* output,
1160                bool is_special,
1161                bool unicode = false) {
1162   if (input.length() == 0) {
1163     output->clear();
1164     return true;
1165   }
1166   URLHost host;
1167   host.ParseHost(input.c_str(), input.length(), is_special, unicode);
1168   if (host.ParsingFailed())
1169     return false;
1170   *output = host.ToStringMove();
1171   return true;
1172 }
1173 
FromJSStringArray(Environment * env,Local<Array> array)1174 inline std::vector<std::string> FromJSStringArray(Environment* env,
1175                                                   Local<Array> array) {
1176   std::vector<std::string> vec;
1177   const int32_t len = array->Length();
1178   if (len == 0)
1179     return vec;  // nothing to copy
1180   vec.reserve(len);
1181   for (int32_t n = 0; n < len; n++) {
1182     Local<Value> val = array->Get(env->context(), n).ToLocalChecked();
1183     if (val->IsString()) {
1184       Utf8Value value(env->isolate(), val.As<String>());
1185       vec.emplace_back(*value, value.length());
1186     }
1187   }
1188   return vec;
1189 }
1190 
ToJSStringArray(Environment * env,const std::vector<std::string> & vec)1191 inline Local<Array> ToJSStringArray(Environment* env,
1192                                     const std::vector<std::string>& vec) {
1193   Isolate* isolate = env->isolate();
1194   Local<Array> array = Array::New(isolate, vec.size());
1195   for (size_t n = 0; n < vec.size(); n++)
1196     array->Set(env->context(), n, Utf8String(isolate, vec[n])).FromJust();
1197   return array;
1198 }
1199 
HarvestBase(Environment * env,Local<Object> base_obj)1200 inline url_data HarvestBase(Environment* env, Local<Object> base_obj) {
1201   url_data base;
1202   Local<Context> context = env->context();
1203   Local<Value> flags =
1204       base_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1205   if (flags->IsInt32())
1206     base.flags = flags->Int32Value(context).FromJust();
1207 
1208   Local<Value> scheme =
1209       base_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1210   base.scheme = Utf8Value(env->isolate(), scheme).out();
1211 
1212   auto GetStr = [&](std::string url_data::*member,
1213                     int flag,
1214                     Local<String> name,
1215                     bool empty_as_present) {
1216     Local<Value> value = base_obj->Get(env->context(), name).ToLocalChecked();
1217     if (value->IsString()) {
1218       Utf8Value utf8value(env->isolate(), value.As<String>());
1219       (base.*member).assign(*utf8value, utf8value.length());
1220       if (empty_as_present || value.As<String>()->Length() != 0) {
1221         base.flags |= flag;
1222       }
1223     }
1224   };
1225   GetStr(&url_data::username,
1226          URL_FLAGS_HAS_USERNAME,
1227          env->username_string(),
1228          false);
1229   GetStr(&url_data::password,
1230          URL_FLAGS_HAS_PASSWORD,
1231          env->password_string(),
1232          false);
1233   GetStr(&url_data::host, URL_FLAGS_HAS_HOST, env->host_string(), true);
1234   GetStr(&url_data::query, URL_FLAGS_HAS_QUERY, env->query_string(), true);
1235   GetStr(&url_data::fragment,
1236          URL_FLAGS_HAS_FRAGMENT,
1237          env->fragment_string(),
1238          true);
1239 
1240   Local<Value> port =
1241       base_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1242   if (port->IsInt32())
1243     base.port = port.As<Int32>()->Value();
1244 
1245   Local<Value>
1246       path = base_obj->Get(env->context(), env->path_string()).ToLocalChecked();
1247   if (path->IsArray()) {
1248     base.flags |= URL_FLAGS_HAS_PATH;
1249     base.path = FromJSStringArray(env, path.As<Array>());
1250   }
1251   return base;
1252 }
1253 
HarvestContext(Environment * env,Local<Object> context_obj)1254 inline url_data HarvestContext(Environment* env, Local<Object> context_obj) {
1255   url_data context;
1256   Local<Value> flags =
1257       context_obj->Get(env->context(), env->flags_string()).ToLocalChecked();
1258   if (flags->IsInt32()) {
1259     static const int32_t copy_flags_mask =
1260         URL_FLAGS_SPECIAL |
1261         URL_FLAGS_CANNOT_BE_BASE |
1262         URL_FLAGS_HAS_USERNAME |
1263         URL_FLAGS_HAS_PASSWORD |
1264         URL_FLAGS_HAS_HOST;
1265     context.flags |= flags.As<Int32>()->Value() & copy_flags_mask;
1266   }
1267   Local<Value> scheme =
1268       context_obj->Get(env->context(), env->scheme_string()).ToLocalChecked();
1269   if (scheme->IsString()) {
1270     Utf8Value value(env->isolate(), scheme);
1271     context.scheme.assign(*value, value.length());
1272   }
1273   Local<Value> port =
1274       context_obj->Get(env->context(), env->port_string()).ToLocalChecked();
1275   if (port->IsInt32())
1276     context.port = port.As<Int32>()->Value();
1277   if (context.flags & URL_FLAGS_HAS_USERNAME) {
1278     Local<Value> username =
1279         context_obj->Get(env->context(),
1280                          env->username_string()).ToLocalChecked();
1281     CHECK(username->IsString());
1282     Utf8Value value(env->isolate(), username);
1283     context.username.assign(*value, value.length());
1284   }
1285   if (context.flags & URL_FLAGS_HAS_PASSWORD) {
1286     Local<Value> password =
1287         context_obj->Get(env->context(),
1288                          env->password_string()).ToLocalChecked();
1289     CHECK(password->IsString());
1290     Utf8Value value(env->isolate(), password);
1291     context.password.assign(*value, value.length());
1292   }
1293   Local<Value> host =
1294       context_obj->Get(env->context(),
1295                        env->host_string()).ToLocalChecked();
1296   if (host->IsString()) {
1297     Utf8Value value(env->isolate(), host);
1298     context.host.assign(*value, value.length());
1299   }
1300   return context;
1301 }
1302 
1303 // Single dot segment can be ".", "%2e", or "%2E"
IsSingleDotSegment(const std::string & str)1304 inline bool IsSingleDotSegment(const std::string& str) {
1305   switch (str.size()) {
1306     case 1:
1307       return str == ".";
1308     case 3:
1309       return str[0] == '%' &&
1310              str[1] == '2' &&
1311              ASCIILowercase(str[2]) == 'e';
1312     default:
1313       return false;
1314   }
1315 }
1316 
1317 // Double dot segment can be:
1318 //   "..", ".%2e", ".%2E", "%2e.", "%2E.",
1319 //   "%2e%2e", "%2E%2E", "%2e%2E", or "%2E%2e"
IsDoubleDotSegment(const std::string & str)1320 inline bool IsDoubleDotSegment(const std::string& str) {
1321   switch (str.size()) {
1322     case 2:
1323       return str == "..";
1324     case 4:
1325       if (str[0] != '.' && str[0] != '%')
1326         return false;
1327       return ((str[0] == '.' &&
1328                str[1] == '%' &&
1329                str[2] == '2' &&
1330                ASCIILowercase(str[3]) == 'e') ||
1331               (str[0] == '%' &&
1332                str[1] == '2' &&
1333                ASCIILowercase(str[2]) == 'e' &&
1334                str[3] == '.'));
1335     case 6:
1336       return (str[0] == '%' &&
1337               str[1] == '2' &&
1338               ASCIILowercase(str[2]) == 'e' &&
1339               str[3] == '%' &&
1340               str[4] == '2' &&
1341               ASCIILowercase(str[5]) == 'e');
1342     default:
1343       return false;
1344   }
1345 }
1346 
ShortenUrlPath(struct url_data * url)1347 inline void ShortenUrlPath(struct url_data* url) {
1348   if (url->path.empty()) return;
1349   if (url->path.size() == 1 && url->scheme == "file:" &&
1350       IsNormalizedWindowsDriveLetter(url->path[0])) return;
1351   url->path.pop_back();
1352 }
1353 
1354 }  // anonymous namespace
1355 
Parse(const char * input,size_t len,enum url_parse_state state_override,struct url_data * url,bool has_url,const struct url_data * base,bool has_base)1356 void URL::Parse(const char* input,
1357                 size_t len,
1358                 enum url_parse_state state_override,
1359                 struct url_data* url,
1360                 bool has_url,
1361                 const struct url_data* base,
1362                 bool has_base) {
1363   const char* p = input;
1364   const char* end = input + len;
1365 
1366   if (!has_url) {
1367     for (const char* ptr = p; ptr < end; ptr++) {
1368       if (IsC0ControlOrSpace(*ptr))
1369         p++;
1370       else
1371         break;
1372     }
1373     for (const char* ptr = end - 1; ptr >= p; ptr--) {
1374       if (IsC0ControlOrSpace(*ptr))
1375         end--;
1376       else
1377         break;
1378     }
1379     input = p;
1380     len = end - p;
1381   }
1382 
1383   // The spec says we should strip out any ASCII tabs or newlines.
1384   // In those cases, we create another std::string instance with the filtered
1385   // contents, but in the general case we avoid the overhead.
1386   std::string whitespace_stripped;
1387   for (const char* ptr = p; ptr < end; ptr++) {
1388     if (!IsASCIITabOrNewline(*ptr))
1389       continue;
1390     // Hit tab or newline. Allocate storage, copy what we have until now,
1391     // and then iterate and filter all similar characters out.
1392     whitespace_stripped.reserve(len - 1);
1393     whitespace_stripped.assign(p, ptr - p);
1394     // 'ptr + 1' skips the current char, which we know to be tab or newline.
1395     for (ptr = ptr + 1; ptr < end; ptr++) {
1396       if (!IsASCIITabOrNewline(*ptr))
1397         whitespace_stripped += *ptr;
1398     }
1399 
1400     // Update variables like they should have looked like if the string
1401     // had been stripped of whitespace to begin with.
1402     input = whitespace_stripped.c_str();
1403     len = whitespace_stripped.size();
1404     p = input;
1405     end = input + len;
1406     break;
1407   }
1408 
1409   bool atflag = false;  // Set when @ has been seen.
1410   bool square_bracket_flag = false;  // Set inside of [...]
1411   bool password_token_seen_flag = false;  // Set after a : after an username.
1412 
1413   std::string buffer;
1414 
1415   // Set the initial parse state.
1416   const bool has_state_override = state_override != kUnknownState;
1417   enum url_parse_state state = has_state_override ? state_override :
1418                                                     kSchemeStart;
1419 
1420   if (state < kSchemeStart || state > kFragment) {
1421     url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
1422     return;
1423   }
1424 
1425   while (p <= end) {
1426     const char ch = p < end ? p[0] : kEOL;
1427     bool special = (url->flags & URL_FLAGS_SPECIAL);
1428     bool cannot_be_base;
1429     const bool special_back_slash = (special && ch == '\\');
1430 
1431     switch (state) {
1432       case kSchemeStart:
1433         if (IsASCIIAlpha(ch)) {
1434           buffer += ASCIILowercase(ch);
1435           state = kScheme;
1436         } else if (!has_state_override) {
1437           state = kNoScheme;
1438           continue;
1439         } else {
1440           url->flags |= URL_FLAGS_FAILED;
1441           return;
1442         }
1443         break;
1444       case kScheme:
1445         if (IsASCIIAlphanumeric(ch) || ch == '+' || ch == '-' || ch == '.') {
1446           buffer += ASCIILowercase(ch);
1447         } else if (ch == ':' || (has_state_override && ch == kEOL)) {
1448           if (has_state_override && buffer.size() == 0) {
1449             url->flags |= URL_FLAGS_TERMINATED;
1450             return;
1451           }
1452           buffer += ':';
1453 
1454           bool new_is_special = IsSpecial(buffer);
1455 
1456           if (has_state_override) {
1457             if ((special != new_is_special) ||
1458                 ((buffer == "file:") &&
1459                  ((url->flags & URL_FLAGS_HAS_USERNAME) ||
1460                   (url->flags & URL_FLAGS_HAS_PASSWORD) ||
1461                   (url->port != -1)))) {
1462               url->flags |= URL_FLAGS_TERMINATED;
1463               return;
1464             }
1465 
1466             // File scheme && (host == empty or null) check left to JS-land
1467             // as it can be done before even entering C++ binding.
1468           }
1469 
1470           url->scheme = std::move(buffer);
1471           url->port = NormalizePort(url->scheme, url->port);
1472           if (new_is_special) {
1473             url->flags |= URL_FLAGS_SPECIAL;
1474             special = true;
1475           } else {
1476             url->flags &= ~URL_FLAGS_SPECIAL;
1477             special = false;
1478           }
1479           buffer.clear();
1480           if (has_state_override)
1481             return;
1482           if (url->scheme == "file:") {
1483             state = kFile;
1484           } else if (special &&
1485                      has_base &&
1486                      url->scheme == base->scheme) {
1487             state = kSpecialRelativeOrAuthority;
1488           } else if (special) {
1489             state = kSpecialAuthoritySlashes;
1490           } else if (p[1] == '/') {
1491             state = kPathOrAuthority;
1492             p++;
1493           } else {
1494             url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1495             url->flags |= URL_FLAGS_HAS_PATH;
1496             url->path.emplace_back("");
1497             state = kCannotBeBase;
1498           }
1499         } else if (!has_state_override) {
1500           buffer.clear();
1501           state = kNoScheme;
1502           p = input;
1503           continue;
1504         } else {
1505           url->flags |= URL_FLAGS_FAILED;
1506           return;
1507         }
1508         break;
1509       case kNoScheme:
1510         cannot_be_base = has_base && (base->flags & URL_FLAGS_CANNOT_BE_BASE);
1511         if (!has_base || (cannot_be_base && ch != '#')) {
1512           url->flags |= URL_FLAGS_FAILED;
1513           return;
1514         } else if (cannot_be_base && ch == '#') {
1515           url->scheme = base->scheme;
1516           if (IsSpecial(url->scheme)) {
1517             url->flags |= URL_FLAGS_SPECIAL;
1518             special = true;
1519           } else {
1520             url->flags &= ~URL_FLAGS_SPECIAL;
1521             special = false;
1522           }
1523           if (base->flags & URL_FLAGS_HAS_PATH) {
1524             url->flags |= URL_FLAGS_HAS_PATH;
1525             url->path = base->path;
1526           }
1527           if (base->flags & URL_FLAGS_HAS_QUERY) {
1528             url->flags |= URL_FLAGS_HAS_QUERY;
1529             url->query = base->query;
1530           }
1531           if (base->flags & URL_FLAGS_HAS_FRAGMENT) {
1532             url->flags |= URL_FLAGS_HAS_FRAGMENT;
1533             url->fragment = base->fragment;
1534           }
1535           url->flags |= URL_FLAGS_CANNOT_BE_BASE;
1536           state = kFragment;
1537         } else if (has_base &&
1538                    base->scheme != "file:") {
1539           state = kRelative;
1540           continue;
1541         } else {
1542           url->scheme = "file:";
1543           url->flags |= URL_FLAGS_SPECIAL;
1544           special = true;
1545           state = kFile;
1546           continue;
1547         }
1548         break;
1549       case kSpecialRelativeOrAuthority:
1550         if (ch == '/' && p[1] == '/') {
1551           state = kSpecialAuthorityIgnoreSlashes;
1552           p++;
1553         } else {
1554           state = kRelative;
1555           continue;
1556         }
1557         break;
1558       case kPathOrAuthority:
1559         if (ch == '/') {
1560           state = kAuthority;
1561         } else {
1562           state = kPath;
1563           continue;
1564         }
1565         break;
1566       case kRelative:
1567         url->scheme = base->scheme;
1568         if (IsSpecial(url->scheme)) {
1569           url->flags |= URL_FLAGS_SPECIAL;
1570           special = true;
1571         } else {
1572           url->flags &= ~URL_FLAGS_SPECIAL;
1573           special = false;
1574         }
1575         switch (ch) {
1576           case kEOL:
1577             if (base->flags & URL_FLAGS_HAS_USERNAME) {
1578               url->flags |= URL_FLAGS_HAS_USERNAME;
1579               url->username = base->username;
1580             }
1581             if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1582               url->flags |= URL_FLAGS_HAS_PASSWORD;
1583               url->password = base->password;
1584             }
1585             if (base->flags & URL_FLAGS_HAS_HOST) {
1586               url->flags |= URL_FLAGS_HAS_HOST;
1587               url->host = base->host;
1588             }
1589             if (base->flags & URL_FLAGS_HAS_QUERY) {
1590               url->flags |= URL_FLAGS_HAS_QUERY;
1591               url->query = base->query;
1592             }
1593             if (base->flags & URL_FLAGS_HAS_PATH) {
1594               url->flags |= URL_FLAGS_HAS_PATH;
1595               url->path = base->path;
1596             }
1597             url->port = base->port;
1598             break;
1599           case '/':
1600             state = kRelativeSlash;
1601             break;
1602           case '?':
1603             if (base->flags & URL_FLAGS_HAS_USERNAME) {
1604               url->flags |= URL_FLAGS_HAS_USERNAME;
1605               url->username = base->username;
1606             }
1607             if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1608               url->flags |= URL_FLAGS_HAS_PASSWORD;
1609               url->password = base->password;
1610             }
1611             if (base->flags & URL_FLAGS_HAS_HOST) {
1612               url->flags |= URL_FLAGS_HAS_HOST;
1613               url->host = base->host;
1614             }
1615             if (base->flags & URL_FLAGS_HAS_PATH) {
1616               url->flags |= URL_FLAGS_HAS_PATH;
1617               url->path = base->path;
1618             }
1619             url->port = base->port;
1620             state = kQuery;
1621             break;
1622           case '#':
1623             if (base->flags & URL_FLAGS_HAS_USERNAME) {
1624               url->flags |= URL_FLAGS_HAS_USERNAME;
1625               url->username = base->username;
1626             }
1627             if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1628               url->flags |= URL_FLAGS_HAS_PASSWORD;
1629               url->password = base->password;
1630             }
1631             if (base->flags & URL_FLAGS_HAS_HOST) {
1632               url->flags |= URL_FLAGS_HAS_HOST;
1633               url->host = base->host;
1634             }
1635             if (base->flags & URL_FLAGS_HAS_QUERY) {
1636               url->flags |= URL_FLAGS_HAS_QUERY;
1637               url->query = base->query;
1638             }
1639             if (base->flags & URL_FLAGS_HAS_PATH) {
1640               url->flags |= URL_FLAGS_HAS_PATH;
1641               url->path = base->path;
1642             }
1643             url->port = base->port;
1644             state = kFragment;
1645             break;
1646           default:
1647             if (special_back_slash) {
1648               state = kRelativeSlash;
1649             } else {
1650               if (base->flags & URL_FLAGS_HAS_USERNAME) {
1651                 url->flags |= URL_FLAGS_HAS_USERNAME;
1652                 url->username = base->username;
1653               }
1654               if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1655                 url->flags |= URL_FLAGS_HAS_PASSWORD;
1656                 url->password = base->password;
1657               }
1658               if (base->flags & URL_FLAGS_HAS_HOST) {
1659                 url->flags |= URL_FLAGS_HAS_HOST;
1660                 url->host = base->host;
1661               }
1662               if (base->flags & URL_FLAGS_HAS_PATH) {
1663                 url->flags |= URL_FLAGS_HAS_PATH;
1664                 url->path = base->path;
1665                 ShortenUrlPath(url);
1666               }
1667               url->port = base->port;
1668               state = kPath;
1669               continue;
1670             }
1671         }
1672         break;
1673       case kRelativeSlash:
1674         if (IsSpecial(url->scheme) && (ch == '/' || ch == '\\')) {
1675           state = kSpecialAuthorityIgnoreSlashes;
1676         } else if (ch == '/') {
1677           state = kAuthority;
1678         } else {
1679           if (base->flags & URL_FLAGS_HAS_USERNAME) {
1680             url->flags |= URL_FLAGS_HAS_USERNAME;
1681             url->username = base->username;
1682           }
1683           if (base->flags & URL_FLAGS_HAS_PASSWORD) {
1684             url->flags |= URL_FLAGS_HAS_PASSWORD;
1685             url->password = base->password;
1686           }
1687           if (base->flags & URL_FLAGS_HAS_HOST) {
1688             url->flags |= URL_FLAGS_HAS_HOST;
1689             url->host = base->host;
1690           }
1691           url->port = base->port;
1692           state = kPath;
1693           continue;
1694         }
1695         break;
1696       case kSpecialAuthoritySlashes:
1697         state = kSpecialAuthorityIgnoreSlashes;
1698         if (ch == '/' && p[1] == '/') {
1699           p++;
1700         } else {
1701           continue;
1702         }
1703         break;
1704       case kSpecialAuthorityIgnoreSlashes:
1705         if (ch != '/' && ch != '\\') {
1706           state = kAuthority;
1707           continue;
1708         }
1709         break;
1710       case kAuthority:
1711         if (ch == '@') {
1712           if (atflag) {
1713             buffer.reserve(buffer.size() + 3);
1714             buffer.insert(0, "%40");
1715           }
1716           atflag = true;
1717           const size_t blen = buffer.size();
1718           if (blen > 0 && buffer[0] != ':') {
1719             url->flags |= URL_FLAGS_HAS_USERNAME;
1720           }
1721           for (size_t n = 0; n < blen; n++) {
1722             const char bch = buffer[n];
1723             if (bch == ':') {
1724               url->flags |= URL_FLAGS_HAS_PASSWORD;
1725               if (!password_token_seen_flag) {
1726                 password_token_seen_flag = true;
1727                 continue;
1728               }
1729             }
1730             if (password_token_seen_flag) {
1731               AppendOrEscape(&url->password, bch, USERINFO_ENCODE_SET);
1732             } else {
1733               AppendOrEscape(&url->username, bch, USERINFO_ENCODE_SET);
1734             }
1735           }
1736           buffer.clear();
1737         } else if (ch == kEOL ||
1738                    ch == '/' ||
1739                    ch == '?' ||
1740                    ch == '#' ||
1741                    special_back_slash) {
1742           if (atflag && buffer.size() == 0) {
1743             url->flags |= URL_FLAGS_FAILED;
1744             return;
1745           }
1746           p -= buffer.size() + 1;
1747           buffer.clear();
1748           state = kHost;
1749         } else {
1750           buffer += ch;
1751         }
1752         break;
1753       case kHost:
1754       case kHostname:
1755         if (has_state_override && url->scheme == "file:") {
1756           state = kFileHost;
1757           continue;
1758         } else if (ch == ':' && !square_bracket_flag) {
1759           if (buffer.size() == 0) {
1760             url->flags |= URL_FLAGS_FAILED;
1761             return;
1762           }
1763           url->flags |= URL_FLAGS_HAS_HOST;
1764           if (!ParseHost(buffer, &url->host, special)) {
1765             url->flags |= URL_FLAGS_FAILED;
1766             return;
1767           }
1768           buffer.clear();
1769           state = kPort;
1770           if (state_override == kHostname) {
1771             return;
1772           }
1773         } else if (ch == kEOL ||
1774                    ch == '/' ||
1775                    ch == '?' ||
1776                    ch == '#' ||
1777                    special_back_slash) {
1778           p--;
1779           if (special && buffer.size() == 0) {
1780             url->flags |= URL_FLAGS_FAILED;
1781             return;
1782           }
1783           if (has_state_override &&
1784               buffer.size() == 0 &&
1785               ((url->username.size() > 0 || url->password.size() > 0) ||
1786                url->port != -1)) {
1787             url->flags |= URL_FLAGS_TERMINATED;
1788             return;
1789           }
1790           url->flags |= URL_FLAGS_HAS_HOST;
1791           if (!ParseHost(buffer, &url->host, special)) {
1792             url->flags |= URL_FLAGS_FAILED;
1793             return;
1794           }
1795           buffer.clear();
1796           state = kPathStart;
1797           if (has_state_override) {
1798             return;
1799           }
1800         } else {
1801           if (ch == '[')
1802             square_bracket_flag = true;
1803           if (ch == ']')
1804             square_bracket_flag = false;
1805           buffer += ch;
1806         }
1807         break;
1808       case kPort:
1809         if (IsASCIIDigit(ch)) {
1810           buffer += ch;
1811         } else if (has_state_override ||
1812                    ch == kEOL ||
1813                    ch == '/' ||
1814                    ch == '?' ||
1815                    ch == '#' ||
1816                    special_back_slash) {
1817           if (buffer.size() > 0) {
1818             unsigned port = 0;
1819             // the condition port <= 0xffff prevents integer overflow
1820             for (size_t i = 0; port <= 0xffff && i < buffer.size(); i++)
1821               port = port * 10 + buffer[i] - '0';
1822             if (port > 0xffff) {
1823               // TODO(TimothyGu): This hack is currently needed for the host
1824               // setter since it needs access to hostname if it is valid, and
1825               // if the FAILED flag is set the entire response to JS layer
1826               // will be empty.
1827               if (state_override == kHost)
1828                 url->port = -1;
1829               else
1830                 url->flags |= URL_FLAGS_FAILED;
1831               return;
1832             }
1833             // the port is valid
1834             url->port = NormalizePort(url->scheme, static_cast<int>(port));
1835             if (url->port == -1)
1836               url->flags |= URL_FLAGS_IS_DEFAULT_SCHEME_PORT;
1837             buffer.clear();
1838           } else if (has_state_override) {
1839             // TODO(TimothyGu): Similar case as above.
1840             if (state_override == kHost)
1841               url->port = -1;
1842             else
1843               url->flags |= URL_FLAGS_TERMINATED;
1844             return;
1845           }
1846           state = kPathStart;
1847           continue;
1848         } else {
1849           url->flags |= URL_FLAGS_FAILED;
1850           return;
1851         }
1852         break;
1853       case kFile:
1854         url->scheme = "file:";
1855         if (ch == '/' || ch == '\\') {
1856           state = kFileSlash;
1857         } else if (has_base && base->scheme == "file:") {
1858           switch (ch) {
1859             case kEOL:
1860               if (base->flags & URL_FLAGS_HAS_HOST) {
1861                 url->flags |= URL_FLAGS_HAS_HOST;
1862                 url->host = base->host;
1863               }
1864               if (base->flags & URL_FLAGS_HAS_PATH) {
1865                 url->flags |= URL_FLAGS_HAS_PATH;
1866                 url->path = base->path;
1867               }
1868               if (base->flags & URL_FLAGS_HAS_QUERY) {
1869                 url->flags |= URL_FLAGS_HAS_QUERY;
1870                 url->query = base->query;
1871               }
1872               break;
1873             case '?':
1874               if (base->flags & URL_FLAGS_HAS_HOST) {
1875                 url->flags |= URL_FLAGS_HAS_HOST;
1876                 url->host = base->host;
1877               }
1878               if (base->flags & URL_FLAGS_HAS_PATH) {
1879                 url->flags |= URL_FLAGS_HAS_PATH;
1880                 url->path = base->path;
1881               }
1882               url->flags |= URL_FLAGS_HAS_QUERY;
1883               url->query.clear();
1884               state = kQuery;
1885               break;
1886             case '#':
1887               if (base->flags & URL_FLAGS_HAS_HOST) {
1888                 url->flags |= URL_FLAGS_HAS_HOST;
1889                 url->host = base->host;
1890               }
1891               if (base->flags & URL_FLAGS_HAS_PATH) {
1892                 url->flags |= URL_FLAGS_HAS_PATH;
1893                 url->path = base->path;
1894               }
1895               if (base->flags & URL_FLAGS_HAS_QUERY) {
1896                 url->flags |= URL_FLAGS_HAS_QUERY;
1897                 url->query = base->query;
1898               }
1899               url->flags |= URL_FLAGS_HAS_FRAGMENT;
1900               url->fragment.clear();
1901               state = kFragment;
1902               break;
1903             default:
1904               if (!StartsWithWindowsDriveLetter(p, end)) {
1905                 if (base->flags & URL_FLAGS_HAS_HOST) {
1906                   url->flags |= URL_FLAGS_HAS_HOST;
1907                   url->host = base->host;
1908                 }
1909                 if (base->flags & URL_FLAGS_HAS_PATH) {
1910                   url->flags |= URL_FLAGS_HAS_PATH;
1911                   url->path = base->path;
1912                 }
1913                 ShortenUrlPath(url);
1914               }
1915               state = kPath;
1916               continue;
1917           }
1918         } else {
1919           state = kPath;
1920           continue;
1921         }
1922         break;
1923       case kFileSlash:
1924         if (ch == '/' || ch == '\\') {
1925           state = kFileHost;
1926         } else {
1927           if (has_base &&
1928               base->scheme == "file:" &&
1929               !StartsWithWindowsDriveLetter(p, end)) {
1930             if (IsNormalizedWindowsDriveLetter(base->path[0])) {
1931               url->flags |= URL_FLAGS_HAS_PATH;
1932               url->path.push_back(base->path[0]);
1933             } else {
1934               if (base->flags & URL_FLAGS_HAS_HOST) {
1935                 url->flags |= URL_FLAGS_HAS_HOST;
1936                 url->host = base->host;
1937               } else {
1938                 url->flags &= ~URL_FLAGS_HAS_HOST;
1939                 url->host.clear();
1940               }
1941             }
1942           }
1943           state = kPath;
1944           continue;
1945         }
1946         break;
1947       case kFileHost:
1948         if (ch == kEOL ||
1949             ch == '/' ||
1950             ch == '\\' ||
1951             ch == '?' ||
1952             ch == '#') {
1953           if (!has_state_override &&
1954               buffer.size() == 2 &&
1955               IsWindowsDriveLetter(buffer)) {
1956             state = kPath;
1957           } else if (buffer.size() == 0) {
1958             url->flags |= URL_FLAGS_HAS_HOST;
1959             url->host.clear();
1960             if (has_state_override)
1961               return;
1962             state = kPathStart;
1963           } else {
1964             std::string host;
1965             if (!ParseHost(buffer, &host, special)) {
1966               url->flags |= URL_FLAGS_FAILED;
1967               return;
1968             }
1969             if (host == "localhost")
1970               host.clear();
1971             url->flags |= URL_FLAGS_HAS_HOST;
1972             url->host = host;
1973             if (has_state_override)
1974               return;
1975             buffer.clear();
1976             state = kPathStart;
1977           }
1978           continue;
1979         } else {
1980           buffer += ch;
1981         }
1982         break;
1983       case kPathStart:
1984         if (IsSpecial(url->scheme)) {
1985           state = kPath;
1986           if (ch != '/' && ch != '\\') {
1987             continue;
1988           }
1989         } else if (!has_state_override && ch == '?') {
1990           url->flags |= URL_FLAGS_HAS_QUERY;
1991           url->query.clear();
1992           state = kQuery;
1993         } else if (!has_state_override && ch == '#') {
1994           url->flags |= URL_FLAGS_HAS_FRAGMENT;
1995           url->fragment.clear();
1996           state = kFragment;
1997         } else if (ch != kEOL) {
1998           state = kPath;
1999           if (ch != '/') {
2000             continue;
2001           }
2002         }
2003         break;
2004       case kPath:
2005         if (ch == kEOL ||
2006             ch == '/' ||
2007             special_back_slash ||
2008             (!has_state_override && (ch == '?' || ch == '#'))) {
2009           if (IsDoubleDotSegment(buffer)) {
2010             ShortenUrlPath(url);
2011             if (ch != '/' && !special_back_slash) {
2012               url->flags |= URL_FLAGS_HAS_PATH;
2013               url->path.emplace_back("");
2014             }
2015           } else if (IsSingleDotSegment(buffer) &&
2016                      ch != '/' && !special_back_slash) {
2017             url->flags |= URL_FLAGS_HAS_PATH;
2018             url->path.emplace_back("");
2019           } else if (!IsSingleDotSegment(buffer)) {
2020             if (url->scheme == "file:" &&
2021                 url->path.empty() &&
2022                 buffer.size() == 2 &&
2023                 IsWindowsDriveLetter(buffer)) {
2024               if ((url->flags & URL_FLAGS_HAS_HOST) &&
2025                   !url->host.empty()) {
2026                 url->host.clear();
2027                 url->flags |= URL_FLAGS_HAS_HOST;
2028               }
2029               buffer[1] = ':';
2030             }
2031             url->flags |= URL_FLAGS_HAS_PATH;
2032             url->path.emplace_back(std::move(buffer));
2033           }
2034           buffer.clear();
2035           if (url->scheme == "file:" &&
2036               (ch == kEOL ||
2037                ch == '?' ||
2038                ch == '#')) {
2039             while (url->path.size() > 1 && url->path[0].length() == 0) {
2040               url->path.erase(url->path.begin());
2041             }
2042           }
2043           if (ch == '?') {
2044             url->flags |= URL_FLAGS_HAS_QUERY;
2045             state = kQuery;
2046           } else if (ch == '#') {
2047             state = kFragment;
2048           }
2049         } else {
2050           AppendOrEscape(&buffer, ch, PATH_ENCODE_SET);
2051         }
2052         break;
2053       case kCannotBeBase:
2054         switch (ch) {
2055           case '?':
2056             state = kQuery;
2057             break;
2058           case '#':
2059             state = kFragment;
2060             break;
2061           default:
2062             if (url->path.size() == 0)
2063               url->path.push_back("");
2064             if (url->path.size() > 0 && ch != kEOL)
2065               AppendOrEscape(&url->path[0], ch, C0_CONTROL_ENCODE_SET);
2066         }
2067         break;
2068       case kQuery:
2069         if (ch == kEOL || (!has_state_override && ch == '#')) {
2070           url->flags |= URL_FLAGS_HAS_QUERY;
2071           url->query = std::move(buffer);
2072           buffer.clear();
2073           if (ch == '#')
2074             state = kFragment;
2075         } else {
2076           AppendOrEscape(&buffer, ch, special ? QUERY_ENCODE_SET_SPECIAL :
2077                                                 QUERY_ENCODE_SET_NONSPECIAL);
2078         }
2079         break;
2080       case kFragment:
2081         switch (ch) {
2082           case kEOL:
2083             url->flags |= URL_FLAGS_HAS_FRAGMENT;
2084             url->fragment = std::move(buffer);
2085             break;
2086           case 0:
2087             break;
2088           default:
2089             AppendOrEscape(&buffer, ch, FRAGMENT_ENCODE_SET);
2090         }
2091         break;
2092       default:
2093         url->flags |= URL_FLAGS_INVALID_PARSE_STATE;
2094         return;
2095     }
2096 
2097     p++;
2098   }
2099 }  // NOLINT(readability/fn_size)
2100 
SetArgs(Environment * env,Local<Value> argv[ARG_COUNT],const struct url_data & url)2101 static inline void SetArgs(Environment* env,
2102                            Local<Value> argv[ARG_COUNT],
2103                            const struct url_data& url) {
2104   Isolate* isolate = env->isolate();
2105   argv[ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2106   argv[ARG_PROTOCOL] = OneByteString(isolate, url.scheme.c_str());
2107   if (url.flags & URL_FLAGS_HAS_USERNAME)
2108     argv[ARG_USERNAME] = Utf8String(isolate, url.username);
2109   if (url.flags & URL_FLAGS_HAS_PASSWORD)
2110     argv[ARG_PASSWORD] = Utf8String(isolate, url.password);
2111   if (url.flags & URL_FLAGS_HAS_HOST)
2112     argv[ARG_HOST] = Utf8String(isolate, url.host);
2113   if (url.flags & URL_FLAGS_HAS_QUERY)
2114     argv[ARG_QUERY] = Utf8String(isolate, url.query);
2115   if (url.flags & URL_FLAGS_HAS_FRAGMENT)
2116     argv[ARG_FRAGMENT] = Utf8String(isolate, url.fragment);
2117   if (url.port > -1)
2118     argv[ARG_PORT] = Integer::New(isolate, url.port);
2119   if (url.flags & URL_FLAGS_HAS_PATH)
2120     argv[ARG_PATH] = ToJSStringArray(env, url.path);
2121 }
2122 
Parse(Environment * env,Local<Value> recv,const char * input,const size_t len,enum url_parse_state state_override,Local<Value> base_obj,Local<Value> context_obj,Local<Function> cb,Local<Value> error_cb)2123 static void Parse(Environment* env,
2124                   Local<Value> recv,
2125                   const char* input,
2126                   const size_t len,
2127                   enum url_parse_state state_override,
2128                   Local<Value> base_obj,
2129                   Local<Value> context_obj,
2130                   Local<Function> cb,
2131                   Local<Value> error_cb) {
2132   Isolate* isolate = env->isolate();
2133   Local<Context> context = env->context();
2134   HandleScope handle_scope(isolate);
2135   Context::Scope context_scope(context);
2136 
2137   const bool has_context = context_obj->IsObject();
2138   const bool has_base = base_obj->IsObject();
2139 
2140   url_data base;
2141   url_data url;
2142   if (has_context)
2143     url = HarvestContext(env, context_obj.As<Object>());
2144   if (has_base)
2145     base = HarvestBase(env, base_obj.As<Object>());
2146 
2147   URL::Parse(input, len, state_override, &url, has_context, &base, has_base);
2148   if ((url.flags & URL_FLAGS_INVALID_PARSE_STATE) ||
2149       ((state_override != kUnknownState) &&
2150        (url.flags & URL_FLAGS_TERMINATED)))
2151     return;
2152 
2153   // Define the return value placeholders
2154   const Local<Value> undef = Undefined(isolate);
2155   const Local<Value> null = Null(isolate);
2156   if (!(url.flags & URL_FLAGS_FAILED)) {
2157     Local<Value> argv[] = {
2158       undef,
2159       undef,
2160       undef,
2161       undef,
2162       null,  // host defaults to null
2163       null,  // port defaults to null
2164       undef,
2165       null,  // query defaults to null
2166       null,  // fragment defaults to null
2167     };
2168     SetArgs(env, argv, url);
2169     cb->Call(context, recv, arraysize(argv), argv).FromMaybe(Local<Value>());
2170   } else if (error_cb->IsFunction()) {
2171     Local<Value> argv[2] = { undef, undef };
2172     argv[ERR_ARG_FLAGS] = Integer::NewFromUnsigned(isolate, url.flags);
2173     argv[ERR_ARG_INPUT] =
2174       String::NewFromUtf8(env->isolate(),
2175                           input,
2176                           NewStringType::kNormal).ToLocalChecked();
2177     error_cb.As<Function>()->Call(context, recv, arraysize(argv), argv)
2178         .FromMaybe(Local<Value>());
2179   }
2180 }
2181 
Parse(const FunctionCallbackInfo<Value> & args)2182 static void Parse(const FunctionCallbackInfo<Value>& args) {
2183   Environment* env = Environment::GetCurrent(args);
2184   CHECK_GE(args.Length(), 5);
2185   CHECK(args[0]->IsString());  // input
2186   CHECK(args[2]->IsUndefined() ||  // base context
2187         args[2]->IsNull() ||
2188         args[2]->IsObject());
2189   CHECK(args[3]->IsUndefined() ||  // context
2190         args[3]->IsNull() ||
2191         args[3]->IsObject());
2192   CHECK(args[4]->IsFunction());  // complete callback
2193   CHECK(args[5]->IsUndefined() || args[5]->IsFunction());  // error callback
2194 
2195   Utf8Value input(env->isolate(), args[0]);
2196   enum url_parse_state state_override = kUnknownState;
2197   if (args[1]->IsNumber()) {
2198     state_override = static_cast<enum url_parse_state>(
2199         args[1]->Uint32Value(env->context()).FromJust());
2200   }
2201 
2202   Parse(env, args.This(),
2203         *input, input.length(),
2204         state_override,
2205         args[2],
2206         args[3],
2207         args[4].As<Function>(),
2208         args[5]);
2209 }
2210 
EncodeAuthSet(const FunctionCallbackInfo<Value> & args)2211 static void EncodeAuthSet(const FunctionCallbackInfo<Value>& args) {
2212   Environment* env = Environment::GetCurrent(args);
2213   CHECK_GE(args.Length(), 1);
2214   CHECK(args[0]->IsString());
2215   Utf8Value value(env->isolate(), args[0]);
2216   std::string output;
2217   const size_t len = value.length();
2218   output.reserve(len);
2219   for (size_t n = 0; n < len; n++) {
2220     const char ch = (*value)[n];
2221     AppendOrEscape(&output, ch, USERINFO_ENCODE_SET);
2222   }
2223   args.GetReturnValue().Set(
2224       String::NewFromUtf8(env->isolate(),
2225                           output.c_str(),
2226                           NewStringType::kNormal).ToLocalChecked());
2227 }
2228 
ToUSVString(const FunctionCallbackInfo<Value> & args)2229 static void ToUSVString(const FunctionCallbackInfo<Value>& args) {
2230   Environment* env = Environment::GetCurrent(args);
2231   CHECK_GE(args.Length(), 2);
2232   CHECK(args[0]->IsString());
2233   CHECK(args[1]->IsNumber());
2234 
2235   TwoByteValue value(env->isolate(), args[0]);
2236   const size_t n = value.length();
2237 
2238   const int64_t start = args[1]->IntegerValue(env->context()).FromJust();
2239   CHECK_GE(start, 0);
2240 
2241   for (size_t i = start; i < n; i++) {
2242     char16_t c = value[i];
2243     if (!IsUnicodeSurrogate(c)) {
2244       continue;
2245     } else if (IsUnicodeSurrogateTrail(c) || i == n - 1) {
2246       value[i] = kUnicodeReplacementCharacter;
2247     } else {
2248       char16_t d = value[i + 1];
2249       if (IsUnicodeTrail(d)) {
2250         i++;
2251       } else {
2252         value[i] = kUnicodeReplacementCharacter;
2253       }
2254     }
2255   }
2256 
2257   args.GetReturnValue().Set(
2258       String::NewFromTwoByte(env->isolate(),
2259                              *value,
2260                              NewStringType::kNormal,
2261                              n).ToLocalChecked());
2262 }
2263 
DomainToASCII(const FunctionCallbackInfo<Value> & args)2264 static void DomainToASCII(const FunctionCallbackInfo<Value>& args) {
2265   Environment* env = Environment::GetCurrent(args);
2266   CHECK_GE(args.Length(), 1);
2267   CHECK(args[0]->IsString());
2268   Utf8Value value(env->isolate(), args[0]);
2269 
2270   URLHost host;
2271   // Assuming the host is used for a special scheme.
2272   host.ParseHost(*value, value.length(), true);
2273   if (host.ParsingFailed()) {
2274     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2275     return;
2276   }
2277   std::string out = host.ToStringMove();
2278   args.GetReturnValue().Set(
2279       String::NewFromUtf8(env->isolate(),
2280                           out.c_str(),
2281                           NewStringType::kNormal).ToLocalChecked());
2282 }
2283 
DomainToUnicode(const FunctionCallbackInfo<Value> & args)2284 static void DomainToUnicode(const FunctionCallbackInfo<Value>& args) {
2285   Environment* env = Environment::GetCurrent(args);
2286   CHECK_GE(args.Length(), 1);
2287   CHECK(args[0]->IsString());
2288   Utf8Value value(env->isolate(), args[0]);
2289 
2290   URLHost host;
2291   // Assuming the host is used for a special scheme.
2292   host.ParseHost(*value, value.length(), true, true);
2293   if (host.ParsingFailed()) {
2294     args.GetReturnValue().Set(FIXED_ONE_BYTE_STRING(env->isolate(), ""));
2295     return;
2296   }
2297   std::string out = host.ToStringMove();
2298   args.GetReturnValue().Set(
2299       String::NewFromUtf8(env->isolate(),
2300                           out.c_str(),
2301                           NewStringType::kNormal).ToLocalChecked());
2302 }
2303 
ToFilePath() const2304 std::string URL::ToFilePath() const {
2305   if (context_.scheme != "file:") {
2306     return "";
2307   }
2308 
2309 #ifdef _WIN32
2310   const char* slash = "\\";
2311   auto is_slash = [] (char ch) {
2312     return ch == '/' || ch == '\\';
2313   };
2314 #else
2315   const char* slash = "/";
2316   auto is_slash = [] (char ch) {
2317     return ch == '/';
2318   };
2319   if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2320       context_.host.length() > 0) {
2321     return "";
2322   }
2323 #endif
2324   std::string decoded_path;
2325   for (const std::string& part : context_.path) {
2326     std::string decoded = PercentDecode(part.c_str(), part.length());
2327     for (char& ch : decoded) {
2328       if (is_slash(ch)) {
2329         return "";
2330       }
2331     }
2332     decoded_path += slash + decoded;
2333   }
2334 
2335 #ifdef _WIN32
2336   // TODO(TimothyGu): Use "\\?\" long paths on Windows.
2337 
2338   // If hostname is set, then we have a UNC path. Pass the hostname through
2339   // ToUnicode just in case it is an IDN using punycode encoding. We do not
2340   // need to worry about percent encoding because the URL parser will have
2341   // already taken care of that for us. Note that this only causes IDNs with an
2342   // appropriate `xn--` prefix to be decoded.
2343   if ((context_.flags & URL_FLAGS_HAS_HOST) &&
2344       context_.host.length() > 0) {
2345     std::string unicode_host;
2346     if (!ToUnicode(context_.host, &unicode_host)) {
2347       return "";
2348     }
2349     return "\\\\" + unicode_host + decoded_path;
2350   }
2351   // Otherwise, it's a local path that requires a drive letter.
2352   if (decoded_path.length() < 3) {
2353     return "";
2354   }
2355   if (decoded_path[2] != ':' ||
2356       !IsASCIIAlpha(decoded_path[1])) {
2357     return "";
2358   }
2359   // Strip out the leading '\'.
2360   return decoded_path.substr(1);
2361 #else
2362   return decoded_path;
2363 #endif
2364 }
2365 
FromFilePath(const std::string & file_path)2366 URL URL::FromFilePath(const std::string& file_path) {
2367   URL url("file://");
2368   std::string escaped_file_path;
2369   for (size_t i = 0; i < file_path.length(); ++i) {
2370     escaped_file_path += file_path[i];
2371     if (file_path[i] == '%')
2372       escaped_file_path += "25";
2373   }
2374   URL::Parse(escaped_file_path.c_str(), escaped_file_path.length(), kPathStart,
2375              &url.context_, true, nullptr, false);
2376   return url;
2377 }
2378 
2379 // This function works by calling out to a JS function that creates and
2380 // returns the JS URL object. Be mindful of the JS<->Native boundary
2381 // crossing that is required.
ToObject(Environment * env) const2382 const Local<Value> URL::ToObject(Environment* env) const {
2383   Isolate* isolate = env->isolate();
2384   Local<Context> context = env->context();
2385   Context::Scope context_scope(context);
2386 
2387   const Local<Value> undef = Undefined(isolate);
2388   const Local<Value> null = Null(isolate);
2389 
2390   if (context_.flags & URL_FLAGS_FAILED)
2391     return Local<Value>();
2392 
2393   Local<Value> argv[] = {
2394     undef,
2395     undef,
2396     undef,
2397     undef,
2398     null,  // host defaults to null
2399     null,  // port defaults to null
2400     undef,
2401     null,  // query defaults to null
2402     null,  // fragment defaults to null
2403   };
2404   SetArgs(env, argv, context_);
2405 
2406   MaybeLocal<Value> ret;
2407   {
2408     FatalTryCatch try_catch(env);
2409 
2410     // The SetURLConstructor method must have been called already to
2411     // set the constructor function used below. SetURLConstructor is
2412     // called automatically when the internal/url.js module is loaded
2413     // during the internal/bootstrap/node.js processing.
2414     ret = env->url_constructor_function()
2415         ->Call(env->context(), undef, arraysize(argv), argv);
2416   }
2417 
2418   return ret.ToLocalChecked();
2419 }
2420 
SetURLConstructor(const FunctionCallbackInfo<Value> & args)2421 static void SetURLConstructor(const FunctionCallbackInfo<Value>& args) {
2422   Environment* env = Environment::GetCurrent(args);
2423   CHECK_EQ(args.Length(), 1);
2424   CHECK(args[0]->IsFunction());
2425   env->set_url_constructor_function(args[0].As<Function>());
2426 }
2427 
// Module initializer for the `url` builtin: attaches the native methods and
// constants of this file to `target`.
//
// `context` is used to look up the current Environment. `unused` and `priv`
// are not read by this function.
static void Initialize(Local<Object> target,
                       Local<Value> unused,
                       Local<Context> context,
                       void* priv) {
  Environment* env = Environment::GetCurrent(context);
  // Native functions, keyed by the property name they get on `target`.
  // "parse" and "setURLConstructor" are registered with SetMethod, the others
  // with SetMethodNoSideEffect.
  env->SetMethod(target, "parse", Parse);
  env->SetMethodNoSideEffect(target, "encodeAuth", EncodeAuthSet);
  env->SetMethodNoSideEffect(target, "toUSVString", ToUSVString);
  env->SetMethodNoSideEffect(target, "domainToASCII", DomainToASCII);
  env->SetMethodNoSideEffect(target, "domainToUnicode", DomainToUnicode);
  env->SetMethod(target, "setURLConstructor", SetURLConstructor);

  // Define one constant on `target` per entry of the FLAGS list. The list is
  // declared outside this function; each entry supplies a name plus a second
  // argument that is ignored here.
#define XX(name, _) NODE_DEFINE_CONSTANT(target, name);
  FLAGS(XX)
#undef XX

  // Same for the PARSESTATES list, whose entries supply only a name.
  // NOTE(review): presumably these are the parser states such as kPathStart;
  // confirm against the PARSESTATES definition.
#define XX(name) NODE_DEFINE_CONSTANT(target, name);
  PARSESTATES(XX)
#undef XX
}
2448 }  // namespace url
2449 }  // namespace node
2450 
2451 NODE_BUILTIN_MODULE_CONTEXT_AWARE(url, node::url::Initialize)
2452