// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Diagnostics.Contracts;
using System.Globalization;
using System.Text;

namespace System.Net.Http
{
    // Low-level helpers for scanning HTTP header values: tokens, linear whitespace, numbers, hosts,
    // comments, quoted strings, quoted pairs and dates. The "Get*Length" helpers take a start index
    // and report how many characters, starting at that index, belong to the construct they scan for.
    internal static class HttpRuleParser
    {
        // Lookup table indexed by character code (0-127): true if the character may appear in a token.
        private static readonly bool[] s_tokenChars = CreateTokenChars();

        // Maximum depth of nested comments accepted by GetExpressionLength().
        private const int maxNestedCount = 5;

        // Input formats accepted by TryStringToDate(); they are passed to TryParseExact() in this order.
        private static readonly string[] s_dateFormats = new string[] {
            // "r", // RFC 1123, required output format but too strict for input
            "ddd, d MMM yyyy H:m:s 'GMT'", // RFC 1123 (r, except it allows both 1 and 01 for date and time)
            "ddd, d MMM yyyy H:m:s", // RFC 1123, no zone - assume GMT
            "d MMM yyyy H:m:s 'GMT'", // RFC 1123, no day-of-week
            "d MMM yyyy H:m:s", // RFC 1123, no day-of-week, no zone
            "ddd, d MMM yy H:m:s 'GMT'", // RFC 1123, short year
            "ddd, d MMM yy H:m:s", // RFC 1123, short year, no zone
            "d MMM yy H:m:s 'GMT'", // RFC 1123, no day-of-week, short year
            "d MMM yy H:m:s", // RFC 1123, no day-of-week, short year, no zone

            "dddd, d'-'MMM'-'yy H:m:s 'GMT'", // RFC 850
            "dddd, d'-'MMM'-'yy H:m:s", // RFC 850 no zone
            "ddd MMM d H:m:s yyyy", // ANSI C's asctime() format

            "ddd, d MMM yyyy H:m:s zzz", // RFC 5322
            "ddd, d MMM yyyy H:m:s", // RFC 5322 no zone
            "d MMM yyyy H:m:s zzz", // RFC 5322 no day-of-week
            "d MMM yyyy H:m:s", // RFC 5322 no day-of-week, no zone
        };

        internal const char CR = (char)13;
        internal const char LF = (char)10;
        internal const int MaxInt64Digits = 19;
        internal const int MaxInt32Digits = 10;

        // iso-8859-1, Western European (ISO)
#if uap
        internal static readonly Encoding DefaultHttpEncoding = Encoding.GetEncoding("iso-8859-1");
#else
        internal static readonly Encoding DefaultHttpEncoding = Encoding.GetEncoding(28591);
#endif

        // Builds the 128-entry token-character table: every character in the range 33..126 is a token
        // character, except the separators cleared below.
        private static bool[] CreateTokenChars()
        {
            // token = 1*<any CHAR except CTLs or separators>
            // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>

            var tokenChars = new bool[128]; // All elements default to "false".

            for (int i = 33; i < 127; i++) // Skip Space (32) & DEL (127).
            {
                tokenChars[i] = true;
            }

            // Remove separators: these are not valid token characters.
            tokenChars[(byte)'('] = false;
            tokenChars[(byte)')'] = false;
            tokenChars[(byte)'<'] = false;
            tokenChars[(byte)'>'] = false;
            tokenChars[(byte)'@'] = false;
            tokenChars[(byte)','] = false;
            tokenChars[(byte)';'] = false;
            tokenChars[(byte)':'] = false;
            tokenChars[(byte)'\\'] = false;
            tokenChars[(byte)'"'] = false;
            tokenChars[(byte)'/'] = false;
            tokenChars[(byte)'['] = false;
            tokenChars[(byte)']'] = false;
            tokenChars[(byte)'?'] = false;
            tokenChars[(byte)'='] = false;
            tokenChars[(byte)'{'] = false;
            tokenChars[(byte)'}'] = false;

            return tokenChars;
        }

        // Returns true if 'character' is a valid token character according to the lookup table.
        internal static bool IsTokenChar(char character)
        {
            // Must be between 'space' (32) and 'DEL' (127).
            if (character > 127)
            {
                return false;
            }

            return s_tokenChars[character];
        }

        // Returns the number of consecutive token characters in 'input' beginning at 'startIndex'
        // (0 if 'startIndex' is at or past the end of the string).
        [Pure]
        internal static int GetTokenLength(string input, int startIndex)
        {
            Debug.Assert(input != null);
            Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

            if (startIndex >= input.Length)
            {
                return 0;
            }

            int current = startIndex;

            while (current < input.Length)
            {
                if (!IsTokenChar(input[current]))
                {
                    return current - startIndex;
                }
                current++;
            }
            return input.Length - startIndex;
        }

        // Returns true if every character of 'input' is a token character. An empty string yields true.
        [Pure]
        internal static bool IsToken(string input)
        {
            for (int i = 0; i < input.Length; i++)
            {
                if (!IsTokenChar(input[i]))
                {
                    return false;
                }
            }

            return true;
        }

        // Byte-span overload: each byte is widened to a char and checked with IsTokenChar().
        [Pure]
        internal static bool IsToken(ReadOnlySpan<byte> input)
        {
            for (int i = 0; i < input.Length; i++)
            {
                if (!IsTokenChar((char)input[i]))
                {
                    return false;
                }
            }

            return true;
        }

        // Returns the number of linear-whitespace characters in 'input' beginning at 'startIndex'.
        // SP and HT count as whitespace; a CR LF pair only counts when followed by SP or HT.
        internal static int GetWhitespaceLength(string input, int startIndex)
        {
            Debug.Assert(input != null);
            Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

            if (startIndex >= input.Length)
            {
                return 0;
            }

            int current = startIndex;

            char c;
            while (current < input.Length)
            {
                c = input[current];

                if ((c == ' ') || (c == '\t'))
                {
                    current++;
                    continue;
                }

                if (c == '\r')
                {
                    // If we have a #13 char, it must be followed by #10 and then at least one SP or HT.
                    if ((current + 2 < input.Length) && (input[current + 1] == '\n'))
                    {
                        char spaceOrTab = input[current + 2];
                        if ((spaceOrTab == ' ') || (spaceOrTab == '\t'))
                        {
                            current += 3;
                            continue;
                        }
                    }
                }

                return current - startIndex;
            }

            // All characters between startIndex and the end of the string are LWS characters.
            return input.Length - startIndex;
        }

        // Convenience overload: scans the whole string.
        internal static bool ContainsInvalidNewLine(string value)
        {
            return ContainsInvalidNewLine(value, 0);
        }

        // Returns true if 'value' contains, at or after 'startIndex', a CR LF pair that is not followed
        // by SP or HT (including a CR LF pair at the very end of the string).
        internal static bool ContainsInvalidNewLine(string value, int startIndex)
        {
            // Search for newlines followed by non-whitespace: This is not allowed in any header (be it a known or
            // custom header). E.g. "value\r\nbadformat: header" is invalid. However "value\r\n goodformat: header"
            // is valid: newlines followed by whitespace are allowed in header values.
            int current = startIndex;
            while (current < value.Length)
            {
                if (value[current] == '\r')
                {
                    int char10Index = current + 1;
                    if ((char10Index < value.Length) && (value[char10Index] == '\n'))
                    {
                        current = char10Index + 1;

                        if (current == value.Length)
                        {
                            return true; // We have a string terminating with \r\n. This is invalid.
                        }

                        char c = value[current];
                        if ((c != ' ') && (c != '\t'))
                        {
                            return true;
                        }
                    }
                }
                current++;
            }

            return false;
        }

        // Returns the length of the number beginning at 'startIndex': a run of digits which, when
        // 'allowDecimal' is true, may contain a single '.'. Returns 0 if the first character is '.'.
        // 'startIndex' must be a valid index into 'input' (asserted in debug builds).
        internal static int GetNumberLength(string input, int startIndex, bool allowDecimal)
        {
            Debug.Assert(input != null);
            Debug.Assert((startIndex >= 0) && (startIndex < input.Length));
            Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

            int current = startIndex;
            char c;

            // If decimal values are not allowed, we pretend to have read the '.' character already. I.e. if a dot is
            // found in the string, parsing will be aborted.
            bool haveDot = !allowDecimal;

            // The RFC doesn't allow decimal values starting with dot. I.e. value ".123" is invalid. It must be in the
            // form "0.123". Also, there are no negative values defined in the RFC. So we'll just parse non-negative
            // values.
            // The RFC only allows decimal dots not ',' characters as decimal separators. Therefore value "1,23" is
            // considered invalid and must be represented as "1.23".
            if (input[current] == '.')
            {
                return 0;
            }

            while (current < input.Length)
            {
                c = input[current];
                if ((c >= '0') && (c <= '9'))
                {
                    current++;
                }
                else if (!haveDot && (c == '.'))
                {
                    // Note that value "1." is valid.
                    haveDot = true;
                    current++;
                }
                else
                {
                    break;
                }
            }

            return current - startIndex;
        }

        // Scans a host beginning at 'startIndex' up to the next SP, HT, CR or ',' (or the end of the
        // string). On success returns the number of characters consumed and stores the substring in
        // 'host'; otherwise returns 0 and leaves 'host' null. A '/' anywhere in the scanned range fails.
        internal static int GetHostLength(string input, int startIndex, bool allowToken, out string host)
        {
            Debug.Assert(input != null);
            Debug.Assert(startIndex >= 0);
            Contract.Ensures((Contract.Result<int>() >= 0) && (Contract.Result<int>() <= (input.Length - startIndex)));

            host = null;
            if (startIndex >= input.Length)
            {
                return 0;
            }

            // A 'host' is either a token (if 'allowToken' == true) or a valid host name as defined by the URI RFC.
            // So we first iterate through the string and search for path delimiters and whitespace. When found, stop
            // and try to use the substring as token or URI host name. If it works, we have a host name, otherwise not.
            int current = startIndex;
            bool isToken = true;
            while (current < input.Length)
            {
                char c = input[current];
                if (c == '/')
                {
                    return 0; // Host header must not contain paths.
                }

                if ((c == ' ') || (c == '\t') || (c == '\r') || (c == ','))
                {
                    break; // We hit a delimiter (',' or whitespace). Stop here.
                }

                isToken = isToken && IsTokenChar(c);

                current++;
            }

            int length = current - startIndex;
            if (length == 0)
            {
                return 0;
            }

            string result = input.Substring(startIndex, length);

            // Only fall back to host-name validation when the value can't be accepted as a token.
            if ((!allowToken || !isToken) && !IsValidHostName(result))
            {
                return 0;
            }

            host = result;
            return length;
        }

        // Parses a comment, i.e. a '(' ... ')' expression that may contain nested comments.
        // 'length' receives the total length including both parentheses when the result is 'Parsed'.
        internal static HttpParseResult GetCommentLength(string input, int startIndex, out int length)
        {
            int nestedCount = 0;
            return GetExpressionLength(input, startIndex, '(', ')', true, ref nestedCount, out length);
        }

        // Parses a quoted string, i.e. a '"' ... '"' expression without nesting.
        // 'length' receives the total length including both quotes when the result is 'Parsed'.
        internal static HttpParseResult GetQuotedStringLength(string input, int startIndex, out int length)
        {
            int nestedCount = 0;
            return GetExpressionLength(input, startIndex, '"', '"', false, ref nestedCount, out length);
        }

        // quoted-pair = "\" CHAR
        // CHAR = <any US-ASCII character (octets 0 - 127)>
        //
        // Returns 'NotParsed' if the character at 'startIndex' is not '\', 'InvalidFormat' if the '\' is
        // the last character or is followed by a character > 127, and 'Parsed' (length 2) otherwise.
        internal static HttpParseResult GetQuotedPairLength(string input, int startIndex, out int length)
        {
            Debug.Assert(input != null);
            Debug.Assert((startIndex >= 0) && (startIndex < input.Length));
            Contract.Ensures((Contract.ValueAtReturn(out length) >= 0) &&
                (Contract.ValueAtReturn(out length) <= (input.Length - startIndex)));

            length = 0;

            if (input[startIndex] != '\\')
            {
                return HttpParseResult.NotParsed;
            }

            // Quoted-char has 2 characters. Check whether there are 2 chars left ('\' + char)
            // If so, check whether the character is in the range 0-127. If not, it's an invalid value.
            if ((startIndex + 2 > input.Length) || (input[startIndex + 1] > 127))
            {
                return HttpParseResult.InvalidFormat;
            }

            // It doesn't matter what the char next to '\' is so we can skip along.
            length = 2;
            return HttpParseResult.Parsed;
        }

        // Converts 'dateTime' to universal time and formats it with the "r" (RFC 1123) format.
        internal static string DateToString(DateTimeOffset dateTime)
        {
            // Format according to RFC1123; 'r' uses invariant info (DateTimeFormatInfo.InvariantInfo).
            return dateTime.ToUniversalTime().ToString("r", CultureInfo.InvariantCulture);
        }

        // Tries to parse 'input' using the formats in s_dateFormats. Returns true and sets 'result' on
        // success; returns false otherwise.
        internal static bool TryStringToDate(string input, out DateTimeOffset result)
        {
            // Try the various date formats in the order listed above.
            // We should accept a wide variety of common formats, but only output RFC 1123 style dates.
            return DateTimeOffset.TryParseExact(input, s_dateFormats, DateTimeFormatInfo.InvariantInfo,
                DateTimeStyles.AllowWhiteSpaces | DateTimeStyles.AssumeUniversal, out result);
        }

        // TEXT = <any OCTET except CTLs, but including LWS>
        // LWS = [CRLF] 1*( SP | HT )
        // CTL = <any US-ASCII control character (octets 0 - 31) and DEL (127)>
        //
        // Since we don't really care about the content of a quoted string or comment, we're more tolerant and
        // allow these characters. We only want to find the delimiters ('"' for quoted string and '(', ')' for comment).
        //
        // 'nestedCount': Comments can be nested. We allow a depth of up to 5 nested comments, i.e. something like
        // "(((((comment)))))". If we wouldn't define a limit an attacker could send a comment with hundreds of nested
        // comments, resulting in a stack overflow exception. In addition having more than 1 nested comment (if any)
        // is unusual.
        //
        // Returns 'NotParsed' if the character at 'startIndex' is not 'openChar', 'Parsed' (with 'length' covering
        // the open-char through the matching close-char) if a complete expression was found, and 'InvalidFormat'
        // if the expression is not terminated, a nested expression is invalid, or the nesting limit is exceeded.
        private static HttpParseResult GetExpressionLength(string input, int startIndex, char openChar,
            char closeChar, bool supportsNesting, ref int nestedCount, out int length)
        {
            Debug.Assert(input != null);
            Debug.Assert((startIndex >= 0) && (startIndex < input.Length));
            Contract.Ensures((Contract.Result<HttpParseResult>() != HttpParseResult.Parsed) ||
                (Contract.ValueAtReturn<int>(out length) > 0));

            length = 0;

            if (input[startIndex] != openChar)
            {
                return HttpParseResult.NotParsed;
            }

            int current = startIndex + 1; // Start parsing with the character next to the first open-char.
            while (current < input.Length)
            {
                // Only check whether we have a quoted char, if we have at least 3 characters left to read (i.e.
                // quoted char + closing char). Otherwise the closing char may be considered part of the quoted char.
                int quotedPairLength = 0;
                if ((current + 2 < input.Length) &&
                    (GetQuotedPairLength(input, current, out quotedPairLength) == HttpParseResult.Parsed))
                {
                    // We ignore invalid quoted-pairs. Invalid quoted-pairs may mean that it looked like a quoted pair,
                    // but we actually have a quoted-string: e.g. "\ü" ('\' followed by a char >127 - quoted-pair only
                    // allows ASCII chars after '\'; qdtext allows both '\' and >127 chars).
                    current = current + quotedPairLength;
                    continue;
                }

                // If we support nested expressions and we find an open-char, then parse the nested expressions.
                if (supportsNesting && (input[current] == openChar))
                {
                    nestedCount++;
                    try
                    {
                        // Check if we exceeded the number of nested calls.
                        if (nestedCount > maxNestedCount)
                        {
                            return HttpParseResult.InvalidFormat;
                        }

                        int nestedLength = 0;
                        HttpParseResult nestedResult = GetExpressionLength(input, current, openChar, closeChar,
                            supportsNesting, ref nestedCount, out nestedLength);

                        switch (nestedResult)
                        {
                            case HttpParseResult.Parsed:
                                // Skip the nested expression and resume scanning at the top of the loop.
                                // 'current' may now be equal to input.Length (e.g. "((a)"), so it must be
                                // re-checked against the loop bound before it is used as an index. Resuming at
                                // the top also lets the character after the nested expression be treated like
                                // any other (e.g. a second nested expression in "((a)(b))").
                                current += nestedLength;
                                continue;

                            case HttpParseResult.NotParsed:
                                Debug.Assert(false, "'NotParsed' is unexpected: We started nested expression " +
                                    "parsing, because we found the open-char. So either it's a valid nested " +
                                    "expression or it has invalid format.");
                                break;

                            case HttpParseResult.InvalidFormat:
                                // If the nested expression is invalid, we can't continue, so we fail with invalid format.
                                return HttpParseResult.InvalidFormat;

                            default:
                                Debug.Assert(false, "Unknown enum result: " + nestedResult);
                                break;
                        }
                    }
                    finally
                    {
                        // Runs on every exit path from the try block (return, continue, fall-through).
                        nestedCount--;
                    }
                }

                if (input[current] == closeChar)
                {
                    length = current - startIndex + 1;
                    return HttpParseResult.Parsed;
                }
                current++;
            }

            // We didn't find the final quote, therefore we have an invalid expression string.
            return HttpParseResult.InvalidFormat;
        }

        // Returns true if an absolute URI can be created from "http://u@" + host + "/".
        private static bool IsValidHostName(string host)
        {
            // Also add user info (u@) to make sure 'host' doesn't include user info.
            Uri hostUri;
            return Uri.TryCreate("http://u@" + host + "/", UriKind.Absolute, out hostUri);
        }
    }
}