//------------------------------------------------------------------------------
// <copyright file="HtmlParameterEncoder.cs" company="Microsoft">
//     Copyright (c) Microsoft Corporation.  All rights reserved.
// </copyright>
//------------------------------------------------------------------------------

namespace System.Web.Security.AntiXss {
    using System;
    using System.Collections;
    using System.Text;
    using System.Threading;

    /// <summary>
    /// The type of space encoding to use.
    /// </summary>
    internal enum EncodingType {
        /// <summary>
        /// Encode spaces for use in query strings
        /// </summary>
        QueryString = 1,

        /// <summary>
        /// Encode spaces for use in form data
        /// </summary>
        HtmlForm = 2
    }

    /// <summary>
    /// Provides Html Parameter Encoding methods.
    /// </summary>
    internal static class HtmlParameterEncoder {

        /// <summary>
        /// The value to use when encoding a space for query strings.
        /// </summary>
        private static readonly char[] QueryStringSpace = "%20".ToCharArray();

        /// <summary>
        /// The value to use when encoding a space for form data.
        /// </summary>
        private static readonly char[] FormStringSpace = "+".ToCharArray();

        /// <summary>
        /// The values to output for each byte when encoding query string or form parameters.
        /// A <c>null</c> entry marks a byte that is emitted unchanged. The table is built on first use.
        /// </summary>
        private static readonly Lazy<char[][]> characterValuesLazy = new Lazy<char[][]>(InitialiseSafeList);

        /// <summary>
        /// Encodes a string for query string encoding and returns the encoded string.
        /// Spaces are encoded as <c>%20</c>.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
        /// <returns>The URL-encoded text, or <paramref name="s"/> itself when it is null or empty.</returns>
        /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings.
        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
        /// in a request string.</remarks>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is not null or empty.</exception>
        internal static string QueryStringParameterEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.QueryString);
        }

        /// <summary>
        /// Encodes a string for form URL encoding and returns the encoded string.
        /// Spaces are encoded as <c>+</c>.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
        /// <returns>The URL-encoded text, or <paramref name="s"/> itself when it is null or empty.</returns>
        /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings.
        /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
        /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
        /// in a request string.</remarks>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is not null or empty.</exception>
        internal static string FormStringParameterEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.HtmlForm);
        }

        /// <summary>
        /// Encodes a string for Query String or Form Data encoding using the parameter safe list.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
        /// <param name="encodingType">The encoding type to use.</param>
        /// <returns>The encoded text.</returns>
        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType) {
            return FormQueryEncode(s, encoding, encodingType, characterValuesLazy);
        }

        /// <summary>
        /// Encodes a string byte-by-byte using the supplied lookup table.
        /// </summary>
        /// <param name="s">The text to URL-encode.</param>
        /// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
        /// <param name="encodingType">Selects how a space (0x20) is written: <c>%20</c> for query strings, <c>+</c> for form data.</param>
        /// <param name="characterValuesLazy">
        /// The lazily created lookup table, indexed by byte value. A non-null entry is the replacement text for
        /// that byte; a null entry means the byte is written unchanged. Note that this parameter hides the
        /// static field of the same name inside this method.
        /// </param>
        /// <returns>The encoded text, or <paramref name="s"/> itself when it is null or empty.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is not null or empty.</exception>
        /// <exception cref="ArgumentOutOfRangeException">Thrown if a space is encountered and <paramref name="encodingType"/> is not a known value.</exception>
        private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> characterValuesLazy) {
            // Null/empty input is returned as-is *before* the encoding argument is validated,
            // so a null encoding is tolerated for such input.
            if (string.IsNullOrEmpty(s)) {
                return s;
            }

            if (encoding == null) {
                throw new ArgumentNullException("encoding");
            }

            char[][] characterValues = characterValuesLazy.Value;

            // The text is converted to bytes with the caller-supplied encoding and each byte is then
            // escaped as required. See http://tools.ietf.org/html/rfc3986 for percent-encoding of octets.
            // Conversion to char[] keeps null characters inline.
            byte[] encodedBytes = encoding.GetBytes(s.ToCharArray());
            char[] output = new char[encodedBytes.Length * 3]; // Each byte can potentially be encoded as %xx
            int outputLength = 0;

            for (int position = 0; position < encodedBytes.Length; position++) {
                byte currentByte = encodedBytes[position];

                // NOTE(review): the Length comparison can only be true if the table has fewer than 255 entries,
                // and indexing would then fail below anyway. Presumably SafeList.Generate(255, ...) yields one
                // entry per byte value, which makes this check purely defensive - confirm against SafeList.
                if (currentByte == 0x00 || currentByte == 0x20 || currentByte > characterValues.Length || characterValues[currentByte] != null) {
                    // byte needs to be encoded
                    char[] replacement;

                    if (currentByte == 0x20) {
                        switch (encodingType) {
                            case EncodingType.QueryString:
                                replacement = QueryStringSpace;
                                break;

                            // Special case for Html Form data, from http://www.w3.org/TR/html401/appendix/notes.html#non-ascii-chars
                            case EncodingType.HtmlForm:
                                replacement = FormStringSpace;
                                break;

                            default:
                                throw new ArgumentOutOfRangeException("encodingType");
                        }
                    }
                    else {
                        replacement = characterValues[currentByte];
                    }

                    for (int j = 0; j < replacement.Length; j++) {
                        output[outputLength++] = replacement[j];
                    }
                }
                else {
                    // byte does not need encoding
                    output[outputLength++] = (char)currentByte;
                }
            }

            return new string(output, 0, outputLength);
        }

        /// <summary>
        /// Builds the lookup table used for query string and form parameter encoding:
        /// a percent-then-hex table with the URL parameter safe characters punched out.
        /// </summary>
        /// <returns>The lookup table consumed by <c>FormQueryEncode</c>.</returns>
        private static char[][] InitialiseSafeList() {
            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
            SafeList.PunchSafeList(ref result, UrlParameterSafeList());
            return result;
        }

        /// <summary>
        /// Provides the safe characters for URL parameter encoding.
        /// These are the RFC 3986 "unreserved" characters: letters, digits, hyphen, period, underscore and tilde.
        /// </summary>
        /// <returns>The safe characters for URL parameter encoding, as integer code points.</returns>
        private static IEnumerable UrlParameterSafeList() {
            // Hyphen
            yield return 0x2D;

            // Full stop/period
            yield return 0x2E;

            // Digits
            for (int i = 0x30; i <= 0x39; i++) {
                yield return i;
            }

            // Upper case alphabet
            for (int i = 0x41; i <= 0x5A; i++) {
                yield return i;
            }

            // Underscore
            yield return 0x5F;

            // Lower case alphabet
            for (int i = 0x61; i <= 0x7A; i++) {
                yield return i;
            }

            // Tilde
            yield return 0x7E;
        }

        #region UrlPathEncode Helpers

        /// <summary>
        /// The values to output for each byte when encoding URL paths.
        /// A <c>null</c> entry marks a byte that is emitted unchanged. The table is built on first use.
        /// </summary>
        private static readonly Lazy<char[][]> pathCharacterValuesLazy = new Lazy<char[][]>(InitialisePathSafeList);

        /// <summary>
        /// Encodes a string for use in a URL path and returns the encoded string.
        /// Uses the path safe list, and encodes spaces as <c>%20</c>.
        /// </summary>
        /// <param name="s">The text to encode.</param>
        /// <param name="encoding">The encoding used to convert the text to bytes before it is escaped.</param>
        /// <returns>The encoded text, or <paramref name="s"/> itself when it is null or empty.</returns>
        /// <exception cref="ArgumentNullException">Thrown if <paramref name="encoding"/> is null and <paramref name="s"/> is not null or empty.</exception>
        internal static string UrlPathEncode(string s, Encoding encoding) {
            return FormQueryEncode(s, encoding, EncodingType.QueryString, pathCharacterValuesLazy);
        }

        /// <summary>
        /// Builds the lookup table used for URL path encoding:
        /// a percent-then-hex table with the URL path safe characters punched out.
        /// </summary>
        /// <returns>The lookup table consumed by <c>FormQueryEncode</c> for paths.</returns>
        private static char[][] InitialisePathSafeList() {
            char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
            SafeList.PunchSafeList(ref result, UrlPathSafeList());
            return result;
        }

        /// <summary>
        /// Provides the safe characters for URL path encoding: everything in the URL parameter
        /// safe list plus hash, percent, forward slash, backslash and parentheses.
        /// </summary>
        /// <returns>The safe characters for URL path encoding, as integer code points.</returns>
        private static IEnumerable UrlPathSafeList() {

            foreach (var c in UrlParameterSafeList()) {
                yield return c;
            }

            // Hash
            yield return 0x23;

            // Percent
            yield return 0x25;

            // Forward slash
            yield return 0x2F;

            // Backwards slash
            yield return 0x5C;

            // Left parenthesis
            yield return 0x28;

            // Right parenthesis
            yield return 0x29;
        }

        #endregion
    }
}