1 //------------------------------------------------------------------------------
2 // <copyright file="HtmlParameterEncoder.cs" company="Microsoft">
3 //     Copyright (c) Microsoft Corporation.  All rights reserved.
4 // </copyright>
5 //------------------------------------------------------------------------------
6 
7 namespace System.Web.Security.AntiXss {
8     using System;
9     using System.Collections;
10     using System.Text;
11     using System.Threading;
12 
13     /// <summary>
14     /// The type of space encoding to use.
15     /// </summary>
16     internal enum EncodingType {
17         /// <summary>
18         /// Encode spaces for use in query strings
19         /// </summary>
20         QueryString = 1,
21 
22         /// <summary>
23         /// Encode spaces for use in form data
24         /// </summary>
25         HtmlForm = 2
26     }
27 
28     /// <summary>
29     /// Provides Html Parameter Encoding methods.
30     /// </summary>
31     internal static class HtmlParameterEncoder {
32 
33         /// <summary>
34         /// The value to use when encoding a space for query strings.
35         /// </summary>
36         private static readonly char[] QueryStringSpace = "%20".ToCharArray();
37 
38         /// <summary>
39         /// The value to use when encoding a space for form data.
40         /// </summary>
41         private static readonly char[] FormStringSpace = "+".ToCharArray();
42 
43         /// <summary>
44         /// The values to output for each character.
45         /// </summary>
46         private static Lazy<char[][]> characterValuesLazy = new Lazy<char[][]>(InitialiseSafeList);
47 
48         /// <summary>
49         /// Encodes a string for query string encoding and returns the encoded string.
50         /// </summary>
51         /// <param name="s">The text to URL-encode.</param>
52         /// <param name="encoding">The encoding for the text parameter.</param>
53         /// <returns>The URL-encoded text.</returns>
54         /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings.
55         /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
56         /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
57         /// in a request string.</remarks>
58         /// <exception cref="ArgumentNullException">Thrown if the encoding is null.</exception>
QueryStringParameterEncode(string s, Encoding encoding)59         internal static string QueryStringParameterEncode(string s, Encoding encoding) {
60             return FormQueryEncode(s, encoding, EncodingType.QueryString);
61         }
62 
63         /// <summary>
64         /// Encodes a string for form URL encoding and returns the encoded string.
65         /// </summary>
66         /// <param name="s">The text to URL-encode.</param>
67         /// <param name="encoding">The encoding for the text parameter.</param>
68         /// <returns>The URL-encoded text.</returns>
69         /// <remarks>URL encoding ensures that all browsers will correctly transmit text in URL strings.
70         /// Characters such as a question mark (?), ampersand (&amp;), slash mark (/), and spaces might be truncated or corrupted by some browsers.
71         /// As a result, these characters must be encoded in &lt;a&gt; tags or in query strings where the strings can be re-sent by a browser
72         /// in a request string.</remarks>
73         /// <exception cref="ArgumentNullException">Thrown if the encoding is null.</exception>
FormStringParameterEncode(string s, Encoding encoding)74         internal static string FormStringParameterEncode(string s, Encoding encoding) {
75             return FormQueryEncode(s, encoding, EncodingType.HtmlForm);
76         }
77 
78         /// <summary>
79         /// Encodes a string for Query String or Form Data encoding.
80         /// </summary>
81         /// <param name="s">The text to URL-encode.</param>
82         /// <param name="encoding">The encoding for the text parameter.</param>
83         /// <param name="encodingType">The encoding type to use.</param>
84         /// <returns>The encoded text.</returns>
FormQueryEncode(string s, Encoding encoding, EncodingType encodingType)85         private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType) {
86             return FormQueryEncode(s, encoding, encodingType, characterValuesLazy);
87         }
88 
FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> characterValuesLazy)89         private static string FormQueryEncode(string s, Encoding encoding, EncodingType encodingType, Lazy<char[][]> characterValuesLazy) {
90             if (string.IsNullOrEmpty(s)) {
91                 return s;
92             }
93 
94             if (encoding == null) {
95                 throw new ArgumentNullException("encoding");
96             }
97 
98             var characterValues = characterValuesLazy.Value;
99 
100             // RFC 3986 states strings must be converted to their UTF8 value before URL encoding.
101             // See http://tools.ietf.org/html/rfc3986
102             // Conversion to char[] keeps null characters inline.
103             byte[] utf8Bytes = encoding.GetBytes(s.ToCharArray());
104             char[] encodedInput = new char[utf8Bytes.Length * 3]; // Each byte can potentially be encoded as %xx
105             int outputLength = 0;
106 
107             for (int characterPosition = 0; characterPosition < utf8Bytes.Length; characterPosition++) {
108                 byte currentCharacter = utf8Bytes[characterPosition];
109 
110                 if (currentCharacter == 0x00 || currentCharacter == 0x20 || currentCharacter > characterValues.Length || characterValues[currentCharacter] != null) {
111                     // character needs to be encoded
112                     char[] encodedCharacter;
113 
114                     if (currentCharacter == 0x20) {
115                         switch (encodingType) {
116                             case EncodingType.QueryString:
117                                 encodedCharacter = QueryStringSpace;
118                                 break;
119 
120                             // Special case for Html Form data, from http://www.w3.org/TR/html401/appendix/notes.html#non-ascii-chars
121                             case EncodingType.HtmlForm:
122                                 encodedCharacter = FormStringSpace;
123                                 break;
124 
125                             default:
126                                 throw new ArgumentOutOfRangeException("encodingType");
127                         }
128                     }
129                     else {
130                         encodedCharacter = characterValues[currentCharacter];
131                     }
132 
133                     for (int j = 0; j < encodedCharacter.Length; j++) {
134                         encodedInput[outputLength++] = encodedCharacter[j];
135                     }
136                 }
137                 else {
138                     // character does not need encoding
139                     encodedInput[outputLength++] = (char)currentCharacter;
140                 }
141             }
142 
143             return new string(encodedInput, 0, outputLength);
144         }
145 
146         /// <summary>
147         /// Initializes the HTML safe list.
148         /// </summary>
InitialiseSafeList()149         private static char[][] InitialiseSafeList() {
150             char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
151             SafeList.PunchSafeList(ref result, UrlParameterSafeList());
152             return result;
153         }
154 
155         /// <summary>
156         /// Provides the safe characters for URL parameter encoding.
157         /// </summary>
158         /// <returns>The safe characters for URL parameter encoding.</returns>
UrlParameterSafeList()159         private static IEnumerable UrlParameterSafeList() {
160             // Hyphen
161             yield return 0x2D;
162 
163             // Full stop/period
164             yield return 0x2E;
165 
166             // Digits
167             for (int i = 0x30; i <= 0x39; i++) {
168                 yield return i;
169             }
170 
171             // Upper case alphabet
172             for (int i = 0x41; i <= 0x5A; i++) {
173                 yield return i;
174             }
175 
176             // Underscore
177             yield return 0x5F;
178 
179             // Lower case alphabet
180             for (int i = 0x61; i <= 0x7A; i++) {
181                 yield return i;
182             }
183 
184             // Tilde
185             yield return 0x7E;
186         }
187 
188         #region UrlPathEncode Helpers
189 
190         /// <summary>
191         /// The values to output for each character.
192         /// </summary>
193         private static Lazy<char[][]> pathCharacterValuesLazy = new Lazy<char[][]>(InitialisePathSafeList);
194 
UrlPathEncode(string s, Encoding encoding)195         internal static string UrlPathEncode(string s, Encoding encoding) {
196             return FormQueryEncode(s, encoding, EncodingType.QueryString, pathCharacterValuesLazy);
197         }
198 
199         /// <summary>
200         /// Initializes the HTML safe list.
201         /// </summary>
InitialisePathSafeList()202         private static char[][] InitialisePathSafeList() {
203             char[][] result = SafeList.Generate(255, SafeList.PercentThenHexValueGenerator);
204             SafeList.PunchSafeList(ref result, UrlPathSafeList());
205             return result;
206         }
207 
208         /// <summary>
209         /// Provides the safe characters for URL path encoding.
210         /// </summary>
211         /// <returns>The safe characters for URL path encoding.</returns>
UrlPathSafeList()212         private static IEnumerable UrlPathSafeList() {
213 
214             foreach (var c in UrlParameterSafeList()) {
215                 yield return c;
216             }
217 
218             // Hash
219             yield return 0x23;
220 
221             // Percent
222             yield return 0x25;
223 
224             // Forward slash
225             yield return 0x2F;
226 
227             // Backwards slash
228             yield return 0x5C;
229 
230             // Left parenthesis
231             yield return 0x28;
232 
233             //Right parenthesis
234             yield return 0x29;
235         }
236 
237         #endregion
238     }
239 }
240