1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System;
6 using System.Collections.Generic;
7 using System.Globalization;
8 using System.IO;
9 using System.Text.Encodings.Web;
10 using System.Text.Unicode;
11 using Xunit;
12 
13 namespace Microsoft.Framework.WebEncoders
14 {
15     public class UnicodeEncoderBaseTests
16     {
17         [Fact]
Ctor_WithCustomFilters()18         public void Ctor_WithCustomFilters()
19         {
20             // Arrange
21             var filter = new TextEncoderSettings();
22             filter.AllowCharacters('a', 'b');
23             filter.AllowCharacters('\0', '&', '\uFFFF', 'd');
24             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(filter);
25 
26             // Act & assert
27             Assert.Equal("a", encoder.Encode("a"));
28             Assert.Equal("b", encoder.Encode("b"));
29             Assert.Equal("[U+0063]", encoder.Encode("c"));
30             Assert.Equal("d", encoder.Encode("d"));
31             Assert.Equal("[U+0000]", encoder.Encode("\0")); // we still always encode control chars
32             Assert.Equal("[U+0026]", encoder.Encode("&")); // we still always encode HTML-special chars
33             Assert.Equal("[U+FFFF]", encoder.Encode("\uFFFF")); // we still always encode non-chars and other forbidden chars
34         }
35 
36         [Fact]
Ctor_WithUnicodeRanges()37         public void Ctor_WithUnicodeRanges()
38         {
39             // Arrange
40             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(new TextEncoderSettings(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols));
41 
42             // Act & assert
43             Assert.Equal("[U+0061]", encoder.Encode("a"));
44             Assert.Equal("\u00E9", encoder.Encode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
45             Assert.Equal("\u2601", encoder.Encode("\u2601" /* CLOUD */));
46         }
47 
48         [Fact]
Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()49         public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
50         {
51             // Arrange
52             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
53             const string input = "Hello <>&\'\"+ there!";
54             const string expected = "Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!";
55 
56             // Act & assert
57             Assert.Equal(expected, encoder.Encode(input));
58         }
59 
60         [Fact]
Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()61         public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
62         {
63             // Arrange
64             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
65 
66             // Act & assert - BMP chars
67             for (int i = 0; i <= 0xFFFF; i++)
68             {
69                 string input = new String((char)i, 1);
70                 string expected;
71                 if (IsSurrogateCodePoint(i))
72                 {
73                     expected = "\uFFFD"; // unpaired surrogate -> Unicode replacement char
74                 }
75                 else
76                 {
77                     bool mustEncode = false;
78                     switch (i)
79                     {
80                         case '<':
81                         case '>':
82                         case '&':
83                         case '\"':
84                         case '\'':
85                         case '+':
86                             mustEncode = true;
87                             break;
88                     }
89 
90                     if (i <= 0x001F || (0x007F <= i && i <= 0x9F))
91                     {
92                         mustEncode = true; // control char
93                     }
94                     else if (!UnicodeHelpers.IsCharacterDefined((char)i))
95                     {
96                         mustEncode = true; // undefined (or otherwise disallowed) char
97                     }
98 
99                     if (mustEncode)
100                     {
101                         expected = String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", i);
102                     }
103                     else
104                     {
105                         expected = input; // no encoding
106                     }
107                 }
108 
109                 string retVal = encoder.Encode(input);
110                 Assert.Equal(expected, retVal);
111             }
112 
113             // Act & assert - astral chars
114             for (int i = 0x10000; i <= 0x10FFFF; i++)
115             {
116                 string input = Char.ConvertFromUtf32(i);
117                 string expected = String.Format(CultureInfo.InvariantCulture, "[U+{0:X}]", i);
118                 string retVal = encoder.Encode(input);
119                 Assert.Equal(expected, retVal);
120             }
121         }
122 
123         [Fact]
Encode_BadSurrogates_ReturnsUnicodeReplacementChar()124         public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
125         {
126             // Arrange
127             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All); // allow all codepoints
128 
129             // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
130             const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
131             const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";
132 
133             // Act
134             string retVal = encoder.Encode(input);
135 
136             // Assert
137             Assert.Equal(expected, retVal);
138         }
139 
140         [Fact]
Encode_EmptyStringInput_ReturnsEmptyString()141         public void Encode_EmptyStringInput_ReturnsEmptyString()
142         {
143             // Arrange
144             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
145 
146             // Act & assert
147             Assert.Equal("", encoder.Encode(""));
148         }
149 
150         [Fact]
Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()151         public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
152         {
153             // Arrange
154             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
155             string input = "Hello, there!";
156 
157             // Act & assert
158             Assert.Same(input, encoder.Encode(input));
159         }
160 
161         [Fact]
Encode_NullInput_ReturnsNull()162         public void Encode_NullInput_ReturnsNull()
163         {
164             // Arrange
165             UnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
166 
167             // Act & assert
168             Assert.Null(encoder.Encode(null));
169         }
170 
171         [Fact]
Encode_WithCharsRequiringEncodingAtBeginning()172         public void Encode_WithCharsRequiringEncodingAtBeginning()
173         {
174             Assert.Equal("[U+0026]Hello, there!", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("&Hello, there!"));
175         }
176 
177         [Fact]
Encode_WithCharsRequiringEncodingAtEnd()178         public void Encode_WithCharsRequiringEncodingAtEnd()
179         {
180             Assert.Equal("Hello, there![U+0026]", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("Hello, there!&"));
181         }
182 
183         [Fact]
Encode_WithCharsRequiringEncodingInMiddle()184         public void Encode_WithCharsRequiringEncodingInMiddle()
185         {
186             Assert.Equal("Hello, [U+0026]there!", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("Hello, &there!"));
187         }
188 
189         [Fact]
Encode_WithCharsRequiringEncodingInterspersed()190         public void Encode_WithCharsRequiringEncodingInterspersed()
191         {
192             Assert.Equal("Hello, [U+003C]there[U+003E]!", new CustomUnicodeEncoderBase(UnicodeRanges.All).Encode("Hello, <there>!"));
193         }
194 
195         [Fact]
Encode_CharArray_ParameterChecking_NegativeTestCases()196         public void Encode_CharArray_ParameterChecking_NegativeTestCases()
197         {
198             // Arrange
199             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
200 
201             // Act & assert
202             Assert.Throws<ArgumentNullException>(() => encoder.Encode((char[])null, 0, 0, new StringWriter()));
203             Assert.Throws<ArgumentNullException>(() => encoder.Encode("abc".ToCharArray(), 0, 3, null));
204             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc".ToCharArray(), -1, 2, new StringWriter()));
205             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc".ToCharArray(), 2, 2, new StringWriter()));
206             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc".ToCharArray(), 4, 0, new StringWriter()));
207             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc".ToCharArray(), 2, -1, new StringWriter()));
208             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc".ToCharArray(), 1, 3, new StringWriter()));
209         }
210 
211         //[Fact]
212         //public void Encode_CharArray_ZeroCount_DoesNotCallIntoTextWriter()
213         //{
214         //    // Arrange
215         //    CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
216         //    TextWriter output = new Mock<TextWriter>(MockBehavior.Strict).Object;
217 
218         //    // Act
219         //    encoder.Encode("abc".ToCharArray(), 2, 0, output);
220 
221         //    // Assert
222         //    // If we got this far (without TextWriter throwing), success!
223         //}
224 
225         [Fact]
Encode_CharArray_AllCharsValid()226         public void Encode_CharArray_AllCharsValid()
227         {
228             // Arrange
229             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
230             StringWriter output = new StringWriter();
231 
232             // Act
233             encoder.Encode("abc&xyz".ToCharArray(), 4, 2, output);
234 
235             // Assert
236             Assert.Equal("xy", output.ToString());
237         }
238 
239         [Fact]
Encode_CharArray_AllCharsInvalid()240         public void Encode_CharArray_AllCharsInvalid()
241         {
242             // Arrange
243             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
244             StringWriter output = new StringWriter();
245 
246             // Act
247             encoder.Encode("abc&xyz".ToCharArray(), 4, 2, output);
248 
249             // Assert
250             Assert.Equal("[U+0078][U+0079]", output.ToString());
251         }
252 
253         [Fact]
Encode_CharArray_SomeCharsValid()254         public void Encode_CharArray_SomeCharsValid()
255         {
256             // Arrange
257             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
258             StringWriter output = new StringWriter();
259 
260             // Act
261             encoder.Encode("abc&xyz".ToCharArray(), 2, 3, output);
262 
263             // Assert
264             Assert.Equal("c[U+0026]x", output.ToString());
265         }
266 
267         [Fact]
Encode_StringSubstring_ParameterChecking_NegativeTestCases()268         public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
269         {
270             // Arrange
271             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
272 
273             // Act & assert
274             Assert.Throws<ArgumentNullException>(() => encoder.Encode((string)null, 0, 0, new StringWriter()));
275             Assert.Throws<ArgumentNullException>(() => encoder.Encode("abc", 0, 3, null));
276             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc", -1, 2, new StringWriter()));
277             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc", 2, 2, new StringWriter()));
278             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc", 4, 0, new StringWriter()));
279             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc", 2, -1, new StringWriter()));
280             Assert.Throws<ArgumentOutOfRangeException>(() => encoder.Encode("abc", 1, 3, new StringWriter()));
281         }
282 
283         //[Fact]
284         //public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
285         //{
286         //    // Arrange
287         //    CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
288         //    TextWriter output = new Mock<TextWriter>(MockBehavior.Strict).Object;
289 
290         //    // Act
291         //    encoder.Encode("abc", 2, 0, output);
292 
293         //    // Assert
294         //    // If we got this far (without TextWriter throwing), success!
295         //}
296 
297         [Fact]
Encode_StringSubstring_AllCharsValid()298         public void Encode_StringSubstring_AllCharsValid()
299         {
300             // Arrange
301             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
302             StringWriter output = new StringWriter();
303 
304             // Act
305             encoder.Encode("abc&xyz", 4, 2, output);
306 
307             // Assert
308             Assert.Equal("xy", output.ToString());
309         }
310 
311         //[Fact]
312         //public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
313         //{
314         //    // Arrange
315         //    CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
316         //    var mockWriter = new Mock<TextWriter>(MockBehavior.Strict);
317         //    mockWriter.Setup(o => o.Write("abc")).Verifiable();
318 
319         //    // Act
320         //    encoder.Encode("abc", 0, 3, mockWriter.Object);
321 
322         //    // Assert
323         //    mockWriter.Verify();
324         //}
325 
326         [Fact]
Encode_StringSubstring_AllCharsInvalid()327         public void Encode_StringSubstring_AllCharsInvalid()
328         {
329             // Arrange
330             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
331             StringWriter output = new StringWriter();
332 
333             // Act
334             encoder.Encode("abc&xyz", 4, 2, output);
335 
336             // Assert
337             Assert.Equal("[U+0078][U+0079]", output.ToString());
338         }
339 
340         [Fact]
Encode_StringSubstring_SomeCharsValid()341         public void Encode_StringSubstring_SomeCharsValid()
342         {
343             // Arrange
344             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
345             StringWriter output = new StringWriter();
346 
347             // Act
348             encoder.Encode("abc&xyz", 2, 3, output);
349 
350             // Assert
351             Assert.Equal("c[U+0026]x", output.ToString());
352         }
353 
354         [Fact]
Encode_StringSubstring_EntireString_SomeCharsValid()355         public void Encode_StringSubstring_EntireString_SomeCharsValid()
356         {
357             // Arrange
358             CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
359             StringWriter output = new StringWriter();
360 
361             // Act
362             const string input = "abc&xyz";
363             encoder.Encode(input, 0, input.Length, output);
364 
365             // Assert
366             Assert.Equal("abc[U+0026]xyz", output.ToString());
367         }
368 
IsSurrogateCodePoint(int codePoint)369         private static bool IsSurrogateCodePoint(int codePoint)
370         {
371             return (0xD800 <= codePoint && codePoint <= 0xDFFF);
372         }
373 
374         private sealed class CustomTextEncoderSettings : TextEncoderSettings
375         {
376             private readonly int[] _allowedCodePoints;
377 
CustomTextEncoderSettings(params int[] allowedCodePoints)378             public CustomTextEncoderSettings(params int[] allowedCodePoints)
379             {
380                 _allowedCodePoints = allowedCodePoints;
381             }
382 
GetAllowedCodePoints()383             public override IEnumerable<int> GetAllowedCodePoints()
384             {
385                 return _allowedCodePoints;
386             }
387         }
388 
389         private sealed class CustomUnicodeEncoderBase : UnicodeEncoderBase
390         {
391             // We pass a (known bad) value of 1 for 'max output chars per input char',
392             // which also tests that the code behaves properly even if the original
393             // estimate is incorrect.
CustomUnicodeEncoderBase(TextEncoderSettings filter)394             public CustomUnicodeEncoderBase(TextEncoderSettings filter)
395                 : base(filter, maxOutputCharsPerInputChar: 1)
396             {
397             }
398 
CustomUnicodeEncoderBase(params UnicodeRange[] allowedRanges)399             public CustomUnicodeEncoderBase(params UnicodeRange[] allowedRanges)
400                 : this(new TextEncoderSettings(allowedRanges))
401             {
402             }
403 
WriteEncodedScalar(ref Writer writer, uint value)404             protected override void WriteEncodedScalar(ref Writer writer, uint value)
405             {
406                 writer.Write(String.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", value));
407             }
408         }
409     }
410 }
411