// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text.Encodings.Web;
using System.Text.Unicode;
using Xunit;

namespace Microsoft.Framework.WebEncoders
{
    // Tests for UnicodeEncoderBase. Every test goes through the private
    // CustomUnicodeEncoderBase subclass declared at the bottom of this class,
    // which renders each encoded scalar as "[U+XXXX]" so that the expected
    // strings below show exactly which characters were encoded.
    public class UnicodeEncoderBaseTests
    {
        // Characters added through TextEncoderSettings.AllowCharacters pass through,
        // except those the assertions below expect to be encoded regardless.
        [Fact]
        public void Ctor_WithCustomFilters()
        {
            // Arrange
            var settings = new TextEncoderSettings();
            settings.AllowCharacters('a', 'b');
            settings.AllowCharacters('\0', '&', '\uFFFF', 'd');
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(settings);

            // Act & assert
            Assert.Equal("a", sut.Encode("a"));
            Assert.Equal("b", sut.Encode("b"));
            Assert.Equal("[U+0063]", sut.Encode("c")); // 'c' was never allowed
            Assert.Equal("d", sut.Encode("d"));
            Assert.Equal("[U+0000]", sut.Encode("\0")); // control chars are encoded even when allowed
            Assert.Equal("[U+0026]", sut.Encode("&")); // HTML-special chars are encoded even when allowed
            Assert.Equal("[U+FFFF]", sut.Encode("\uFFFF")); // non-chars and other forbidden chars are encoded even when allowed
        }

        // Only characters inside the ranges handed to TextEncoderSettings pass through.
        [Fact]
        public void Ctor_WithUnicodeRanges()
        {
            // Arrange
            var settings = new TextEncoderSettings(UnicodeRanges.Latin1Supplement, UnicodeRanges.MiscellaneousSymbols);
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(settings);

            // Act & assert
            Assert.Equal("[U+0061]", sut.Encode("a"));
            Assert.Equal("\u00E9", sut.Encode("\u00E9" /* LATIN SMALL LETTER E WITH ACUTE */));
            Assert.Equal("\u2601", sut.Encode("\u2601" /* CLOUD */));
        }

        // With every range allowed, < > & ' " + are still expected to be encoded.
        [Fact]
        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Simple()
        {
            // Arrange
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            const string input = "Hello <>&\'\"+ there!";
            const string expected = "Hello [U+003C][U+003E][U+0026][U+0027][U+0022][U+002B] there!";

            // Act
            string actual = sut.Encode(input);

            // Assert
            Assert.Equal(expected, actual);
        }

        // Walks every code point (BMP first, then the supplementary planes) and
        // checks the encoder's output for each one individually.
        [Fact]
        public void Encode_AllRangesAllowed_StillEncodesForbiddenChars_Extended()
        {
            // Arrange
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act & assert - BMP chars
            for (int codePoint = 0; codePoint <= 0xFFFF; codePoint++)
            {
                string input = new string((char)codePoint, 1);
                string expected;

                if (IsSurrogateCodePoint(codePoint))
                {
                    // A lone surrogate is expected to come back as the Unicode replacement char.
                    expected = "\uFFFD";
                }
                else
                {
                    // Characters expected to be encoded no matter what the allow list says.
                    bool isAlwaysEncoded =
                        codePoint == '<'
                        || codePoint == '>'
                        || codePoint == '&'
                        || codePoint == '\"'
                        || codePoint == '\''
                        || codePoint == '+';

                    bool isControlChar = codePoint <= 0x001F || (0x007F <= codePoint && codePoint <= 0x9F);

                    // Order matters: the definedness lookup is consulted only for
                    // non-control chars, and for every one of them.
                    bool mustEncode =
                        isControlChar
                        || !UnicodeHelpers.IsCharacterDefined((char)codePoint) // undefined (or otherwise disallowed) char
                        || isAlwaysEncoded;

                    expected = mustEncode
                        ? string.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", codePoint)
                        : input; // no encoding
                }

                string actual = sut.Encode(input);
                Assert.Equal(expected, actual);
            }

            // Act & assert - astral chars (each one is expected to be encoded)
            for (int codePoint = 0x10000; codePoint <= 0x10FFFF; codePoint++)
            {
                string input = char.ConvertFromUtf32(codePoint);
                string expected = string.Format(CultureInfo.InvariantCulture, "[U+{0:X}]", codePoint);

                string actual = sut.Encode(input);
                Assert.Equal(expected, actual);
            }
        }

        // Unpaired or misordered surrogates become U+FFFD; a valid pair is encoded as one scalar.
        [Fact]
        public void Encode_BadSurrogates_ReturnsUnicodeReplacementChar()
        {
            // Arrange
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All); // allow all codepoints

            // "a<unpaired leading>b<unpaired trailing>c<trailing before leading>d<unpaired trailing><valid>e<high at end of string>"
            const string input = "a\uD800b\uDFFFc\uDFFF\uD800d\uDFFF\uD800\uDFFFe\uD800";
            const string expected = "a\uFFFDb\uFFFDc\uFFFD\uFFFDd\uFFFD[U+103FF]e\uFFFD";

            // Act
            string actual = sut.Encode(input);

            // Assert
            Assert.Equal(expected, actual);
        }

        // An empty input yields an empty output.
        [Fact]
        public void Encode_EmptyStringInput_ReturnsEmptyString()
        {
            // Arrange
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = sut.Encode("");

            // Assert
            Assert.Equal("", actual);
        }

        // When nothing needs encoding, the very same string instance is handed back.
        [Fact]
        public void Encode_InputDoesNotRequireEncoding_ReturnsOriginalStringInstance()
        {
            // Arrange
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            string input = "Hello, there!";

            // Act
            string actual = sut.Encode(input);

            // Assert
            Assert.Same(input, actual);
        }

        // A null input yields null.
        [Fact]
        public void Encode_NullInput_ReturnsNull()
        {
            // Arrange
            UnicodeEncoderBase sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = sut.Encode(null);

            // Assert
            Assert.Null(actual);
        }

        // The character that needs encoding is the first one of the input.
        [Fact]
        public void Encode_WithCharsRequiringEncodingAtBeginning()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = sut.Encode("&Hello, there!");

            // Assert
            Assert.Equal("[U+0026]Hello, there!", actual);
        }

        // The character that needs encoding is the last one of the input.
        [Fact]
        public void Encode_WithCharsRequiringEncodingAtEnd()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = sut.Encode("Hello, there!&");

            // Assert
            Assert.Equal("Hello, there![U+0026]", actual);
        }

        // The character that needs encoding sits in the middle of the input.
        [Fact]
        public void Encode_WithCharsRequiringEncodingInMiddle()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = sut.Encode("Hello, &there!");

            // Assert
            Assert.Equal("Hello, [U+0026]there!", actual);
        }

        // Several characters that need encoding, separated by ones that do not.
        [Fact]
        public void Encode_WithCharsRequiringEncodingInterspersed()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);

            // Act
            string actual = sut.Encode("Hello, <there>!");

            // Assert
            Assert.Equal("Hello, [U+003C]there[U+003E]!", actual);
        }

        // Argument validation of the (char[], startIndex, charCount, output) overload.
        [Fact]
        public void Encode_CharArray_ParameterChecking_NegativeTestCases()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase();
            char[] missingBuffer = null;
            TextWriter missingWriter = null;

            // Act & assert - null arguments
            Assert.Throws<ArgumentNullException>(() => sut.Encode(missingBuffer, 0, 0, new StringWriter()));
            Assert.Throws<ArgumentNullException>(() => sut.Encode("abc".ToCharArray(), 0, 3, missingWriter));

            // Act & assert - start index / count combinations outside the 3-element buffer
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc".ToCharArray(), -1, 2, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc".ToCharArray(), 2, 2, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc".ToCharArray(), 4, 0, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc".ToCharArray(), 2, -1, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc".ToCharArray(), 1, 3, new StringWriter()));
        }

        // Disabled test: it needs Mock<TextWriter>, and this file has no using
        // directive for a mocking library.
        //[Fact]
        //public void Encode_CharArray_ZeroCount_DoesNotCallIntoTextWriter()
        //{
        //    // Arrange
        //    CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
        //    TextWriter output = new Mock<TextWriter>(MockBehavior.Strict).Object;

        //    // Act
        //    encoder.Encode("abc".ToCharArray(), 2, 0, output);

        //    // Assert
        //    // If we got this far (without TextWriter throwing), success!
        //}

        // Slice "xy" of the buffer needs no encoding when all ranges are allowed.
        [Fact]
        public void Encode_CharArray_AllCharsValid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();
            char[] buffer = "abc&xyz".ToCharArray();

            // Act
            sut.Encode(buffer, 4, 2, writer);

            // Assert
            Assert.Equal("xy", writer.ToString());
        }

        // With no ranges allowed, both characters of the slice are encoded.
        [Fact]
        public void Encode_CharArray_AllCharsInvalid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase();
            var writer = new StringWriter();
            char[] buffer = "abc&xyz".ToCharArray();

            // Act
            sut.Encode(buffer, 4, 2, writer);

            // Assert
            Assert.Equal("[U+0078][U+0079]", writer.ToString());
        }

        // Slice "c&x": only the ampersand is encoded.
        [Fact]
        public void Encode_CharArray_SomeCharsValid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();
            char[] buffer = "abc&xyz".ToCharArray();

            // Act
            sut.Encode(buffer, 2, 3, writer);

            // Assert
            Assert.Equal("c[U+0026]x", writer.ToString());
        }

        // Argument validation of the (string, startIndex, charCount, output) overload.
        [Fact]
        public void Encode_StringSubstring_ParameterChecking_NegativeTestCases()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase();
            string missingInput = null;
            TextWriter missingWriter = null;

            // Act & assert - null arguments
            Assert.Throws<ArgumentNullException>(() => sut.Encode(missingInput, 0, 0, new StringWriter()));
            Assert.Throws<ArgumentNullException>(() => sut.Encode("abc", 0, 3, missingWriter));

            // Act & assert - start index / count combinations outside the 3-character string
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc", -1, 2, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc", 2, 2, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc", 4, 0, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc", 2, -1, new StringWriter()));
            Assert.Throws<ArgumentOutOfRangeException>(() => sut.Encode("abc", 1, 3, new StringWriter()));
        }

        // Disabled test: it needs Mock<TextWriter>, and this file has no using
        // directive for a mocking library.
        //[Fact]
        //public void Encode_StringSubstring_ZeroCount_DoesNotCallIntoTextWriter()
        //{
        //    // Arrange
        //    CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase();
        //    TextWriter output = new Mock<TextWriter>(MockBehavior.Strict).Object;

        //    // Act
        //    encoder.Encode("abc", 2, 0, output);

        //    // Assert
        //    // If we got this far (without TextWriter throwing), success!
        //}

        // Substring "xy" needs no encoding when all ranges are allowed.
        [Fact]
        public void Encode_StringSubstring_AllCharsValid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();

            // Act
            sut.Encode("abc&xyz", 4, 2, writer);

            // Assert
            Assert.Equal("xy", writer.ToString());
        }

        // Disabled test: it needs Mock<TextWriter>, and this file has no using
        // directive for a mocking library.
        //[Fact]
        //public void Encode_StringSubstring_EntireString_AllCharsValid_ForwardDirectlyToOutput()
        //{
        //    // Arrange
        //    CustomUnicodeEncoderBase encoder = new CustomUnicodeEncoderBase(UnicodeRanges.All);
        //    var mockWriter = new Mock<TextWriter>(MockBehavior.Strict);
        //    mockWriter.Setup(o => o.Write("abc")).Verifiable();

        //    // Act
        //    encoder.Encode("abc", 0, 3, mockWriter.Object);

        //    // Assert
        //    mockWriter.Verify();
        //}

        // With no ranges allowed, both characters of the substring are encoded.
        [Fact]
        public void Encode_StringSubstring_AllCharsInvalid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase();
            var writer = new StringWriter();

            // Act
            sut.Encode("abc&xyz", 4, 2, writer);

            // Assert
            Assert.Equal("[U+0078][U+0079]", writer.ToString());
        }

        // Substring "c&x": only the ampersand is encoded.
        [Fact]
        public void Encode_StringSubstring_SomeCharsValid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();

            // Act
            sut.Encode("abc&xyz", 2, 3, writer);

            // Assert
            Assert.Equal("c[U+0026]x", writer.ToString());
        }

        // The substring overload is given the full length of the input.
        [Fact]
        public void Encode_StringSubstring_EntireString_SomeCharsValid()
        {
            // Arrange
            var sut = new CustomUnicodeEncoderBase(UnicodeRanges.All);
            var writer = new StringWriter();
            const string input = "abc&xyz";

            // Act
            sut.Encode(input, 0, input.Length, writer);

            // Assert
            Assert.Equal("abc[U+0026]xyz", writer.ToString());
        }

        // True when codePoint lies in the UTF-16 surrogate range U+D800..U+DFFF.
        private static bool IsSurrogateCodePoint(int codePoint)
        {
            return codePoint >= 0xD800 && codePoint <= 0xDFFF;
        }

        // Settings object whose allowed code points are exactly the values given
        // to its constructor. No test in this class references it.
        private sealed class CustomTextEncoderSettings : TextEncoderSettings
        {
            private readonly int[] _allowedCodePoints;

            public CustomTextEncoderSettings(params int[] allowedCodePoints)
            {
                _allowedCodePoints = allowedCodePoints;
            }

            public override IEnumerable<int> GetAllowedCodePoints()
            {
                return _allowedCodePoints;
            }
        }

        // Encoder under test: every scalar that has to be encoded is written as
        // "[U+XXXX]" (upper-case hex, at least four digits).
        private sealed class CustomUnicodeEncoderBase : UnicodeEncoderBase
        {
            // 'maxOutputCharsPerInputChar' is deliberately given the known-bad value 1
            // (the "[U+XXXX]" form is far longer), so the tests also cover the case
            // where the original size estimate is wrong.
            public CustomUnicodeEncoderBase(TextEncoderSettings filter)
                : base(filter, maxOutputCharsPerInputChar: 1)
            {
            }

            // Convenience overload: wraps the ranges in a fresh TextEncoderSettings.
            public CustomUnicodeEncoderBase(params UnicodeRange[] allowedRanges)
                : this(new TextEncoderSettings(allowedRanges))
            {
            }

            protected override void WriteEncodedScalar(ref Writer writer, uint value)
            {
                string rendered = string.Format(CultureInfo.InvariantCulture, "[U+{0:X4}]", value);
                writer.Write(rendered);
            }
        }
    }
}