// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Text;
using System;
using System.Diagnostics;
using System.Runtime.InteropServices;

namespace System.Text
{
    // A Decoder is used to decode a sequence of blocks of bytes into a
    // sequence of blocks of characters. Following instantiation of a decoder,
    // sequential blocks of bytes are converted into blocks of characters through
    // calls to the GetChars method. The decoder maintains state between the
    // conversions, allowing it to correctly decode byte sequences that span
    // adjacent blocks.
    //
    // Instances of specific implementations of the Decoder abstract base
    // class are typically obtained through calls to the GetDecoder method
    // of Encoding objects.
    //
    public abstract class Decoder
    {
        // Fallback strategy; null until one is assigned through the Fallback property.
        internal DecoderFallback _fallback = null;

        // Lazily created by the FallbackBuffer property; cleared whenever Fallback is replaced.
        internal DecoderFallbackBuffer _fallbackBuffer = null;

        protected Decoder()
        {
            // We don't call default reset because default reset probably isn't good if we aren't initialized.
        }

        // Gets or sets the fallback used by this decoder.
        //
        // Setting throws ArgumentNullException for a null value, and ArgumentException
        // if the current fallback buffer still has data remaining. A successful set
        // discards the cached fallback buffer so the next FallbackBuffer access
        // creates one from the new fallback.
        public DecoderFallback Fallback
        {
            get
            {
                return _fallback;
            }

            set
            {
                if (value == null)
                {
                    throw new ArgumentNullException(nameof(value));
                }

                // Can't change the fallback while its buffer still holds pending data.
                if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
                {
                    throw new ArgumentException(
                        SR.Argument_FallbackBufferNotEmpty, nameof(value));
                }

                _fallback = value;
                _fallbackBuffer = null;
            }
        }

        // Gets the fallback buffer, creating it on first use. When no fallback has
        // been assigned, the buffer comes from DecoderFallback.ReplacementFallback.
        //
        // Note: we don't test for threading here because async access to Encoders and Decoders
        // doesn't work anyway.
        public DecoderFallbackBuffer FallbackBuffer
        {
            get
            {
                if (_fallbackBuffer == null)
                {
                    if (_fallback != null)
                    {
                        _fallbackBuffer = _fallback.CreateFallbackBuffer();
                    }
                    else
                    {
                        _fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
                    }
                }

                return _fallbackBuffer;
            }
        }

        // True once a fallback buffer has been created; reading this never creates one.
        internal bool InternalHasFallbackBuffer
        {
            get
            {
                return _fallbackBuffer != null;
            }
        }

        // Reset the Decoder
        //
        // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder. This
        // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
        //
        // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
        //
        // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
        // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
        public virtual void Reset()
        {
            byte[] byteTemp = Array.Empty<byte>();
            char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
            GetChars(byteTemp, 0, 0, charTemp, 0, true);
            _fallbackBuffer?.Reset();
        }

        // Returns the number of characters the next call to GetChars will
        // produce if presented with the given range of bytes. The returned value
        // takes into account the state in which the decoder was left following the
        // last call to GetChars. The state of the decoder is not affected
        // by a call to this method.
        //
        public abstract int GetCharCount(byte[] bytes, int index, int count);

        // Flush-aware overload. The default implementation ignores flush and defers
        // to the three-argument overload; stateful decoders are expected to override it.
        public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
        {
            return GetCharCount(bytes, index, count);
        }

        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation.
        //
        // Throws ArgumentNullException when bytes is null and
        // ArgumentOutOfRangeException when count is negative.
        [CLSCompliant(false)]
        public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
        {
            // Validate input parameters
            if (bytes == null)
            {
                throw new ArgumentNullException(nameof(bytes),
                    SR.ArgumentNull_Array);
            }

            if (count < 0)
            {
                throw new ArgumentOutOfRangeException(nameof(count),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            byte[] arrByte = CopyToArray(bytes, count);

            // Forward flush so this count agrees with what the flush-aware GetChars
            // overload will produce; Convert relies on the two matching.
            return GetCharCount(arrByte, 0, count, flush);
        }

        // Span overload; pins the span and defers to the pointer overload.
        public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes, bool flush)
        {
            // MemoryMarshal.GetReference may hand back a null reference for an empty
            // span, which the pointer overload would reject as a null argument.
            // Substitute a valid (never dereferenced) address in that case.
            byte emptyByte = 0;

            fixed (byte* pinnedBytes = &MemoryMarshal.GetReference(bytes))
            {
                byte* bytesPtr = pinnedBytes != null ? pinnedBytes : &emptyByte;
                return GetCharCount(bytesPtr, bytes.Length, flush);
            }
        }

        // Decodes a range of bytes in a byte array into a range of characters
        // in a character array. The method decodes byteCount bytes from
        // bytes starting at index byteIndex, storing the resulting
        // characters in chars starting at index charIndex. The
        // decoding takes into account the state in which the decoder was left
        // following the last call to this method.
        //
        // An exception occurs if the character array is not large enough to
        // hold the complete decoding of the bytes. The GetCharCount method
        // can be used to determine the exact number of characters that will be
        // produced for a given range of bytes. Alternatively, the
        // GetMaxCharCount method of the Encoding that produced this
        // decoder can be used to determine the maximum number of characters that
        // will be produced for a given number of bytes, regardless of the actual
        // byte values.
        //
        public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                     char[] chars, int charIndex);

        // Flush-aware overload. The default implementation ignores flush and defers
        // to the five-argument overload; stateful decoders are expected to override it.
        public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex, bool flush)
        {
            return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
        }

        // We expect this to be the workhorse for NLS Encodings, but for existing
        // ones we need a working (if slow) default implementation.
        //
        // WARNING WARNING WARNING
        //
        // WARNING: If this breaks it could be a security threat.  Obviously we
        // call this internally, so you need to make sure that your pointers, counts
        // and indexes are correct when you call this method.
        //
        // In addition, we have internal code, which will be marked as "safe" calling
        // this code.  However this code is dependent upon the implementation of an
        // external GetChars() method, which could be overridden by a third party and
        // the results of which cannot be guaranteed.  We use that result to copy
        // the char[] to our char* output buffer.  If the result count was wrong, we
        // could easily overflow our output buffer.  Therefore we do an extra test
        // when we copy the buffer so that we don't overflow charCount either.
        [CLSCompliant(false)]
        public virtual unsafe int GetChars(byte* bytes, int byteCount,
                                           char* chars, int charCount, bool flush)
        {
            // Validate input parameters
            if (chars == null || bytes == null)
            {
                throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
                    SR.ArgumentNull_Array);
            }

            if (byteCount < 0 || charCount < 0)
            {
                throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Get the byte array to convert
            byte[] arrByte = CopyToArray(bytes, byteCount);

            // Get the char array to fill
            char[] arrChar = new char[charCount];

            // Do the work
            int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);

            Debug.Assert(result <= charCount, "Returned more chars than we have space for");

            // Copy the char array
            // WARNING: We MUST make sure that we don't copy too many chars.  We can't
            // rely on result because it could be a 3rd party implementation.  We need
            // to make sure we never copy more than charCount chars no matter the value
            // of result
            if (result < charCount)
            {
                charCount = result;
            }

            // charCount is now min(result, original charCount), so this loop can never
            // overrun the caller's buffer just because of an issue in GetChars.
            for (int index = 0; index < charCount; index++)
            {
                chars[index] = arrChar[index];
            }

            return charCount;
        }

        // Span overload; pins both spans and defers to the pointer overload.
        public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush)
        {
            // See GetCharCount(ReadOnlySpan<byte>, bool): empty spans may pin to a null
            // pointer, so substitute valid (never dereferenced) addresses.
            byte emptyByte = 0;
            char emptyChar = '\0';

            fixed (byte* pinnedBytes = &MemoryMarshal.GetReference(bytes))
            fixed (char* pinnedChars = &MemoryMarshal.GetReference(chars))
            {
                byte* bytesPtr = pinnedBytes != null ? pinnedBytes : &emptyByte;
                char* charsPtr = pinnedChars != null ? pinnedChars : &emptyChar;
                return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length, flush);
            }
        }

        // This method is used when the output buffer might not be large enough.
        // It will decode until it runs out of bytes, and then it will return
        // true if the entire input was converted.  In either case it
        // will also return the number of converted bytes and output characters used.
        // It will only throw a buffer overflow exception if the entire length of chars[] is
        // too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
        // We're done processing this buffer only if completed returns true.
        //
        // Might consider checking Max...Count to avoid the extra counting step.
        //
        // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
        // that its likely that we didn't consume as many bytes as we could have.  For some
        // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
        public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
                                    char[] chars, int charIndex, int charCount, bool flush,
                                    out int bytesUsed, out int charsUsed, out bool completed)
        {
            // Validate parameters
            if (bytes == null || chars == null)
            {
                throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
                    SR.ArgumentNull_Array);
            }

            if (byteIndex < 0 || byteCount < 0)
            {
                throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (charIndex < 0 || charCount < 0)
            {
                throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            if (bytes.Length - byteIndex < byteCount)
            {
                throw new ArgumentOutOfRangeException(nameof(bytes),
                    SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            if (chars.Length - charIndex < charCount)
            {
                throw new ArgumentOutOfRangeException(nameof(chars),
                    SR.ArgumentOutOfRange_IndexCountBuffer);
            }

            bytesUsed = byteCount;

            // Its easy to do if it won't overrun our buffer.
            // do/while so that an empty input (byteCount == 0) still gets one attempt,
            // e.g. a flush-only call, instead of falling straight through to the overflow
            // exception. For byteCount > 0 the attempts are unchanged: n, n/2, ..., 1.
            do
            {
                if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
                {
                    charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
                    completed = (bytesUsed == byteCount &&
                        (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
                    return;
                }

                // Try again with 1/2 the count, won't flush then 'cause won't read it all
                flush = false;
                bytesUsed /= 2;
            }
            while (bytesUsed > 0);

            // Oops, we didn't have anything, we'll have to throw an overflow
            throw new ArgumentException(SR.Argument_ConversionOverflow);
        }

        // This is the version that uses *.
        // We're done processing this buffer only if completed returns true.
        //
        // Might consider checking Max...Count to avoid the extra counting step.
        //
        // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
        // that its likely that we didn't consume as many bytes as we could have.  For some
        // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
        [CLSCompliant(false)]
        public virtual unsafe void Convert(byte* bytes, int byteCount,
                                           char* chars, int charCount, bool flush,
                                           out int bytesUsed, out int charsUsed, out bool completed)
        {
            // Validate input parameters
            if (chars == null || bytes == null)
            {
                throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
                    SR.ArgumentNull_Array);
            }

            if (byteCount < 0 || charCount < 0)
            {
                throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
                    SR.ArgumentOutOfRange_NeedNonNegNum);
            }

            // Get ready to do it
            bytesUsed = byteCount;

            // Its easy to do if it won't overrun our buffer.
            // do/while so that an empty input (byteCount == 0) still gets one attempt;
            // for byteCount > 0 the attempts are unchanged: n, n/2, ..., 1.
            do
            {
                if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
                {
                    charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
                    completed = (bytesUsed == byteCount &&
                        (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
                    return;
                }

                // Try again with 1/2 the count, won't flush then 'cause won't read it all
                flush = false;
                bytesUsed /= 2;
            }
            while (bytesUsed > 0);

            // Oops, we didn't have anything, we'll have to throw an overflow
            throw new ArgumentException(SR.Argument_ConversionOverflow);
        }

        // Span overload; pins both spans and defers to the pointer overload.
        public virtual unsafe void Convert(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush, out int bytesUsed, out int charsUsed, out bool completed)
        {
            // See GetCharCount(ReadOnlySpan<byte>, bool): empty spans may pin to a null
            // pointer, so substitute valid (never dereferenced) addresses.
            byte emptyByte = 0;
            char emptyChar = '\0';

            fixed (byte* pinnedBytes = &MemoryMarshal.GetReference(bytes))
            fixed (char* pinnedChars = &MemoryMarshal.GetReference(chars))
            {
                byte* bytesPtr = pinnedBytes != null ? pinnedBytes : &emptyByte;
                char* charsPtr = pinnedChars != null ? pinnedChars : &emptyChar;
                Convert(bytesPtr, bytes.Length, charsPtr, chars.Length, flush, out bytesUsed, out charsUsed, out completed);
            }
        }

        // Copies count bytes starting at source into a freshly allocated managed array.
        // Callers must have validated that source is non-null and count is non-negative.
        private static unsafe byte[] CopyToArray(byte* source, int count)
        {
            byte[] copy = new byte[count];

            for (int index = 0; index < count; index++)
            {
                copy[index] = source[index];
            }

            return copy;
        }
    }
}