1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System.Text;
6 using System;
7 using System.Diagnostics;
8 using System.Runtime.InteropServices;
9 
10 namespace System.Text
11 {
12     // A Decoder is used to decode a sequence of blocks of bytes into a
13     // sequence of blocks of characters. Following instantiation of a decoder,
14     // sequential blocks of bytes are converted into blocks of characters through
15     // calls to the GetChars method. The decoder maintains state between the
16     // conversions, allowing it to correctly decode byte sequences that span
17     // adjacent blocks.
18     //
19     // Instances of specific implementations of the Decoder abstract base
20     // class are typically obtained through calls to the GetDecoder method
21     // of Encoding objects.
22     //
23     public abstract class Decoder
24     {
25         internal DecoderFallback _fallback = null;
26 
27         internal DecoderFallbackBuffer _fallbackBuffer = null;
28 
Decoder()29         protected Decoder()
30         {
31             // We don't call default reset because default reset probably isn't good if we aren't initialized.
32         }
33 
34         public DecoderFallback Fallback
35         {
36             get
37             {
38                 return _fallback;
39             }
40 
41             set
42             {
43                 if (value == null)
44                     throw new ArgumentNullException(nameof(value));
45 
46                 // Can't change fallback if buffer is wrong
47                 if (_fallbackBuffer != null && _fallbackBuffer.Remaining > 0)
48                     throw new ArgumentException(
49                       SR.Argument_FallbackBufferNotEmpty, nameof(value));
50 
51                 _fallback = value;
52                 _fallbackBuffer = null;
53             }
54         }
55 
56         // Note: we don't test for threading here because async access to Encoders and Decoders
57         // doesn't work anyway.
58         public DecoderFallbackBuffer FallbackBuffer
59         {
60             get
61             {
62                 if (_fallbackBuffer == null)
63                 {
64                     if (_fallback != null)
65                         _fallbackBuffer = _fallback.CreateFallbackBuffer();
66                     else
67                         _fallbackBuffer = DecoderFallback.ReplacementFallback.CreateFallbackBuffer();
68                 }
69 
70                 return _fallbackBuffer;
71             }
72         }
73 
74         internal bool InternalHasFallbackBuffer
75         {
76             get
77             {
78                 return _fallbackBuffer != null;
79             }
80         }
81 
82         // Reset the Decoder
83         //
84         // Normally if we call GetChars() and an error is thrown we don't change the state of the Decoder.  This
85         // would allow the caller to correct the error condition and try again (such as if they need a bigger buffer.)
86         //
87         // If the caller doesn't want to try again after GetChars() throws an error, then they need to call Reset().
88         //
89         // Virtual implementation has to call GetChars with flush and a big enough buffer to clear a 0 byte string
90         // We avoid GetMaxCharCount() because a) we can't call the base encoder and b) it might be really big.
Reset()91         public virtual void Reset()
92         {
93             byte[] byteTemp = Array.Empty<byte>();
94             char[] charTemp = new char[GetCharCount(byteTemp, 0, 0, true)];
95             GetChars(byteTemp, 0, 0, charTemp, 0, true);
96             _fallbackBuffer?.Reset();
97         }
98 
99         // Returns the number of characters the next call to GetChars will
100         // produce if presented with the given range of bytes. The returned value
101         // takes into account the state in which the decoder was left following the
102         // last call to GetChars. The state of the decoder is not affected
103         // by a call to this method.
104         //
GetCharCount(byte[] bytes, int index, int count)105         public abstract int GetCharCount(byte[] bytes, int index, int count);
106 
GetCharCount(byte[] bytes, int index, int count, bool flush)107         public virtual int GetCharCount(byte[] bytes, int index, int count, bool flush)
108         {
109             return GetCharCount(bytes, index, count);
110         }
111 
112         // We expect this to be the workhorse for NLS Encodings, but for existing
113         // ones we need a working (if slow) default implementation)
114         [CLSCompliant(false)]
GetCharCount(byte* bytes, int count, bool flush)115         public virtual unsafe int GetCharCount(byte* bytes, int count, bool flush)
116         {
117             // Validate input parameters
118             if (bytes == null)
119                 throw new ArgumentNullException(nameof(bytes),
120                       SR.ArgumentNull_Array);
121 
122             if (count < 0)
123                 throw new ArgumentOutOfRangeException(nameof(count),
124                       SR.ArgumentOutOfRange_NeedNonNegNum);
125 
126             byte[] arrbyte = new byte[count];
127             int index;
128 
129             for (index = 0; index < count; index++)
130                 arrbyte[index] = bytes[index];
131 
132             return GetCharCount(arrbyte, 0, count);
133         }
134 
GetCharCount(ReadOnlySpan<byte> bytes, bool flush)135         public virtual unsafe int GetCharCount(ReadOnlySpan<byte> bytes, bool flush)
136         {
137             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
138             {
139                 return GetCharCount(bytesPtr, bytes.Length, flush);
140             }
141         }
142 
143         // Decodes a range of bytes in a byte array into a range of characters
144         // in a character array. The method decodes byteCount bytes from
145         // bytes starting at index byteIndex, storing the resulting
146         // characters in chars starting at index charIndex. The
147         // decoding takes into account the state in which the decoder was left
148         // following the last call to this method.
149         //
150         // An exception occurs if the character array is not large enough to
151         // hold the complete decoding of the bytes. The GetCharCount method
152         // can be used to determine the exact number of characters that will be
153         // produced for a given range of bytes. Alternatively, the
154         // GetMaxCharCount method of the Encoding that produced this
155         // decoder can be used to determine the maximum number of characters that
156         // will be produced for a given number of bytes, regardless of the actual
157         // byte values.
158         //
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)159         public abstract int GetChars(byte[] bytes, int byteIndex, int byteCount,
160                                         char[] chars, int charIndex);
161 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, bool flush)162         public virtual int GetChars(byte[] bytes, int byteIndex, int byteCount,
163                                        char[] chars, int charIndex, bool flush)
164         {
165             return GetChars(bytes, byteIndex, byteCount, chars, charIndex);
166         }
167 
168         // We expect this to be the workhorse for NLS Encodings, but for existing
169         // ones we need a working (if slow) default implementation)
170         //
171         // WARNING WARNING WARNING
172         //
173         // WARNING: If this breaks it could be a security threat.  Obviously we
174         // call this internally, so you need to make sure that your pointers, counts
175         // and indexes are correct when you call this method.
176         //
177         // In addition, we have internal code, which will be marked as "safe" calling
178         // this code.  However this code is dependent upon the implementation of an
179         // external GetChars() method, which could be overridden by a third party and
180         // the results of which cannot be guaranteed.  We use that result to copy
181         // the char[] to our char* output buffer.  If the result count was wrong, we
182         // could easily overflow our output buffer.  Therefore we do an extra test
183         // when we copy the buffer so that we don't overflow charCount either.
184         [CLSCompliant(false)]
GetChars(byte* bytes, int byteCount, char* chars, int charCount, bool flush)185         public virtual unsafe int GetChars(byte* bytes, int byteCount,
186                                               char* chars, int charCount, bool flush)
187         {
188             // Validate input parameters
189             if (chars == null || bytes == null)
190                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
191                     SR.ArgumentNull_Array);
192 
193             if (byteCount < 0 || charCount < 0)
194                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
195                     SR.ArgumentOutOfRange_NeedNonNegNum);
196 
197             // Get the byte array to convert
198             byte[] arrByte = new byte[byteCount];
199 
200             int index;
201             for (index = 0; index < byteCount; index++)
202                 arrByte[index] = bytes[index];
203 
204             // Get the char array to fill
205             char[] arrChar = new char[charCount];
206 
207             // Do the work
208             int result = GetChars(arrByte, 0, byteCount, arrChar, 0, flush);
209 
210             Debug.Assert(result <= charCount, "Returned more chars than we have space for");
211 
212             // Copy the char array
213             // WARNING: We MUST make sure that we don't copy too many chars.  We can't
214             // rely on result because it could be a 3rd party implementation.  We need
215             // to make sure we never copy more than charCount chars no matter the value
216             // of result
217             if (result < charCount)
218                 charCount = result;
219 
220             // We check both result and charCount so that we don't accidentally overrun
221             // our pointer buffer just because of an issue in GetChars
222             for (index = 0; index < charCount; index++)
223                 chars[index] = arrChar[index];
224 
225             return charCount;
226         }
227 
GetChars(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush)228         public virtual unsafe int GetChars(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush)
229         {
230             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
231             fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
232             {
233                 return GetChars(bytesPtr, bytes.Length, charsPtr, chars.Length, flush);
234             }
235         }
236 
237         // This method is used when the output buffer might not be large enough.
238         // It will decode until it runs out of bytes, and then it will return
239         // true if it the entire input was converted.  In either case it
240         // will also return the number of converted bytes and output characters used.
241         // It will only throw a buffer overflow exception if the entire lenght of chars[] is
242         // too small to store the next char. (like 0 or maybe 1 or 4 for some encodings)
243         // We're done processing this buffer only if completed returns true.
244         //
245         // Might consider checking Max...Count to avoid the extra counting step.
246         //
247         // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
248         // that its likely that we didn't consume as many bytes as we could have.  For some
249         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
Convert(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed)250         public virtual void Convert(byte[] bytes, int byteIndex, int byteCount,
251                                       char[] chars, int charIndex, int charCount, bool flush,
252                                       out int bytesUsed, out int charsUsed, out bool completed)
253         {
254             // Validate parameters
255             if (bytes == null || chars == null)
256                 throw new ArgumentNullException((bytes == null ? nameof(bytes) : nameof(chars)),
257                       SR.ArgumentNull_Array);
258 
259             if (byteIndex < 0 || byteCount < 0)
260                 throw new ArgumentOutOfRangeException((byteIndex < 0 ? nameof(byteIndex) : nameof(byteCount)),
261                       SR.ArgumentOutOfRange_NeedNonNegNum);
262 
263             if (charIndex < 0 || charCount < 0)
264                 throw new ArgumentOutOfRangeException((charIndex < 0 ? nameof(charIndex) : nameof(charCount)),
265                       SR.ArgumentOutOfRange_NeedNonNegNum);
266 
267             if (bytes.Length - byteIndex < byteCount)
268                 throw new ArgumentOutOfRangeException(nameof(bytes),
269                       SR.ArgumentOutOfRange_IndexCountBuffer);
270 
271             if (chars.Length - charIndex < charCount)
272                 throw new ArgumentOutOfRangeException(nameof(chars),
273                       SR.ArgumentOutOfRange_IndexCountBuffer);
274 
275             bytesUsed = byteCount;
276 
277             // Its easy to do if it won't overrun our buffer.
278             while (bytesUsed > 0)
279             {
280                 if (GetCharCount(bytes, byteIndex, bytesUsed, flush) <= charCount)
281                 {
282                     charsUsed = GetChars(bytes, byteIndex, bytesUsed, chars, charIndex, flush);
283                     completed = (bytesUsed == byteCount &&
284                         (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
285                     return;
286                 }
287 
288                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
289                 flush = false;
290                 bytesUsed /= 2;
291             }
292 
293             // Oops, we didn't have anything, we'll have to throw an overflow
294             throw new ArgumentException(SR.Argument_ConversionOverflow);
295         }
296 
297         // This is the version that uses *.
298         // We're done processing this buffer only if completed returns true.
299         //
300         // Might consider checking Max...Count to avoid the extra counting step.
301         //
302         // Note that if all of the input bytes are not consumed, then we'll do a /2, which means
303         // that its likely that we didn't consume as many bytes as we could have.  For some
304         // applications this could be slow.  (Like trying to exactly fill an output buffer from a bigger stream)
305         [CLSCompliant(false)]
Convert(byte* bytes, int byteCount, char* chars, int charCount, bool flush, out int bytesUsed, out int charsUsed, out bool completed)306         public virtual unsafe void Convert(byte* bytes, int byteCount,
307                                              char* chars, int charCount, bool flush,
308                                              out int bytesUsed, out int charsUsed, out bool completed)
309         {
310             // Validate input parameters
311             if (chars == null || bytes == null)
312                 throw new ArgumentNullException(chars == null ? nameof(chars) : nameof(bytes),
313                     SR.ArgumentNull_Array);
314 
315             if (byteCount < 0 || charCount < 0)
316                 throw new ArgumentOutOfRangeException((byteCount < 0 ? nameof(byteCount) : nameof(charCount)),
317                     SR.ArgumentOutOfRange_NeedNonNegNum);
318 
319             // Get ready to do it
320             bytesUsed = byteCount;
321 
322             // Its easy to do if it won't overrun our buffer.
323             while (bytesUsed > 0)
324             {
325                 if (GetCharCount(bytes, bytesUsed, flush) <= charCount)
326                 {
327                     charsUsed = GetChars(bytes, bytesUsed, chars, charCount, flush);
328                     completed = (bytesUsed == byteCount &&
329                         (_fallbackBuffer == null || _fallbackBuffer.Remaining == 0));
330                     return;
331                 }
332 
333                 // Try again with 1/2 the count, won't flush then 'cause won't read it all
334                 flush = false;
335                 bytesUsed /= 2;
336             }
337 
338             // Oops, we didn't have anything, we'll have to throw an overflow
339             throw new ArgumentException(SR.Argument_ConversionOverflow);
340         }
341 
Convert(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush, out int bytesUsed, out int charsUsed, out bool completed)342         public virtual unsafe void Convert(ReadOnlySpan<byte> bytes, Span<char> chars, bool flush, out int bytesUsed, out int charsUsed, out bool completed)
343         {
344             fixed (byte* bytesPtr = &MemoryMarshal.GetReference(bytes))
345             fixed (char* charsPtr = &MemoryMarshal.GetReference(chars))
346             {
347                 Convert(bytesPtr, bytes.Length, charsPtr, chars.Length, flush, out bytesUsed, out charsUsed, out completed);
348             }
349         }
350     }
351 }
352