1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System.Diagnostics;
6 using System.Threading;
7 
8 namespace System.Text
9 {
10     public abstract class EncoderFallback
11     {
12         private static EncoderFallback s_replacementFallback; // Default fallback, uses no best fit & "?"
13         private static EncoderFallback s_exceptionFallback;
14 
15         // Get each of our generic fallbacks.
16 
17         public static EncoderFallback ReplacementFallback
18         {
19             get
20             {
21                 if (s_replacementFallback == null)
22                     Interlocked.CompareExchange<EncoderFallback>(ref s_replacementFallback, new EncoderReplacementFallback(), null);
23 
24                 return s_replacementFallback;
25             }
26         }
27 
28 
29         public static EncoderFallback ExceptionFallback
30         {
31             get
32             {
33                 if (s_exceptionFallback == null)
34                     Interlocked.CompareExchange<EncoderFallback>(ref s_exceptionFallback, new EncoderExceptionFallback(), null);
35 
36                 return s_exceptionFallback;
37             }
38         }
39 
40         // Fallback
41         //
42         // Return the appropriate unicode string alternative to the character that need to fall back.
43         // Most implementations will be:
44         //      return new MyCustomEncoderFallbackBuffer(this);
45 
CreateFallbackBuffer()46         public abstract EncoderFallbackBuffer CreateFallbackBuffer();
47 
48         // Maximum number of characters that this instance of this fallback could return
49 
50         public abstract int MaxCharCount { get; }
51     }
52 
53 
54     public abstract class EncoderFallbackBuffer
55     {
56         // Most implementations will probably need an implementation-specific constructor
57 
58         // Public methods that cannot be overridden that let us do our fallback thing
59         // These wrap the internal methods so that we can check for people doing stuff that is incorrect
60 
Fallback(char charUnknown, int index)61         public abstract bool Fallback(char charUnknown, int index);
62 
Fallback(char charUnknownHigh, char charUnknownLow, int index)63         public abstract bool Fallback(char charUnknownHigh, char charUnknownLow, int index);
64 
65         // Get next character
66 
GetNextChar()67         public abstract char GetNextChar();
68 
69         // Back up a character
70 
MovePrevious()71         public abstract bool MovePrevious();
72 
73         // How many chars left in this fallback?
74 
75         public abstract int Remaining { get; }
76 
77         // Not sure if this should be public or not.
78         // Clear the buffer
79 
Reset()80         public virtual void Reset()
81         {
82             while (GetNextChar() != (char)0) ;
83         }
84 
85         // Internal items to help us figure out what we're doing as far as error messages, etc.
86         // These help us with our performance and messages internally
87         internal unsafe char* charStart;
88         internal unsafe char* charEnd;
89         internal EncoderNLS encoder;
90         internal bool setEncoder;
91         internal bool bUsedEncoder;
92         internal bool bFallingBack = false;
93         internal int iRecursionCount = 0;
94         private const int iMaxRecursion = 250;
95 
96         // Internal Reset
97         // For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
InternalReset()98         internal unsafe void InternalReset()
99         {
100             charStart = null;
101             bFallingBack = false;
102             iRecursionCount = 0;
103             Reset();
104         }
105 
106         // Set the above values
107         // This can't be part of the constructor because EncoderFallbacks would have to know how to implement these.
InternalInitialize(char* charStart, char* charEnd, EncoderNLS encoder, bool setEncoder)108         internal unsafe void InternalInitialize(char* charStart, char* charEnd, EncoderNLS encoder, bool setEncoder)
109         {
110             this.charStart = charStart;
111             this.charEnd = charEnd;
112             this.encoder = encoder;
113             this.setEncoder = setEncoder;
114             this.bUsedEncoder = false;
115             this.bFallingBack = false;
116             this.iRecursionCount = 0;
117         }
118 
InternalGetNextChar()119         internal char InternalGetNextChar()
120         {
121             char ch = GetNextChar();
122             bFallingBack = (ch != 0);
123             if (ch == 0) iRecursionCount = 0;
124             return ch;
125         }
126 
127         // Fallback the current character using the remaining buffer and encoder if necessary
128         // This can only be called by our encodings (other have to use the public fallback methods), so
129         // we can use our EncoderNLS here too.
130         // setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount
131         //
132         // Note that this could also change the contents of this.encoder, which is the same
133         // object that the caller is using, so the caller could mess up the encoder for us
134         // if they aren't careful.
InternalFallback(char ch, ref char* chars)135         internal unsafe virtual bool InternalFallback(char ch, ref char* chars)
136         {
137             // Shouldn't have null charStart
138             Debug.Assert(charStart != null,
139                 "[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");
140 
141             // Get our index, remember chars was preincremented to point at next char, so have to -1
142             int index = (int)(chars - charStart) - 1;
143 
144             // See if it was a high surrogate
145             if (Char.IsHighSurrogate(ch))
146             {
147                 // See if there's a low surrogate to go with it
148                 if (chars >= this.charEnd)
149                 {
150                     // Nothing left in input buffer
151                     // No input, return 0 if mustflush is false
152                     if (this.encoder != null && !this.encoder.MustFlush)
153                     {
154                         // Done, nothing to fallback
155                         if (this.setEncoder)
156                         {
157                             bUsedEncoder = true;
158                             this.encoder._charLeftOver = ch;
159                         }
160                         bFallingBack = false;
161                         return false;
162                     }
163                 }
164                 else
165                 {
166                     // Might have a low surrogate
167                     char cNext = *chars;
168                     if (Char.IsLowSurrogate(cNext))
169                     {
170                         // If already falling back then fail
171                         if (bFallingBack && iRecursionCount++ > iMaxRecursion)
172                             ThrowLastCharRecursive(Char.ConvertToUtf32(ch, cNext));
173 
174                         // Next is a surrogate, add it as surrogate pair, and increment chars
175                         chars++;
176                         bFallingBack = Fallback(ch, cNext, index);
177                         return bFallingBack;
178                     }
179                     // Next isn't a low surrogate, just fallback the high surrogate
180                 }
181             }
182 
183             // If already falling back then fail
184             if (bFallingBack && iRecursionCount++ > iMaxRecursion)
185                 ThrowLastCharRecursive((int)ch);
186 
187             // Fall back our char
188             bFallingBack = Fallback(ch, index);
189 
190             return bFallingBack;
191         }
192 
193         // private helper methods
ThrowLastCharRecursive(int charRecursive)194         internal void ThrowLastCharRecursive(int charRecursive)
195         {
196             // Throw it, using our complete character
197             throw new ArgumentException(
198                 SR.Format(SR.Argument_RecursiveFallback,
199                     charRecursive), "chars");
200         }
201     }
202 }
203