// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Diagnostics;
using System.Threading;

namespace System.Text
{
    /// <summary>
    /// Base class for encoder fallbacks. A fallback is a factory for
    /// <see cref="EncoderFallbackBuffer"/> instances and reports the maximum number of
    /// characters one fallback operation may produce.
    /// </summary>
    public abstract class EncoderFallback
    {
        private static EncoderFallback s_replacementFallback; // Default fallback, uses no best fit & "?"
        private static EncoderFallback s_exceptionFallback;

        // Get each of our generic fallbacks.

        /// <summary>
        /// Gets the shared <see cref="EncoderReplacementFallback"/> instance, creating it on first use.
        /// </summary>
        public static EncoderFallback ReplacementFallback
        {
            get
            {
                if (s_replacementFallback == null)
                {
                    // CompareExchange only stores the new instance while the field is still null,
                    // so an instance published by another thread in the meantime is kept.
                    Interlocked.CompareExchange<EncoderFallback>(ref s_replacementFallback, new EncoderReplacementFallback(), null);
                }

                return s_replacementFallback;
            }
        }

        /// <summary>
        /// Gets the shared <see cref="EncoderExceptionFallback"/> instance, creating it on first use.
        /// </summary>
        public static EncoderFallback ExceptionFallback
        {
            get
            {
                if (s_exceptionFallback == null)
                {
                    // Same publication pattern as ReplacementFallback: first stored instance wins.
                    Interlocked.CompareExchange<EncoderFallback>(ref s_exceptionFallback, new EncoderExceptionFallback(), null);
                }

                return s_exceptionFallback;
            }
        }

        /// <summary>
        /// Creates the buffer that supplies the Unicode alternative for characters that need to
        /// fall back.
        /// </summary>
        /// <remarks>
        /// Most implementations will be: <c>return new MyCustomEncoderFallbackBuffer(this);</c>
        /// </remarks>
        /// <returns>A new fallback buffer for this fallback.</returns>
        public abstract EncoderFallbackBuffer CreateFallbackBuffer();

        /// <summary>
        /// Gets the maximum number of characters that this instance of this fallback could return.
        /// </summary>
        public abstract int MaxCharCount { get; }
    }


    /// <summary>
    /// Base class for the buffer an encoding uses to obtain replacement characters for input it
    /// cannot encode. Derived classes implement the public abstract members; the internal members
    /// carry the bookkeeping (input bounds, encoder, recursion tracking) used by the encodings.
    /// </summary>
    public abstract class EncoderFallbackBuffer
    {
        // Most implementations will probably need an implementation-specific constructor

        /// <summary>
        /// Prepares the fallback for a single unknown character.
        /// </summary>
        /// <param name="charUnknown">The character that needs to fall back.</param>
        /// <param name="index">Index of the character in the input being encoded.</param>
        /// <returns>
        /// The value <see cref="InternalFallback"/> records as "currently falling back".
        /// </returns>
        public abstract bool Fallback(char charUnknown, int index);

        /// <summary>
        /// Prepares the fallback for an unknown surrogate pair.
        /// </summary>
        /// <param name="charUnknownHigh">The high surrogate of the pair.</param>
        /// <param name="charUnknownLow">The low surrogate of the pair.</param>
        /// <param name="index">Index of the high surrogate in the input being encoded.</param>
        /// <returns>
        /// The value <see cref="InternalFallback"/> records as "currently falling back".
        /// </returns>
        public abstract bool Fallback(char charUnknownHigh, char charUnknownLow, int index);

        /// <summary>
        /// Gets the next character of the fallback. <see cref="Reset"/> and
        /// <see cref="InternalGetNextChar"/> treat a return value of 0 as "nothing left".
        /// </summary>
        public abstract char GetNextChar();

        /// <summary>
        /// Backs up one character in the fallback.
        /// </summary>
        public abstract bool MovePrevious();

        /// <summary>
        /// Gets how many characters are left in this fallback.
        /// </summary>
        public abstract int Remaining { get; }

        // Not sure if this should be public or not.
        /// <summary>
        /// Clears the buffer. The default implementation drains it by calling
        /// <see cref="GetNextChar"/> until it returns 0.
        /// </summary>
        public virtual void Reset()
        {
            while (GetNextChar() != (char)0)
            {
                // Intentionally empty: each call consumes one pending fallback character.
            }
        }

        // Internal items to help us figure out what we're doing as far as error messages, etc.
        // These help us with our performance and messages internally

        // Start of the input; used to turn the caller's char pointer into an index.
        internal unsafe char* charStart;

        // End of the input; reaching it means no low surrogate can follow.
        internal unsafe char* charEnd;

        // Encoder whose MustFlush is consulted and whose _charLeftOver may be written.
        internal EncoderNLS encoder;

        // True if we're calling from a GetBytes method, false if we're calling from a GetByteCount.
        internal bool setEncoder;

        // Set to true when a dangling high surrogate was stored in the encoder.
        internal bool bUsedEncoder;

        // Result of the most recent Fallback call / whether GetNextChar is still producing chars.
        internal bool bFallingBack = false;

        // Number of fallbacks requested while already falling back.
        internal int iRecursionCount = 0;

        private const int iMaxRecursion = 250;

        /// <summary>
        /// Internal reset: clears the input start pointer and the falling-back/recursion state,
        /// then calls <see cref="Reset"/>.
        /// For example, what if someone fails a conversion and wants to reset one of our fallback buffers?
        /// </summary>
        internal unsafe void InternalReset()
        {
            charStart = null;
            bFallingBack = false;
            iRecursionCount = 0;
            Reset();
        }

        /// <summary>
        /// Sets the bookkeeping values used by <see cref="InternalFallback"/> and clears the
        /// used-encoder, falling-back and recursion state.
        /// This can't be part of the constructor because EncoderFallbacks would have to know how
        /// to implement these.
        /// </summary>
        /// <param name="charStart">Pointer to the start of the input characters.</param>
        /// <param name="charEnd">Pointer to the end of the input characters.</param>
        /// <param name="encoder">Encoder associated with the operation; may be null.</param>
        /// <param name="setEncoder">
        /// True if we're calling from a GetBytes method, false if we're calling from a GetByteCount.
        /// </param>
        internal unsafe void InternalInitialize(char* charStart, char* charEnd, EncoderNLS encoder, bool setEncoder)
        {
            this.charStart = charStart;
            this.charEnd = charEnd;
            this.encoder = encoder;
            this.setEncoder = setEncoder;
            this.bUsedEncoder = false;
            this.bFallingBack = false;
            this.iRecursionCount = 0;
        }

        /// <summary>
        /// Calls <see cref="GetNextChar"/> and updates the falling-back state from the result.
        /// </summary>
        /// <returns>The character returned by <see cref="GetNextChar"/>.</returns>
        internal char InternalGetNextChar()
        {
            char ch = GetNextChar();

            // A non-zero char means the fallback is still producing output.
            bFallingBack = (ch != 0);

            // Once the fallback is exhausted the recursion counter starts over.
            if (ch == 0)
            {
                iRecursionCount = 0;
            }

            return ch;
        }

        /// <summary>
        /// Fallback the current character using the remaining buffer and encoder if necessary.
        /// </summary>
        /// <remarks>
        /// This can only be called by our encodings (other have to use the public fallback methods), so
        /// we can use our EncoderNLS here too.
        /// setEncoder is true if we're calling from a GetBytes method, false if we're calling from a GetByteCount.
        ///
        /// Note that this could also change the contents of this.encoder, which is the same
        /// object that the caller is using, so the caller could mess up the encoder for us
        /// if they aren't careful.
        /// </remarks>
        /// <param name="ch">The character that could not be encoded.</param>
        /// <param name="chars">
        /// Pointer just past <paramref name="ch"/> in the input; advanced by one more character
        /// when a following low surrogate is consumed as part of a pair.
        /// </param>
        /// <returns>
        /// The result of the <c>Fallback</c> call, or false when a trailing high surrogate was
        /// left for the next call because the encoder is not flushing.
        /// </returns>
        /// <exception cref="ArgumentException">
        /// Thrown via <see cref="ThrowLastCharRecursive"/> when the recursion limit is exceeded
        /// while already falling back.
        /// </exception>
        internal unsafe virtual bool InternalFallback(char ch, ref char* chars)
        {
            // Shouldn't have null charStart
            Debug.Assert(charStart != null,
                "[EncoderFallback.InternalFallbackBuffer]Fallback buffer is not initialized");

            // Get our index, remember chars was preincremented to point at next char, so have to -1
            int index = (int)(chars - charStart) - 1;

            // See if it was a high surrogate
            if (char.IsHighSurrogate(ch))
            {
                // See if there's a low surrogate to go with it
                if (chars >= this.charEnd)
                {
                    // Nothing left in input buffer
                    // No input, return 0 if mustflush is false
                    if (this.encoder != null && !this.encoder.MustFlush)
                    {
                        // Done, nothing to fallback
                        if (this.setEncoder)
                        {
                            // Remember the high surrogate in the encoder so a later call can pair it.
                            bUsedEncoder = true;
                            this.encoder._charLeftOver = ch;
                        }

                        bFallingBack = false;
                        return false;
                    }
                }
                else
                {
                    // Might have a low surrogate
                    char cNext = *chars;
                    if (char.IsLowSurrogate(cNext))
                    {
                        // If already falling back then fail.
                        // The counter is only incremented while bFallingBack is true (short-circuit).
                        if (bFallingBack && iRecursionCount++ > iMaxRecursion)
                        {
                            ThrowLastCharRecursive(char.ConvertToUtf32(ch, cNext));
                        }

                        // Next is a surrogate, add it as surrogate pair, and increment chars
                        chars++;
                        bFallingBack = Fallback(ch, cNext, index);
                        return bFallingBack;
                    }

                    // Next isn't a low surrogate, just fallback the high surrogate
                }
            }

            // If already falling back then fail.
            // The counter is only incremented while bFallingBack is true (short-circuit).
            if (bFallingBack && iRecursionCount++ > iMaxRecursion)
            {
                ThrowLastCharRecursive((int)ch);
            }

            // Fall back our char
            bFallingBack = Fallback(ch, index);

            return bFallingBack;
        }

        // internal helper methods

        /// <summary>
        /// Throws the exception reporting a recursive fallback.
        /// </summary>
        /// <param name="charRecursive">
        /// The complete character (code point value) passed to the message format.
        /// </param>
        /// <exception cref="ArgumentException">
        /// Always thrown, with the <c>SR.Argument_RecursiveFallback</c> message and parameter
        /// name "chars".
        /// </exception>
        internal void ThrowLastCharRecursive(int charRecursive)
        {
            // Throw it, using our complete character
            throw new ArgumentException(
                SR.Format(SR.Argument_RecursiveFallback,
                    charRecursive), "chars");
        }
    }
}