//
// I18N.CJK.CP936.cs
//
// Author:
//   Atsushi Enomoto  <atsushi@ximian.com>
//
// (new implementation based on CP950.)
//

using System;
using System.Text;
using I18N.Common;

namespace I18N.CJK
{
    // Double-byte encoding for Windows code page 936 (GB2312).
    // Encoding is table driven via DbcsConvert.Gb2312; decoding is
    // delegated to CP936Decoder.
    [Serializable]
    internal class CP936 : DbcsEncoding
    {
        // Magic number used by Windows for the Gb2312 code page.
        private const int GB2312_CODE_PAGE = 936;

        // Constructor.
        public CP936() : base(GB2312_CODE_PAGE) {
        }

        // Get the conversion tables used by this encoding.
        internal override DbcsConvert GetConvert ()
        {
            return DbcsConvert.Gb2312;
        }

#if !DISABLE_UNSAFE
        // Get the number of bytes needed to encode a character buffer.
        // Runs the encoder with a null output buffer so that bytes are
        // counted but never written.
        public unsafe override int GetByteCountImpl (char* chars, int count)
        {
            return GetBytesImpl(chars, count, null, 0);
        }

        // Get the bytes that result from encoding a character buffer.
        // When "bytes" is null nothing is written and only the number
        // of bytes that would be produced is returned.
        // NOTE(review): byteIndex is never compared against byteCount in
        // this method; presumably the caller guarantees that the output
        // buffer is large enough - confirm.
        public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
        {
            DbcsConvert gb2312 = GetConvert ();
            int charIndex = 0;
            int byteIndex = 0;
            int end = charCount;
            EncoderFallbackBuffer buffer = null;

            int origIndex = byteIndex;
            for (int i = charIndex; i < end; i++, charCount--) {
                char c = chars[i];
                if (c <= 0x80 || c == 0xFF) { // ASCII
                    // Emitted as a single byte with the same value.
                    int offset = byteIndex++;
                    if (bytes != null) bytes[offset] = (byte)c;
                    continue;
                }
                // u2n holds two bytes per UTF-16 code unit: the first
                // output byte at index c * 2 + 1, the second at c * 2.
                byte b1 = gb2312.u2n[((int)c) * 2 + 1];
                byte b2 = gb2312.u2n[((int)c) * 2];
                if (b1 == 0 && b2 == 0) {
                    // No table entry: hand the character to the
                    // encoder fallback.
                    HandleFallback (ref buffer, chars,
                        ref i, ref charCount,
                        bytes, ref byteIndex, ref byteCount, null);
                } else {
                    if (bytes != null)
                    {
                        bytes[byteIndex++] = b1;
                        bytes[byteIndex++] = b2;
                    }
                    else
                    {
                        byteIndex += 2;
                    }
                }
            }
            return byteIndex - origIndex;
        }
#else
        // Encode "charCount" characters starting at "charIndex" into
        // "bytes" starting at "byteIndex". When "bytes" is null nothing
        // is written and only the resulting byte count is returned.
        protected int GetBytesInternal(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            int origIndex = byteIndex;
            int end = charIndex + charCount;
            int byteCount = bytes != null ? bytes.Length : 0;

            DbcsConvert gb2312 = GetConvert();
            EncoderFallbackBuffer buffer = null;
            for (int i = charIndex; i < end; i++, charCount--)
            {
                char c = chars[i];
                if (c <= 0x80 || c == 0xFF)
                { // ASCII
                    // Emitted as a single byte with the same value.
                    int offset = byteIndex++;
                    if (bytes != null) bytes[offset] = (byte)c;
                    continue;
                }
                // u2n holds two bytes per UTF-16 code unit: the first
                // output byte at index c * 2 + 1, the second at c * 2.
                byte b1 = gb2312.u2n[((int)c) * 2 + 1];
                byte b2 = gb2312.u2n[((int)c) * 2];
                if (b1 == 0 && b2 == 0)
                {
                    // No table entry: hand the character to the
                    // encoder fallback.
                    HandleFallback (ref buffer, chars, ref i, ref charCount,
                        bytes, ref byteIndex, ref byteCount, null);
                }
                else
                {
                    if (bytes != null)
                    {
                        bytes[byteIndex++] = b1;
                        bytes[byteIndex++] = b2;
                    }
                    else
                    {
                        byteIndex += 2;
                    }
                }
            }
            return byteIndex - origIndex;
        }

        // Get the number of bytes needed to encode a character buffer.
        public override int GetByteCount(char[] chars, int index, int count)
        {
            return GetBytes(chars, index, count, null, 0);
        }

        // Get the bytes that result from encoding a character buffer.
        public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
        {
            return GetBytesInternal(chars, charIndex, charCount, bytes, byteIndex);
        }
#endif

        // Get the number of characters needed to decode a byte buffer.
        public override int GetCharCount (byte [] bytes, int index, int count)
        {
            return GetDecoder ().GetCharCount (bytes, index, count);
        }

        // Get the characters that result from decoding a byte buffer.
        public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
                                     char[] chars, int charIndex)
        {
            return GetDecoder ().GetChars (
                bytes, byteIndex, byteCount, chars, charIndex);
        }

        // Get a decoder that handles a rolling Gb2312 state.
        public override Decoder GetDecoder()
        {
            return new CP936Decoder(GetConvert ());
        }

        // Get the mail body name for this encoding.
        public override String BodyName
        {
            get { return("gb2312"); }
        }

        // Get the human-readable name for this encoding.
        public override String EncodingName
        {
            get { return("Chinese Simplified (GB2312)"); }
        }

        // Get the mail agent header name for this encoding.
        public override String HeaderName
        {
            get { return("gb2312"); }
        }

        // Determine if this encoding can be displayed in a Web browser.
        public override bool IsBrowserDisplay
        {
            get { return(true); }
        }

        // Determine if this encoding can be saved from a Web browser.
        public override bool IsBrowserSave
        {
            get { return(true); }
        }

        // Determine if this encoding can be displayed in a mail/news agent.
        public override bool IsMailNewsDisplay
        {
            get { return(true); }
        }

        // Determine if this encoding can be saved from a mail/news agent.
        public override bool IsMailNewsSave
        {
            get { return(true); }
        }

        // Get the IANA-preferred Web name for this encoding.
        public override String WebName
        {
            get { return("gb2312"); }
        }
    }

    // Decoder that handles a rolling Gb2312 state.
    sealed class CP936Decoder : DbcsEncoding.DbcsDecoder
    {
        // Lowest and highest byte values accepted as the second byte of
        // a pair. Each lead byte owns 191 consecutive table slots,
        // i.e. exactly the range 0x40..0xFE.
        const int FirstTrailByte = 0x40;
        const int LastTrailByte = 0xFE;

        // Constructor.
        public CP936Decoder (DbcsConvert convert)
            : base (convert)
        {
        }

        // Pending lead byte carried between calls (0 when none). The
        // counting and the converting methods keep separate state.
        int last_byte_count, last_byte_bytes;

        // Get the number of characters needed to decode a byte buffer,
        // keeping a trailing lead byte pending for the next call.
        public override int GetCharCount (byte [] bytes, int index, int count)
        {
            return GetCharCount (bytes, index, count, false);
        }

        // Get the number of characters needed to decode a byte buffer.
        // When "refresh" is true a dangling lead byte at the end of the
        // input is counted as one character instead of being kept for
        // the next call.
        public override
        int GetCharCount (byte [] bytes, int index, int count, bool refresh)
        {
            CheckRange (bytes, index, count);

            int lastByte = last_byte_count;
            last_byte_count = 0;
            int length = 0;
            while (count-- > 0) {
                int b = bytes [index++];
                if (lastByte == 0) {
                    if (b <= 0x80 || b == 0xFF) { // ASCII
                        length++;
                        continue;
                    } else {
                        // Lead byte: wait for the second byte.
                        lastByte = b;
                        continue;
                    }
                }
                // Second byte of a pair: the pair yields one character.
                length++;
                lastByte = 0;
            }

            if (lastByte != 0) {
                if (refresh) {
                    length++;
                    last_byte_count = 0;
                }
                else
                    last_byte_count = lastByte;
            }

            return length;
        }

        // Get the characters that result from decoding a byte buffer,
        // keeping a trailing lead byte pending for the next call.
        public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
                                      char[] chars, int charIndex)
        {
            return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
        }

        // Get the characters that result from decoding a byte buffer.
        // Returns the number of characters written to "chars". Byte
        // pairs that have no mapping, or whose second byte is outside
        // 0x40..0xFE, are decoded as '?'. When "refresh" is true a
        // dangling lead byte at the end of the input is also emitted
        // as '?' instead of being kept for the next call.
        public override
        int GetChars (byte [] bytes, int byteIndex, int byteCount,
                      char [] chars, int charIndex, bool refresh)
        {
            CheckRange (bytes, byteIndex, byteCount, chars, charIndex);

            int origIndex = charIndex;
            int lastByte = last_byte_bytes;
            last_byte_bytes = 0;
            while (byteCount-- > 0) {
                int b = bytes[byteIndex++];
                if (lastByte == 0) {
                    if (b <= 0x80 || b == 0xFF) { // ASCII
                        chars[charIndex++] = (char)b;
                    } else {
                        // Anything else (0x81..0xFE) is a lead byte.
                        lastByte = b;
                    }
                    continue;
                }

                // Second byte of a pair. It must be range checked before
                // the table offset is computed: a value below 0x40 or
                // above 0xFE would otherwise land in the table row of a
                // neighbouring lead byte and decode as an unrelated
                // character.
                char c1 = '\0';
                if (b >= FirstTrailByte && b <= LastTrailByte) {
                    int ord = ((lastByte - 0x81) * 191 + b - FirstTrailByte) * 2;
                    // n2u stores each character as two bytes, low byte
                    // first; both must lie inside the table.
                    if (ord >= 0 && ord + 1 < convert.n2u.Length)
                        c1 = (char) (convert.n2u[ord] + convert.n2u[ord + 1] * 256);
                }
                if (c1 == 0)
                    chars[charIndex++] = '?';
                else
                    chars[charIndex++] = c1;
                lastByte = 0;
            }

            if (lastByte != 0) {
                if (refresh) {
                    // FIXME: handle fallback
                    chars [charIndex++] = '?';
                    last_byte_bytes = 0;
                }
                else
                    last_byte_bytes = lastByte;
            }

            return charIndex - origIndex;
        }
    }

    // Alias class for CP936; adds no behavior of its own.
    // Presumably used to look the encoding up by name - verify
    // against the loader.
    [Serializable]
    internal class ENCgb2312 : CP936
    {
        public ENCgb2312(): base () {}
    }
}