1 //
2 // I18N.CJK.CP936.cs
3 //
4 // Author:
5 //	Atsushi Enomoto <atsushi@ximian.com>
6 //
7 // (new implementation based on CP950.)
8 //
9 
10 using System;
11 using System.Text;
12 using I18N.Common;
13 
14 namespace I18N.CJK
15 {
16 	[Serializable]
17 	internal class CP936 : DbcsEncoding
18 	{
19 		// Magic number used by Windows for the Gb2312 code page.
20 		private const int GB2312_CODE_PAGE = 936;
21 
22 		// Constructor.
CP936()23 		public CP936() : base(GB2312_CODE_PAGE) {
24 		}
25 
GetConvert()26 		internal override DbcsConvert GetConvert ()
27 		{
28 			return DbcsConvert.Gb2312;
29 		}
30 
31 #if !DISABLE_UNSAFE
32 		// Get the bytes that result from encoding a character buffer.
GetByteCountImpl(char* chars, int count)33 		public unsafe override int GetByteCountImpl (char* chars, int count)
34 		{
35 			return GetBytesImpl(chars, count, null, 0);
36 		}
37 
38 		// Get the bytes that result from encoding a character buffer.
GetBytesImpl(char* chars, int charCount, byte* bytes, int byteCount)39 		public unsafe override int GetBytesImpl (char* chars, int charCount, byte* bytes, int byteCount)
40 		{
41 			DbcsConvert gb2312 = GetConvert ();
42 			int charIndex = 0;
43 			int byteIndex = 0;
44 			int end = charCount;
45 			EncoderFallbackBuffer buffer = null;
46 
47 			int origIndex = byteIndex;
48 			for (int i = charIndex; i < end; i++, charCount--) {
49 				char c = chars[i];
50 				if (c <= 0x80 || c == 0xFF) { // ASCII
51 					int offset = byteIndex++;
52 					if (bytes != null) bytes[offset] = (byte)c;
53 					continue;
54 				}
55 				byte b1 = gb2312.u2n[((int)c) * 2 + 1];
56 				byte b2 = gb2312.u2n[((int)c) * 2];
57 				if (b1 == 0 && b2 == 0) {
58 					HandleFallback (ref buffer, chars,
59 						ref i, ref charCount,
60 						bytes, ref byteIndex, ref byteCount, null);
61 				} else {
62 					if (bytes != null)
63 					{
64 						bytes[byteIndex++] = b1;
65 						bytes[byteIndex++] = b2;
66 					}
67 					else
68 					{
69 						byteIndex += 2;
70 					}
71 				}
72 			}
73 			return byteIndex - origIndex;
74 		}
75 #else
GetBytesInternal(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)76 		protected int GetBytesInternal(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
77 		{
78 			int origIndex = byteIndex;
79 			int end = charIndex + charCount;
80 			int byteCount = bytes != null ? bytes.Length : 0;
81 
82 			DbcsConvert gb2312 = GetConvert();
83 			EncoderFallbackBuffer buffer = null;
84 			for (int i = charIndex; i < end; i++, charCount--)
85 			{
86 				char c = chars[i];
87 				if (c <= 0x80 || c == 0xFF)
88 				{ // ASCII
89 					int offset = byteIndex++;
90 					if (bytes != null) bytes[offset] = (byte)c;
91 					continue;
92 				}
93 				byte b1 = gb2312.u2n[((int)c) * 2 + 1];
94 				byte b2 = gb2312.u2n[((int)c) * 2];
95 				if (b1 == 0 && b2 == 0)
96 				{
97 					HandleFallback (ref buffer, chars, ref i, ref charCount,
98 						bytes, ref byteIndex, ref byteCount, null);
99 				}
100 				else
101 				{
102 					if (bytes != null)
103 					{
104 						bytes[byteIndex++] = b1;
105 						bytes[byteIndex++] = b2;
106 					}
107 					else
108 					{
109 						byteIndex += 2;
110 					}
111 				}
112 			}
113 			return byteIndex - origIndex;
114 		}
115 
116 		// Get the bytes that result from encoding a character buffer.
GetByteCount(char[] chars, int index, int count)117 		public override int GetByteCount(char[] chars, int index, int count)
118 		{
119 			return GetBytes(chars, index, count, null, 0);
120 		}
121 
122 		// Get the bytes that result from encoding a character buffer.
GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)123 		public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
124 		{
125 			return GetBytesInternal(chars, charIndex, charCount, bytes, byteIndex);
126 		}
127 #endif
128 		// Get the characters that result from decoding a byte buffer.
GetCharCount(byte [] bytes, int index, int count)129 		public override int GetCharCount (byte [] bytes, int index, int count)
130 		{
131 			return GetDecoder ().GetCharCount (bytes, index, count);
132 		}
133 
134 		// Get the characters that result from decoding a byte buffer.
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)135 		public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
136 					     char[] chars, int charIndex)
137 		{
138 			return GetDecoder ().GetChars (
139 				bytes, byteIndex, byteCount, chars, charIndex);
140 		}
141 
142 		// Get a decoder that handles a rolling Gb2312 state.
GetDecoder()143 		public override Decoder GetDecoder()
144 		{
145 			return new CP936Decoder(GetConvert ());
146 		}
147 
148 		// Get the mail body name for this encoding.
149 		public override String BodyName
150 		{
151 			get { return("gb2312"); }
152 		}
153 
154 		// Get the human-readable name for this encoding.
155 		public override String EncodingName
156 		{
157 			get { return("Chinese Simplified (GB2312)"); }
158 		}
159 
160 		// Get the mail agent header name for this encoding.
161 		public override String HeaderName
162 		{
163 			get { return("gb2312"); }
164 		}
165 
166 		// Determine if this encoding can be displayed in a Web browser.
167 		public override bool IsBrowserDisplay
168 		{
169 			get { return(true); }
170 		}
171 
172 		// Determine if this encoding can be saved from a Web browser.
173 		public override bool IsBrowserSave
174 		{
175 			get { return(true); }
176 		}
177 
178 		// Determine if this encoding can be displayed in a mail/news agent.
179 		public override bool IsMailNewsDisplay
180 		{
181 			get { return(true); }
182 		}
183 
184 		// Determine if this encoding can be saved from a mail/news agent.
185 		public override bool IsMailNewsSave
186 		{
187 			get { return(true); }
188 		}
189 
190 		// Get the IANA-preferred Web name for this encoding.
191 		public override String WebName
192 		{
193 			get { return("gb2312"); }
194 		}
195 	}
196 
197 	// Decoder that handles a rolling Gb2312 state.
198 	sealed class CP936Decoder : DbcsEncoding.DbcsDecoder
199 	{
200 		// Constructor.
CP936Decoder(DbcsConvert convert)201 		public CP936Decoder (DbcsConvert convert)
202 			: base (convert)
203 		{
204 		}
205 
206 		int last_byte_count, last_byte_bytes;
207 
208 		// Get the characters that result from decoding a byte buffer.
GetCharCount(byte [] bytes, int index, int count)209 		public override int GetCharCount (byte [] bytes, int index, int count)
210 		{
211 			return GetCharCount (bytes, index, count, false);
212 		}
213 
214 		public override
GetCharCount(byte [] bytes, int index, int count, bool refresh)215 		int GetCharCount (byte [] bytes, int index, int count, bool refresh)
216 		{
217 			CheckRange (bytes, index, count);
218 
219 			int lastByte = last_byte_count;
220 			last_byte_count = 0;
221 			int length = 0;
222 			while (count-- > 0) {
223 				int b = bytes [index++];
224 				if (lastByte == 0) {
225 					if (b <= 0x80 || b == 0xFF) { // ASCII
226 						length++;
227 						continue;
228 					} else {
229 						lastByte = b;
230 						continue;
231 					}
232 				}
233 				length++;
234 				lastByte = 0;
235 			}
236 
237 			if (lastByte != 0) {
238 				if (refresh) {
239 					length++;
240 					last_byte_count = 0;
241 				}
242 				else
243 					last_byte_count = lastByte;
244 			}
245 
246 			return length;
247 		}
248 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)249 		public override int GetChars (byte[] bytes, int byteIndex, int byteCount,
250 					     char[] chars, int charIndex)
251 		{
252 			return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
253 		}
254 
255 		public override
GetChars(byte [] bytes, int byteIndex, int byteCount, char [] chars, int charIndex, bool refresh)256 		int GetChars (byte [] bytes, int byteIndex, int byteCount,
257 			      char [] chars, int charIndex, bool refresh)
258 		{
259 			CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
260 
261 			int origIndex = charIndex;
262 			int lastByte = last_byte_bytes;
263 			last_byte_bytes = 0;
264 			while (byteCount-- > 0) {
265 				int b = bytes[byteIndex++];
266 				if (lastByte == 0) {
267 					if (b <= 0x80 || b == 0xFF) { // ASCII
268 						chars[charIndex++] = (char)b;
269 						continue;
270 					} else if (b < 0x81 || b >= 0xFF) {
271 						continue;
272 					} else {
273 						lastByte = b;
274 						continue;
275 					}
276 				}
277 				int ord = ((lastByte - 0x81) * 191 + b - 0x40) * 2;
278 				char c1 = ord < 0 || ord >= convert.n2u.Length ?
279 					'\0' : (char) (convert.n2u[ord] + convert.n2u[ord + 1] * 256);
280 				if (c1 == 0)
281 					chars[charIndex++] = '?';
282 				else
283 					chars[charIndex++] = c1;
284 				lastByte = 0;
285 			}
286 
287 			if (lastByte != 0) {
288 				if (refresh) {
289 					// FIXME: handle fallback
290 					chars [charIndex++] = '?';
291 					last_byte_bytes = 0;
292 				}
293 				else
294 					last_byte_bytes = lastByte;
295 			}
296 
297 			return charIndex - origIndex;
298 		}
299 	}
300 
301 	[Serializable]
302 	internal class ENCgb2312 : CP936
303 	{
ENCgb2312()304 		public ENCgb2312(): base () {}
305 	}
306 }
307