1 //
2 // I18N.CJK.CP949
3 //
4 // Author:
5 //   Hye-Shik Chang (perky@FreeBSD.org)
6 //   Atsushi Enomoto  <atsushi@ximian.com>
7 //
8 
9 using System;
10 using System.Text;
11 using I18N.Common;
12 
13 namespace I18N.CJK
14 {
15     [Serializable]
16     internal class CP949 : KoreanEncoding
17     {
18         // Magic number used by Windows for the UHC code page.
19         private const int UHC_CODE_PAGE = 949;
20 
21         // Constructor.
CP949()22         public CP949 () : base (UHC_CODE_PAGE, true)
23         {
24         }
25 
26         // Get the mail body name for this encoding.
27         public override String BodyName
28         {
29             get { return "ks_c_5601-1987"; }
30         }
31 
32         // Get the human-readable name for this encoding.
33         public override String EncodingName
34         {
35             get { return "Korean (UHC)"; }
36         }
37 
38         // Get the mail agent header name for this encoding.
39         public override String HeaderName
40         {
41             get { return "ks_c_5601-1987"; }
42         }
43 
44         // Get the IANA-preferred Web name for this encoding.
45         public override String WebName
46         {
47             get { return "ks_c_5601-1987"; }
48         }
49 
50         /*
51         // Get the Windows code page represented by this object.
52         public override int WindowsCodePage
53         {
54             get { return UHC_PAGE; }
55         }
56         */
57     }
58 
59     [Serializable]
60     internal class CP51949 : KoreanEncoding
61     {
62         // Magic number used by Windows for the euc-kr code page.
63         private const int EUCKR_CODE_PAGE = 51949;
64 
65         // Constructor.
CP51949()66         public CP51949 () : base (EUCKR_CODE_PAGE, false)
67         {
68         }
69 
70         // Get the mail body name for this encoding.
71         public override String BodyName
72         {
73             get { return "euc-kr"; }
74         }
75 
76         // Get the human-readable name for this encoding.
77         public override String EncodingName
78         {
79             get { return "Korean (EUC)"; }
80         }
81 
82         // Get the mail agent header name for this encoding.
83         public override String HeaderName
84         {
85             get { return "euc-kr"; }
86         }
87 
88         // Get the IANA-preferred Web name for this encoding.
89         public override String WebName
90         {
91             get { return "euc-kr"; }
92         }
93 
94         /*
95         // Get the Windows code page represented by this object.
96         public override int WindowsCodePage
97         {
98             get { return UHC_PAGE; }
99         }
100         */
101 
102     }
103 
104     [Serializable]
105     internal class KoreanEncoding : DbcsEncoding
106     {
107         // Constructor.
KoreanEncoding(int codepage, bool useUHC)108         public KoreanEncoding (int codepage, bool useUHC)
109             : base (codepage, 949) {
110             this.useUHC = useUHC;
111         }
112 
GetConvert()113         internal override DbcsConvert GetConvert ()
114         {
115                 return DbcsConvert.KS;
116         }
117 
118         bool useUHC;
119 
120 #if !DISABLE_UNSAFE
121         // Get the bytes that result from encoding a character buffer.
GetByteCountImpl(char* chars, int count)122         public unsafe override int GetByteCountImpl (char* chars, int count)
123         {
124             int index = 0;
125             int length = 0;
126 			int end = count;
127             DbcsConvert convert = GetConvert ();
128 
129             // 00 00 - FF FF
130             for (int i = 0; i < end; i++, charCount--) {
131                 char c = chars[i];
132                 if (c <= 0x80 || c == 0xFF) { // ASCII
133                     length++;
134                     continue;
135                 }
136                 byte b1 = convert.u2n[((int)c) * 2];
137                 byte b2 = convert.u2n[((int)c) * 2 + 1];
138                 if (b1 == 0 && b2 == 0) {
139                     // FIXME: handle fallback for GetByteCountImpl().
140                     length++;
141                 }
142                 else
143                     length += 2;
144             }
145             return length;
146         }
147 
148         // Get the bytes that result from encoding a character buffer.
GetBytesImpl(char* chars, int charCount, byte* bytes, int byteCount)149         public unsafe override int GetBytesImpl (char* chars, int charCount,
150                          byte* bytes, int byteCount)
151         {
152             int charIndex = 0;
153             int byteIndex = 0;
154 			int end = charCount;
155             DbcsConvert convert = GetConvert ();
156             EncoderFallbackBuffer buffer = null;
157 
158             // 00 00 - FF FF
159             int origIndex = byteIndex;
160             for (int = charIndex; i < end; i++, charCount--) {
161                 char c = chars[i];
162                 if (c <= 0x80 || c == 0xFF) { // ASCII
163                     bytes[byteIndex++] = (byte)c;
164                     continue;
165                 }
166                 byte b1 = convert.u2n[((int)c) * 2];
167                 byte b2 = convert.u2n[((int)c) * 2 + 1];
168                 if (b1 == 0 && b2 == 0) {
169                     HandleFallback (ref buffer, chars, ref i, ref charCount,
170                         bytes, ref byteIndex, ref byteCount, null);
171                 } else {
172                     bytes[byteIndex++] = b1;
173                     bytes[byteIndex++] = b2;
174                 }
175             }
176             return byteIndex - origIndex;
177         }
178 #else
179 		// Get the bytes that result from encoding a character buffer.
GetByteCount(char[] chars, int index, int count)180 		public override int GetByteCount(char[] chars, int index, int count)
181 		{
182 			int length = 0;
183 			DbcsConvert convert = GetConvert();
184 
185 			// 00 00 - FF FF
186 			while (count-- > 0)
187 			{
188 				char c = chars[index++];
189 				if (c <= 0x80 || c == 0xFF)
190 				{ // ASCII
191 					length++;
192 					continue;
193 				}
194 				byte b1 = convert.u2n[((int)c) * 2];
195 				byte b2 = convert.u2n[((int)c) * 2 + 1];
196 				if (b1 == 0 && b2 == 0)
197 				{
198 					// FIXME: handle fallback for GetByteCountImpl().
199 					length++;
200 				}
201 				else
202 					length += 2;
203 			}
204 			return length;
205 		}
206 
207 		// Get the bytes that result from encoding a character buffer.
GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)208 		public override int GetBytes(char[] chars, int charIndex, int charCount, byte[] bytes, int byteIndex)
209 		{
210 			int byteCount = bytes.Length;
211 			int end = charIndex + charCount;
212 
213 			DbcsConvert convert = GetConvert();
214 			EncoderFallbackBuffer buffer = null;
215 
216 			// 00 00 - FF FF
217 			int origIndex = byteIndex;
218 			for (int i = charIndex; i < end; i++, charCount--)
219 			{
220 				char c = chars[i];
221 				if (c <= 0x80 || c == 0xFF)
222 				{ // ASCII
223 					bytes[byteIndex++] = (byte)c;
224 					continue;
225 				}
226 				byte b1 = convert.u2n[((int)c) * 2];
227 				byte b2 = convert.u2n[((int)c) * 2 + 1];
228 				if (b1 == 0 && b2 == 0)
229 				{
230 					HandleFallback (ref buffer, chars, ref i, ref charCount,
231 						bytes, ref byteIndex, ref byteCount, null);
232 				}
233 				else
234 				{
235 					bytes[byteIndex++] = b1;
236 					bytes[byteIndex++] = b2;
237 				}
238 			}
239 			return byteIndex - origIndex;
240 		}
241 #endif
242 		// Get the characters that result from decoding a byte buffer.
GetCharCount(byte[] bytes, int index, int count)243         public override int GetCharCount (byte[] bytes, int index, int count)
244         {
245             return GetDecoder ().GetCharCount (bytes, index, count);
246         }
247 
248         // Get the characters that result from decoding a byte buffer.
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)249         public override int GetChars(byte[] bytes, int byteIndex, int byteCount,
250                          char[] chars, int charIndex)
251         {
252             return GetDecoder ().GetChars (bytes, byteIndex, byteCount, chars, charIndex);
253         }
254 
255         // Get a decoder that handles a rolling UHC state.
GetDecoder()256         public override Decoder GetDecoder()
257         {
258             return new KoreanDecoder (GetConvert (), useUHC);
259         }
260 
261         // Decoder that handles a rolling UHC state.
262         private sealed class KoreanDecoder : DbcsDecoder
263         {
264             // Constructor.
KoreanDecoder(DbcsConvert convert, bool useUHC)265             public KoreanDecoder (DbcsConvert convert, bool useUHC)
266                 : base(convert)
267             {
268                 this.useUHC = useUHC;
269             }
270             bool useUHC;
271             int last_byte_count, last_byte_conv;
272 
GetCharCount(byte[] bytes, int index, int count)273             public override int GetCharCount (byte[] bytes, int index, int count)
274             {
275                 return GetCharCount (bytes, index, count, false);
276             }
277 
278             public override
GetCharCount(byte [] bytes, int index, int count, bool refresh)279             int GetCharCount (byte [] bytes, int index, int count, bool refresh)
280             {
281                 CheckRange (bytes, index, count);
282 
283                 int lastByte = last_byte_count;
284                 last_byte_count = 0;
285                 int length = 0;
286                 while (count-- > 0) {
287                     int b = bytes[index++];
288                     if (lastByte == 0) {
289                         if (b <= 0x80 || b == 0xFF) { // ASCII
290                             length++;
291                             continue;
292                         } else {
293                             lastByte = b;
294                             continue;
295                         }
296                     }
297 
298                     char c1;
299                     if (useUHC && lastByte < 0xa1) { // UHC Level 1
300                         int ord = 8836 + (lastByte - 0x81) * 178;
301 
302                         if (b >= 0x41 && b <= 0x5A)
303                             ord += b - 0x41;
304                         else if (b >= 0x61 && b <= 0x7A)
305                             ord += b - 0x61 + 26;
306                         else if (b >= 0x81 && b <= 0xFE)
307                             ord += b - 0x81 + 52;
308                         else
309                             ord = -1;
310 
311                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
312                             c1 = (char)(convert.n2u[ord*2] +
313                                         convert.n2u[ord*2 + 1] * 256);
314                         else
315                             c1 = (char)0;
316                     } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
317                         int ord = 14532 + (lastByte - 0xA1) * 84;
318 
319                         if (b >= 0x41 && b <= 0x5A)
320                             ord += b - 0x41;
321                         else if (b >= 0x61 && b <= 0x7A)
322                             ord += b - 0x61 + 26;
323                         else if (b >= 0x81 && b <= 0xA0)
324                             ord += b - 0x81 + 52;
325                         else
326                             ord = -1;
327 
328                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
329                             c1 = (char)(convert.n2u[ord*2] +
330                                         convert.n2u[ord*2 + 1] * 256);
331                         else
332                             c1 = (char)0;
333                     } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
334                         int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
335 
336                         c1 = ord < 0 || ord >= convert.n2u.Length ?
337                             '\0' : (char)(convert.n2u[ord] +
338                                     convert.n2u[ord + 1] * 256);
339                     } else
340                         c1 = (char)0;
341 
342                     if (c1 == 0)
343                         // FIXME: fallback
344                         length++;
345                     else
346                         length++;
347                     lastByte = 0;
348                 }
349 
350                 if (lastByte != 0) {
351                     if (refresh) {
352                         // FIXME: fallback
353                         length++;
354                         last_byte_count = 0;
355                     }
356                     else
357                         last_byte_count = lastByte;
358                 }
359                 return length;
360             }
361 
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex)362             public override int GetChars(byte[] bytes, int byteIndex,
363                                 int byteCount, char[] chars, int charIndex)
364             {
365                 return GetChars (bytes, byteIndex, byteCount, chars, charIndex, false);
366             }
367 
368             public override
GetChars(byte[] bytes, int byteIndex, int byteCount, char[] chars, int charIndex, bool refresh)369             int GetChars(byte[] bytes, int byteIndex,
370                                 int byteCount, char[] chars, int charIndex, bool refresh)
371             {
372                 CheckRange (bytes, byteIndex, byteCount, chars, charIndex);
373                 int origIndex = charIndex;
374                 int lastByte = last_byte_conv;
375                 last_byte_conv = 0;
376                 while (byteCount-- > 0) {
377                     int b = bytes[byteIndex++];
378                     if (lastByte == 0) {
379                         if (b <= 0x80 || b == 0xFF) { // ASCII
380                             chars[charIndex++] = (char)b;
381                             continue;
382                         } else {
383                             lastByte = b;
384                             continue;
385                         }
386                     }
387 
388                     char c1;
389                     if (useUHC && lastByte < 0xa1) { // UHC Level 1
390                         int ord = 8836 + (lastByte - 0x81) * 178;
391 
392                         if (b >= 0x41 && b <= 0x5A)
393                             ord += b - 0x41;
394                         else if (b >= 0x61 && b <= 0x7A)
395                             ord += b - 0x61 + 26;
396                         else if (b >= 0x81 && b <= 0xFE)
397                             ord += b - 0x81 + 52;
398                         else
399                             ord = -1;
400 
401                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
402                             c1 = (char)(convert.n2u[ord*2] +
403                                         convert.n2u[ord*2 + 1] * 256);
404                         else
405                             c1 = (char)0;
406                     } else if (useUHC && lastByte <= 0xC6 && b < 0xA1) { // UHC Level 2
407                         int ord = 14532 + (lastByte - 0xA1) * 84;
408 
409                         if (b >= 0x41 && b <= 0x5A)
410                             ord += b - 0x41;
411                         else if (b >= 0x61 && b <= 0x7A)
412                             ord += b - 0x61 + 26;
413                         else if (b >= 0x81 && b <= 0xA0)
414                             ord += b - 0x81 + 52;
415                         else
416                             ord = -1;
417 
418                         if (ord >= 0 && ord * 2 <= convert.n2u.Length)
419                             c1 = (char)(convert.n2u[ord*2] +
420                                         convert.n2u[ord*2 + 1] * 256);
421                         else
422                             c1 = (char)0;
423                     } else if (b >= 0xA1 && b <= 0xFE) { // KS X 1001
424                         int ord = ((lastByte - 0xA1) * 94 + b - 0xA1) * 2;
425 
426                         c1 = ord < 0 || ord >= convert.n2u.Length ?
427                             '\0' : (char)(convert.n2u[ord] +
428                                     convert.n2u[ord + 1] * 256);
429                     } else
430                         c1 = (char)0;
431 
432                     if (c1 == 0)
433                         chars[charIndex++] = '?';
434                     else
435                         chars[charIndex++] = c1;
436                     lastByte = 0;
437                 }
438 
439                 if (lastByte != 0) {
440                     if (refresh) {
441                         chars[charIndex++] = '?';
442                         last_byte_conv = 0;
443                     }
444                     else
445                         last_byte_conv = lastByte;
446                 }
447                 return charIndex - origIndex;
448             }
449         }
450     }
451 
452     [Serializable]
453     internal class ENCuhc : CP949
454     {
ENCuhc()455         public ENCuhc() {}
456     }
457 
458     [Serializable]
459     internal class ENCeuc_kr: CP51949
460     {
ENCeuc_kr()461         public ENCeuc_kr() {}
462     }
463 }
464 
465 // ex: ts=8 sts=4 et
466