1 // ==++==
2 //
3 //   Copyright (c) Microsoft Corporation.  All rights reserved.
4 //
5 // ==--==
6 
7 //  ISO2022Encoding.cs
8 //
9 //  Ported to managed code from c_is2022.c and related iso 2022 dll files from mlang
10 //
11 //  Abstract:
12 //
13 //      Managed implementation of ISO 2022 code pages, ported from the implementation in c_is2022.dll
14 //      This code should be kept in sync with the other implementations
15 //      This encoding wraps the basic encodings in code that adds the shift in/out wrapper methods
16 //
17 //  Notes:
18 //
19 // IsAlwaysNormalized ???
20 // Regarding Normalization for ISO-2022-JP (50220, 50221, 50222), it's the same rules as EUCJP
21 //  Forms KC & KD are precluded because of things like halfwidth Katakana that have compatibility mappings
22 //  Form D is precluded because of 0x00a8, which changes to space + diaeresis.
23 //
24 // Note: I think that IsAlwaysNormalized should probably return true for form C for Japanese 20932 based CPs.
25 //
26 // For ISO-2022-KR
27 //  Never normalized, C & D (& therefore KC & KD) are precluded because of Hangul syllables and combined characters.
28 //
29 // IsAlwaysNormalized ???
30 // Regarding Normalization for ISO-2022-CN (50227, 50229) & HZ-GB2312 (52936) I think it is similar to the Japanese case.
31 //  Forms KC & KD are precluded because of things like halfwidth Katakana that have compatibility mappings
32 //  Form D is precluded because of 0x00a8, which changes to space + diaeresis.
33 //
34 // Note: I think that IsAlwaysNormalized should probably return true for form C for Chinese 20936 based CPs.
35 //
36 #if FEATURE_CODEPAGES_FILE // requires BaseCodePageEncooding
37 namespace System.Text
38 {
39     using System.Globalization;
40     using System.Diagnostics.Contracts;
41     using System.Text;
42     using System.Runtime.InteropServices;
43     using System;
44     using System.Security;
45     using System.Runtime.CompilerServices;
46     using System.Runtime.Serialization;
47 
48 
49     /*=================================ISO2022Encoding============================
50     **
51     ** This is used to support ISO 2022 encodings that use shift/escape sequences.
52     **
53     ==============================================================================*/
54 
55     [Serializable]
56     internal class ISO2022Encoding : DBCSCodePageEncoding
57     {
// Control bytes used by the ISO 2022 shift/escape handling in this class.
const byte SHIFT_OUT = 0x0E;    // SO
const byte SHIFT_IN = 0x0F;     // SI
const byte ESCAPE = 0x1B;       // ESC, first byte of every escape sequence written by the encoders

// Lead byte value that tags halfwidth Katakana in our mapping data:
// CleanUpBytes moves single bytes 0xa1-0xdf under this lead byte, and
// GetBytesCP5022xJP treats a mapped value with this lead byte as halfwidth Katakana.
const byte LEADBYTE_HALFWIDTH = 0x10;
62 
// Builds the encoding for one of the ISO 2022 / HZ code pages on top of the tables of a
// base code page.  The base is looked up in tableBaseCodePages by the last decimal digit
// of codePage (50220 -> slot 0 -> 932, 50225 -> slot 5 -> 949, 52936 -> slot 6 -> 936).
// Per the original note, this makes us pretend to be other code pages as far as memory
// sections are concerned.
[System.Security.SecurityCritical]  // auto-generated
internal ISO2022Encoding(int codePage)
    : base(codePage, tableBaseCodePages[codePage % 10])
{
    // Flag inherited from the base class; its consumer is not visible in this part of the file.
    m_bUseMlangTypeForSerialization = true;
}
70 
// Serialization constructor.
// Note:  We use the base GetObjectData however.
// According to the original comment this is never expected to run because
// CodePageEncoding acts as our serialization proxy; if it is reached anyway we
// assert (debug builds) and then throw.
[System.Security.SecurityCritical]  // auto-generated
internal ISO2022Encoding(SerializationInfo info, StreamingContext context) : base(info, context)
{
    // The message now names this class; it previously named DBCSCodePageEncoding
    // (copy/paste leftover), which pointed anyone debugging at the wrong type.
    Contract.Assert(false, "Didn't expect to make it to ISO2022Encoding serialization constructor");
    throw new ArgumentException(Environment.GetResourceString("Arg_ExecutionEngineException"));
}
80 
// Base code page whose tables back each ISO 2022 code page.  The constructor indexes
// this with (codePage % 10); the slot numbers also line up with the ISO2022Modes values
// named in the comments.  0 means no base tables are assigned to that slot.
static int[] tableBaseCodePages = new int[]
{
    /*  0 */ 932,   // 50220  ISO-2022-JP, No halfwidth Katakana, convert to full width
    /*  1 */ 932,   // 50221  ISO-2022-JP, Use escape sequence for half width Katakana
    /*  2 */ 932,   // 50222  ISO-2022-JP, Use shift-in/shift-out for half width Katakana
    /*  3 */ 0,
    /*  4 */ 0,
    /*  5 */ 949,   // 50225  ISO-2022-KR, Korean
    /*  6 */ 936,   // 52936  HZ-GB2312, 936 might be better source
    /*  7 */ 0,     // (was 20936)  50227  ISO-2022-CN, Note: This is just the same as CP 936 in Everett.
    /*  8 */ 0,
    // 50229 is currently unsupported, CP 20000 is currently not built in .nlp file
    /*  9 */ 0,     // (was 20000)  50229  ISO-2022-CN, ModeCNS11643_1
    /* 10 */ 0,     // (was 20000)  50229  ISO-2022-CN, ModeCNS11643_2
    /* 11 */ 0      //                     ModeASCII
};
97 
// Modes tracked by the ISO 2022 encoders and decoders.
// The non-negative values use the same slot numbering as the comments in
// tableBaseCodePages.  The negative values are not character-set modes; judging by
// their names they report escape-sequence parsing outcomes (their users are outside
// this part of the file).
internal enum ISO2022Modes
{
    // Character-set modes
    ModeHalfwidthKatakana = 0,
    ModeJIS0208 = 1,
    ModeKR = 5,
    ModeHZ = 6,
    ModeGB2312 = 7,
    ModeCNS11643_1 = 9,
    ModeCNS11643_2 = 10,
    ModeASCII = 11,

    // Negative status values
    ModeIncompleteEscape = -1,
    ModeInvalidEscape = -2,
    ModeNOOP = -3
}
113 
// Builds the name of the memory section used for this code page's tables:
// "CodePage_<cp>_<major>_<minor>_<revision>_<build>" plus a suffix identifying the
// ISO 2022 flavour.  <cp> is dataTableCodePage when bFlagDataTable is set, otherwise
// our own CodePage; the version numbers come from the pCodePage header.
[System.Security.SecurityCritical]  // auto-generated
protected unsafe override String GetMemorySectionName()
{
    // Which code page number goes into the name
    int sectionCodePage;
    if (this.bFlagDataTable)
        sectionCodePage = dataTableCodePage;
    else
        sectionCodePage = CodePage;

    // Pick the format string for our flavour
    int ownCodePage = this.CodePage;
    String nameFormat;

    if (ownCodePage == 50220 || ownCodePage == 50221 || ownCodePage == 50222)
    {
        nameFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022JP";
    }
    else if (ownCodePage == 50225)
    {
        nameFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_ISO2022KR";
    }
    else if (ownCodePage == 52936)
    {
        nameFormat = "CodePage_{0}_{1}_{2}_{3}_{4}_HZ";
    }
    else
    {
        // Not one of ours; complain in debug builds and fall back to the unsuffixed name.
        Contract.Assert(false, "[ISO2022Encoding.GetMemorySectionName] Don't expect to get here for code page " + this.CodePage);
        nameFormat = "CodePage_{0}_{1}_{2}_{3}_{4}";
    }

    return String.Format(CultureInfo.InvariantCulture, nameFormat,
        sectionCodePage,
        this.pCodePage->VersionMajor,
        this.pCodePage->VersionMinor,
        this.pCodePage->VersionRevision,
        this.pCodePage->VersionBuild);
}
146 
// Adjusts one byte value from the base code page's table so it suits the ISO 2022
// flavour we implement:
//   ISO-2022-JP (50220, 50221, 50222), ISO-2022-KR (50225), HZ-GB2312 (52936).
// bytes may be rewritten in place.  Returns false for values the original comments
// describe as "don't do" / "ignore" / "get rid of", true otherwise.  Any other code
// page leaves bytes untouched and returns true.
protected override bool CleanUpBytes(ref int bytes)
{
    int codePage = this.CodePage;

    // ---- 932 based code pages ----
    if (codePage == 50220 || codePage == 50221 || codePage == 50222)
    {
        if (bytes < 0x100)
        {
            // Single byte value.
            // Halfwidth Katakana (0xa1-0xdf) moves under our private halfwidth lead byte,
            // with the trail byte reduced by 0x80.
            if (bytes >= 0xa1 && bytes <= 0xdf)
                bytes += (LEADBYTE_HALFWIDTH << 8) - 0x80;

            // 0x81-0x9f and 0xe0-0xfc are CP 932 lead bytes.
            // (CP 20932 would use 0x8e and 0xa1-0xfe, we don't use 8e though.)
            // Don't do lead bytes, we use escape sequences instead.
            bool isLeadByte932 = (bytes >= 0x81 && bytes <= 0x9f) ||
                                 (bytes >= 0xe0 && bytes <= 0xfc);
            return !isLeadByte932;
        }

        // Double byte value.
        // First map the extended chars (0xfa40-0xfc4b) to a special range (ported from mlang).
        if (bytes >= 0xfa40 && bytes <= 0xfc4b)
        {
            if (bytes > 0xfa5b)
            {
                // 0xfa5c-0xfc4b: the offset depends on the trail byte
                byte extTrail = unchecked((byte)bytes);
                if (extTrail < 0x5c)
                    bytes -= 0x0d5f;
                else if (extTrail >= 0x80 && extTrail <= 0x9B)
                    bytes -= 0x0d1d;
                else
                    bytes -= 0x0d1c;
            }
            else if (bytes <= 0xfa49)
            {
                bytes -= 0x0b51;        // 0xfa40-0xfa49
            }
            else if (bytes <= 0xfa53)
            {
                bytes -= 0x72f6;        // 0xfa4a-0xfa53
            }
            else if (bytes <= 0xfa57)
            {
                bytes -= 0x0b5b;        // 0xfa54-0xfa57
            }
            else
            {
                // 0xfa58-0xfa5b map one by one
                switch (bytes)
                {
                    case 0xfa58: bytes = 0x878a; break;
                    case 0xfa59: bytes = 0x8782; break;
                    case 0xfa5a: bytes = 0x8784; break;
                    case 0xfa5b: bytes = 0x879a; break;
                }
            }
        }

        // Then convert the 932 value to a 20932 like code page range (also ported from mlang).
        byte jisLead = unchecked((byte)(bytes >> 8));
        byte jisTrail = unchecked((byte)bytes);

        jisLead -= ((jisLead > (byte)0x9f) ? (byte)0xb1 : (byte)0x71);
        jisLead = (byte)((jisLead << 1) + 1);
        if (jisTrail <= (byte)0x9e)
        {
            if (jisTrail > (byte)0x7e)
                jisTrail--;
            jisTrail -= (byte)0x1f;
        }
        else
        {
            jisTrail -= (byte)0x7e;
            jisLead++;
        }

        bytes = (jisLead << 8) | jisTrail;

        // All DBCS lead and trail bytes should be >= 0x21 and <= 0x7e, but we
        // deliberately do NOT reject values outside that area here: Everett/Mlang had
        // illegal PUA mappings to ISO2022 code pages that we're maintaining.
        return true;
    }

    // ---- ISO-2022-KR ----
    if (codePage == 50225)
    {
        // We don't rely on lead byte marks for 50225, so don't add them,
        // esp. since we're only a 7 bit code page.
        if (bytes >= 0x80 && bytes <= 0xff)
            return false;

        // Ignore double byte values whose lead or trail byte is outside a1-fe
        if (bytes >= 0x100)
        {
            int krTrail = bytes & 0xff;
            int krLead = bytes & 0xff00;
            if (krTrail < 0xa1 || krTrail == 0xff ||
                krLead < 0xa100 || krLead == 0xff00)
                return false;
        }

        // May as well get them into our 7 bit range
        bytes &= 0x7f7f;
        return true;
    }

    // ---- HZ-GB2312 ----
    if (codePage == 52936)
    {
        // We don't rely on lead byte marks for 52936 either; get rid of them so we
        // don't end up with extra weird fffe mappings.
        if (bytes >= 0x81 && bytes <= 0xfe)
            return false;
    }

    return true;
}
273 
// GetByteCount
// Counts the bytes needed to encode count chars by running GetBytes with no output
// buffer (null byte pointer, zero byte count).
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetByteCount(char* chars, int count, EncoderNLS baseEncoder)
{
    // Our internal caller validated the arguments already, so assertions are enough here
    Contract.Assert(count >= 0, "[ISO2022Encoding.GetByteCount]count is negative");
    Contract.Assert(chars != null, "[ISO2022Encoding.GetByteCount]chars is null");

    int bytesNeeded = GetBytes(chars, count, null, 0, baseEncoder);
    return bytesNeeded;
}
285 
// Encodes charCount chars into bytes by dispatching to the encoder for our code page.
//   chars / charCount   input characters
//   bytes / byteCount   output buffer; GetByteCount passes (null, 0) to only count
//   baseEncoder         must be an ISO2022Encoder when it is not null (it is cast below)
// Returns the count produced by the code-page specific encoder, or 0 for a code page
// this class does not handle (asserting in debug builds, like GetChars does).
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetBytes(char* chars, int charCount,
                                      byte* bytes, int byteCount, EncoderNLS baseEncoder)
{
    // Just need to ASSERT, this is called by something else internal that checked parameters already
    Contract.Assert(chars != null, "[ISO2022Encoding.GetBytes]chars is null");
    Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetBytes]byteCount is negative");
    Contract.Assert(charCount >= 0, "[ISO2022Encoding.GetBytes]charCount is negative");

    // Assert because we shouldn't be able to have a null encoder fallback.
    Contract.Assert(encoderFallback != null, "[ISO2022Encoding.GetBytes]Attempting to use null encoder fallback");

    // Fix our encoder
    ISO2022Encoder encoder = (ISO2022Encoder)baseEncoder;

    switch (CodePage)
    {
        case 50220:
        case 50221:
        case 50222:
            return GetBytesCP5022xJP(chars, charCount, bytes, byteCount, encoder);

        case 50225:
            return GetBytesCP50225KR(chars, charCount, bytes, byteCount, encoder);

        // Everett had 50227 the same as 936, so there is no 50227 (GetBytesCP50227CN) case here.

        case 52936:
            return GetBytesCP52936(chars, charCount, bytes, byteCount, encoder);

        default:
            // Previously an unexpected code page fell through silently; assert so the
            // behavior matches GetChars.  The release-build result (0) is unchanged.
            Contract.Assert(false, "[ISO2022Encoding.GetBytes] had unexpected code page");
            return 0;
    }
}
326 
// Counts the chars that count bytes decode to by running GetChars with no output
// buffer (null char pointer, zero char count).
// This is internal and called by something else that already checked the arguments.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetCharCount(byte* bytes, int count, DecoderNLS baseDecoder)
{
    // Only assert; the arguments should be safe by the time we get here
    Contract.Assert(bytes != null, "[ISO2022Encoding.GetCharCount]bytes is null");
    Contract.Assert(count >= 0, "[ISO2022Encoding.GetCharCount]byteCount is negative");

    int charsNeeded = GetChars(bytes, count, null, 0, baseDecoder);
    return charsNeeded;
}
338 
// Decodes byteCount bytes into chars by dispatching to the decoder for our code page.
//   bytes / byteCount   input bytes
//   chars / charCount   output buffer; GetCharCount passes (null, 0) to only count
//   baseDecoder         must be an ISO2022Decoder when it is not null (it is cast below)
// Returns the count produced by the code-page specific decoder, or 0 (after a debug
// assert) for a code page this class does not handle.
[System.Security.SecurityCritical]  // auto-generated
internal override unsafe int GetChars(byte* bytes, int byteCount,
                                      char* chars, int charCount, DecoderNLS baseDecoder)
{
    // Our internal caller validated the arguments already, so assertions are enough here
    Contract.Assert(bytes != null, "[ISO2022Encoding.GetChars]bytes is null");
    Contract.Assert(byteCount >= 0, "[ISO2022Encoding.GetChars]byteCount is negative");
    Contract.Assert(charCount >= 0, "[ISO2022Encoding.GetChars]charCount is negative");

    // Fix our decoder
    ISO2022Decoder decoder = (ISO2022Decoder)baseDecoder;

    switch (CodePage)
    {
        case 50220:
        case 50221:
        case 50222:
            return GetCharsCP5022xJP(bytes, byteCount, chars, charCount, decoder);

        case 50225:
            return GetCharsCP50225KR(bytes, byteCount, chars, charCount, decoder);

        // Currently 50227 is the same as 936, so there is no 50227 (GetCharsCP50227CN) case here.

        case 52936:
            return GetCharsCP52936(bytes, byteCount, chars, charCount, decoder);

        default:
            Contract.Assert(false, "[ISO2022Encoding.GetChars] had unexpected code page");
            return 0;
    }
}
376 
377         // ISO 2022 Code pages for JP.
378         //  50220 - No halfwidth Katakana, convert to full width
379         //  50221 - Use escape sequence for half width Katakana
380         //  50222 - Use shift-in/shift-out for half width Katakana
381         //
382         // These are the JIS code pages, superset of ISO-2022 / ISO-2022-JP-1
383         //  0E          Shift Out (following bytes are Katakana)
384         //  0F          Shift In  (back to "normal" behavior)
385         //  21-7E       Byte ranges (1 or 2 bytes)
386         //  <ESC> $ @   To Double Byte 0208 Mode (actually older code page, but subset of 0208)
387         //  <ESC> $ B   To Double Byte 0208 Mode (duplicate)
388         //  <ESC> $ ( D To Double Byte 0212 Mode (previously we misinterpreted this)
389         //  <ESC> ( I   To half width Katakana
390         //  <ESC> ( J   To JIS-Roman
391         //  <ESC> ( H   To JIS-Roman (swedish character set)
392         //  <ESC> ( B   To ASCII
393         //  <ESC> & @   Alternate lead in to <ESC> $ B so just ignore it.
394         //
395         // So in Katakana mode we add 0x8e as a lead byte and use CP 20932 to convert it
396         // In ASCII mode we just spit out the single byte.
397         // In Roman mode we should change 0x5c (\) -> Yen sign and 0x7e (~) to Overline, however
398         //      we didn't in mLang, otherwise roman is like ASCII.
399         // In 0208 double byte mode we have to |= with 0x8080 and use CP 20932 to convert it.
400         // In 0212 double byte mode we have to |= with 0x8000 and use CP 20932 to convert it.
401         //
402         // Note that JIS Shift In/Shift Out is different than the other ISO2022 encodings.  For JIS
403         // Shift out always shifts to half-width Katakana.  Chinese encodings use designator sequences
404         // instead of escape sequences and shift out to the designated sequence or back in to ASCII.
405         //
406         // When decoding JIS 0208, MLang used a '*' (0x2a) character in JIS 0208 mode to map the trailing byte
407         // to halfwidth katakana.  I found no description of that behavior, however that block of 0208 is
408         // undefined, so we maintain that behavior when decoding.  We will never generate characters using
409         // that technique, but the decoder will process them.
410         //
[System.Security.SecurityCritical]  // auto-generated
private unsafe int GetBytesCP5022xJP(char* chars, int charCount,
                                     byte* bytes, int byteCount, ISO2022Encoder encoder)
{
    // Encoder for the three ISO-2022-JP code pages (50220, 50221, 50222).
    // Each char is looked up in mapUnicodeToBytes and written as:
    //   - halfwidth Katakana (lead byte LEADBYTE_HALFWIDTH): converted to fullwidth for
    //     50220, written after ESC ( I for 50221, written after SHIFT_OUT for 50222
    //   - double byte (any other non-zero lead byte): written after ESC $ B
    //   - single byte: written after ESC ( B
    // Anything unmapped goes to the fallback.  encoder may be null; when it is not,
    // the mode is carried in and out through it.  Returns output.Count.
    int codePage = CodePage;

    // prepare our helpers
    Encoding.EncodingByteBuffer output = new Encoding.EncodingByteBuffer(
        this, encoder, bytes, byteCount, chars, charCount);

    // Current mode, and the mode that shift in goes back to (only used by CP 50222)
    ISO2022Modes mode = ISO2022Modes.ModeASCII;
    ISO2022Modes modeAfterShiftIn = ISO2022Modes.ModeASCII;

    // Pick up state from a previous call
    if (encoder != null)
    {
        char pendingChar = encoder.charLeftOver;

        mode = encoder.currentMode;
        modeAfterShiftIn = encoder.shiftInOutMode;

        // A left over char has to be a high surrogate, which we don't support,
        // so all we can do with it is fall back.
        if (pendingChar > 0)
        {
            Contract.Assert(Char.IsHighSurrogate(pendingChar), "[ISO2022Encoding.GetBytesCP5022xJP]leftover character should be high surrogate");
            output.Fallback(pendingChar);
        }
    }

    while (output.MoreData)
    {
        char c = output.GetNextChar();
        ushort mapped = mapUnicodeToBytes[c];

    Reconvert:
        byte lead = (byte)(mapped >> 8);
        byte trail = (byte)(mapped & 0xff);

        if (lead == LEADBYTE_HALFWIDTH)
        {
            //
            //  Halfwidth Katakana
            //
            if (codePage == 50220)
            {
                // 50220 doesn't use halfwidth Katakana: swap in the fullwidth form.
                // Out of table range means fallback (throws if recursive fallback).
                int kanaIndex = trail - 0x21;
                if (kanaIndex < 0 || kanaIndex >= HalfToFullWidthKanaTable.Length)
                {
                    output.Fallback(c);
                    continue;
                }

                mapped = unchecked((ushort)(HalfToFullWidthKanaTable[kanaIndex] & 0x7F7F));

                // The replacement may need its own mode handling, so dispatch again
                goto Reconvert;
            }

            // 50221 / 50222 can write halfwidth Katakana; get into Katakana mode first.
            // The mode variables only change after AddByte succeeds, so a failed
            // convert leaves the state as it was.
            if (mode != ISO2022Modes.ModeHalfwidthKatakana)
            {
                if (codePage == 50222)
                {
                    // 50222 shifts out
                    if (!output.AddByte(SHIFT_OUT))
                        break;  // convert out of space, stop

                    // Remember where shift in has to return to (this ends up always being ASCII)
                    modeAfterShiftIn = mode;
                }
                else
                {
                    // 50221 does halfwidth katakana by escape sequence
                    Contract.Assert(codePage == 50221, "[ISO2022Encoding.GetBytesCP5022xJP]Expected Code Page 50221");

                    if (!output.AddByte(ESCAPE, (byte)'(', (byte)'I'))
                        break;  // convert out of space, stop
                }

                mode = ISO2022Modes.ModeHalfwidthKatakana;
            }

            // Our table tail bytes are 0x80 too big, so mask that off when writing
            if (!output.AddByte((byte)(trail & 0x7F)))
                break;  // convert out of space, stop

            continue;
        }

        // Not Katakana: double byte if there is a lead byte, otherwise single byte,
        // provided the char mapped to something at all (U+0000 maps to byte 0).
        bool isDoubleByte = (lead != 0);
        if (!isDoubleByte && mapped == 0 && c != 0)
        {
            // Its unknown, do fallback, throws if recursive
            output.Fallback(c);
            continue;
        }

        // If we're CP 50222 we may have to shift in from Katakana mode first.
        // (Still might not be the right mode, but we won't be shifted out anyway.)
        if (codePage == 50222 && mode == ISO2022Modes.ModeHalfwidthKatakana)
        {
            if (!output.AddByte(SHIFT_IN))
                break;  // convert out of space, stop

            mode = modeAfterShiftIn;
        }

        if (isDoubleByte)
        {
            // Double byte char: JIS 0208 (we don't use JIS 0212 right now)
            if (mode != ISO2022Modes.ModeJIS0208)
            {
                if (!output.AddByte(ESCAPE, (byte)'$', (byte)'B'))
                    break;  // convert out of space, stop

                mode = ISO2022Modes.ModeJIS0208;
            }

            if (!output.AddByte(lead, trail))
                break;  // convert out of space, stop
        }
        else
        {
            // Single byte char: switch to ASCII if we have to
            if (mode != ISO2022Modes.ModeASCII)
            {
                if (!output.AddByte(ESCAPE, (byte)'(', (byte)'B'))
                    break;  // convert ran out of room

                mode = ISO2022Modes.ModeASCII;
            }

            if (!output.AddByte(trail))
                break;  // convert had no room left
        }
    }

    // Finish in ASCII when there is no encoder or the encoder must flush
    if (mode != ISO2022Modes.ModeASCII)
    {
        if (encoder == null || encoder.MustFlush)
        {
            // If we're CP 50222 we may have to shift in from Katakana mode first
            if (codePage == 50222 && mode == ISO2022Modes.ModeHalfwidthKatakana)
            {
                if (output.AddByte(SHIFT_IN))
                {
                    mode = modeAfterShiftIn;
                }
                else
                {
                    // Not written: convert will maintain state for next time.  AddByte
                    // will have decremented our char count, but we need it to remain the same.
                    output.GetNextChar();
                }
            }

            // Escape back to ASCII, unless a 50222 shift in is still outstanding
            bool stillShiftedOut =
                (codePage == 50222 && mode == ISO2022Modes.ModeHalfwidthKatakana);
            if (mode != ISO2022Modes.ModeASCII && !stillShiftedOut)
            {
                if (output.AddByte(ESCAPE, (byte)'(', (byte)'B'))
                {
                    mode = ISO2022Modes.ModeASCII;
                }
                else
                {
                    // Same char count correction as above
                    output.GetNextChar();
                }
            }
        }
    }

    // Remember our encoder state (only when actually converting, not when counting)
    if (bytes != null && encoder != null)
    {
        // This is ASCII if we had to flush
        encoder.currentMode = mode;
        encoder.shiftInOutMode = modeAfterShiftIn;

        if (!output.fallbackBuffer.bUsedEncoder)
        {
            encoder.charLeftOver = '\0';
        }

        encoder.m_charsUsed = output.CharsUsed;
    }

    // Return our length
    return output.Count;
}
630 
631         // ISO 2022 Code pages for Korean - CP 50225
632         //
633         // CP 50225 has Shift In/Shift Out codes, and a single designator sequence that is supposed
634         // to appear once in the file, at the beginning of a line, before any multibyte code points.
635         // So we stick the designator at the beginning of the output.
636         //
637         // These are the KR code page codes for ISO-2022-KR
638         //  0E          Shift Out (following bytes are double byte)
639         //  0F          Shift In  (back to ASCII behavior)
640         //  21-7E       Byte ranges (1 or 2 bytes)
641         //  <ESC> $)C   Double byte ISO-2022-KR designator
642         //
643         // Note that this encoding is a little different than other encodings.  The <esc>$)C sequence
644         // should only appear once per file.  (Actually I saw another spec/rfc that said at the beginning
645         // of each line, but it shouldn't really matter.)
646         //
647         // During decoding Mlang accepted ' ', '\t, and '\n' as their respective characters, even if
648         // it was in double byte mode.  We maintain that behavior, although I couldn't find a reference or
649         // reason for that behavior.  We never generate data using that shortcut.
650         //
651         // Also Mlang always assumed KR mode, even if the designator wasn't found yet, so we do that as
652         // well.  So basically we just ignore <ESC>$)C when decoding.
653         //
654         [System.Security.SecurityCritical]  // auto-generated
GetBytesCP50225KR(char* chars, int charCount, byte* bytes, int byteCount, ISO2022Encoder encoder)655         private unsafe int GetBytesCP50225KR(char* chars, int charCount,
656                                                     byte* bytes, int byteCount, ISO2022Encoder encoder)
657         {
658             // prepare our helpers
659             Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
660                 this, encoder, bytes, byteCount, chars, charCount);
661 
662             // Get our mode
663             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Mode
664             ISO2022Modes shiftOutMode = ISO2022Modes.ModeASCII;     // ModeKR if already stamped lead bytes
665 
666             // Check our encoder
667             if (encoder != null)
668             {
669                 // May have leftover stuff
670                 char charLeftOver = encoder.charLeftOver;
671                 currentMode = encoder.currentMode;
672                 shiftOutMode = encoder.shiftInOutMode;
673 
674                 // We may have a l left over character from last time, try and process it.
675                 if (charLeftOver > 0)
676                 {
677                     Contract.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP50225KR]leftover character should be high surrogate");
678 
679                     // It has to be a high surrogate, which we don't support, so it has to be a fallback
680                     buffer.Fallback(charLeftOver);
681                 }
682             }
683 
684             while (buffer.MoreData)
685             {
686                 // Get our data
687                 char ch = buffer.GetNextChar();
688 
689                 // Get our bytes
690                 ushort iBytes = mapUnicodeToBytes[ch];
691 
692                 // Check for double byte bytes
693                 byte bLeadByte = (byte)(iBytes >> 8);
694                 byte bTrailByte = (byte)(iBytes & 0xff);
695 
696                 if (bLeadByte != 0)
697                 {
698                     //
699                     //  It's a double byte character.
700                     //
701 
702                     // If we haven't done our Korean designator, then do so, if we have any input
703                     if (shiftOutMode != ISO2022Modes.ModeKR)
704                     {
705                         // Add our code page designator sequence
706                         if (!buffer.AddByte(ESCAPE, unchecked((byte)'$'), unchecked((byte)')'), unchecked((byte)'C')))
707                             break; // No room during convert.
708 
709                         shiftOutMode = ISO2022Modes.ModeKR;
710                     }
711 
712                     // May have to switch to ModeKR first
713                     if (currentMode != ISO2022Modes.ModeKR)
714                     {
715                         if (!buffer.AddByte(SHIFT_OUT))
716                             break; // No convert room
717 
718                         currentMode = ISO2022Modes.ModeKR;
719                     }
720 
721                     // Add the bytes
722                     if (!buffer.AddByte(bLeadByte, bTrailByte))
723                         break; // no convert room
724                     continue;
725                 }
726                 else if (iBytes != 0 || ch == 0)
727                 {
728                     // Its a single byte character, switch to ASCII if we have to
729                     if (currentMode != ISO2022Modes.ModeASCII)
730                     {
731                         if (!buffer.AddByte(SHIFT_IN))
732                             break;
733 
734                         currentMode = ISO2022Modes.ModeASCII;
735                     }
736 
737                     // Add the ASCII char
738                     if (!buffer.AddByte(bTrailByte))
739                         break;
740                     continue;
741                 }
742 
743                 // Its unknown, do fallback, throws if recursive (knows because we called InternalGetNextChar)
744                 buffer.Fallback(ch);
745             }
746 
747             // Switch back to ASCII if MustFlush or no encoder
748             if (currentMode != ISO2022Modes.ModeASCII &&
749                 (encoder == null || encoder.MustFlush))
750             {
751                 // Get back to ASCII to be safe.  Only do it if it success.
752                 if (buffer.AddByte(SHIFT_IN))
753                     currentMode = ISO2022Modes.ModeASCII;
754                 else
755                     // If not successful, convert will maintain state for next time, also
756                     // AddByte will have decremented our char count, however we need it to remain the same
757                     buffer.GetNextChar();
758             }
759 
760             // Remember our encoder state
761             if (bytes != null && encoder != null)
762             {
763                 // If we didn't use the encoder, then there's no chars left over
764                 if (!buffer.fallbackBuffer.bUsedEncoder)
765                 {
766                     encoder.charLeftOver = (char)0;
767                 }
768 
769                 // This is ASCII if we had to flush
770                 encoder.currentMode = currentMode;
771 
772                 // We don't use shift out mode, but if we've flushed we need to reset it so it doesn't
773                 // get output again.
774                 if (!encoder.MustFlush || encoder.charLeftOver != (char)0)
775                 {
776                     // We should be not flushing or converting
777                     Contract.Assert(!encoder.MustFlush || !encoder.m_throwOnOverflow,
778                         "[ISO2022Encoding.GetBytesCP50225KR]Expected no left over data or not flushing or not converting");
779                     encoder.shiftInOutMode = shiftOutMode;
780                 }
781                 else
782                     encoder.shiftInOutMode = ISO2022Modes.ModeASCII;
783 
784                 encoder.m_charsUsed = buffer.CharsUsed;
785             }
786 
787             // Return our length
788             return buffer.Count;
789         }
790 
791         // CP52936 is HZ Encoding
792         // HZ Encoding has 4 shift sequences:
793         // ~~       '~' (\u7e)
794         // ~}       shift into 1 byte mode,
795         // ~{       shift into 2 byte GB 2312-80
796         // ~<NL>    Maintain 2 byte mode across new lines (ignore both ~ and <NL> characters)
797         //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
798         //
799         // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
800         // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
801         // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
802         //
803         // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set.
804         // (all bytes <= 0x7f)
805         [System.Security.SecurityCritical]  // auto-generated
GetBytesCP52936(char* chars, int charCount, byte* bytes, int byteCount, ISO2022Encoder encoder)806         private unsafe int GetBytesCP52936(char* chars, int charCount,
807                                            byte* bytes, int byteCount, ISO2022Encoder encoder)
808         {
809             // prepare our helpers
810             Encoding.EncodingByteBuffer buffer = new Encoding.EncodingByteBuffer(
811                 this, encoder, bytes, byteCount, chars, charCount);
812 
813             // Mode
814             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
815 
816             // Check our encoder
817             if (encoder != null)
818             {
819                 char charLeftOver = encoder.charLeftOver;
820                 currentMode = encoder.currentMode;
821 
822                 // We may have a left over character from last time, try and process it.
823                 if (charLeftOver > 0)
824                 {
825                     Contract.Assert(Char.IsHighSurrogate(charLeftOver), "[ISO2022Encoding.GetBytesCP52936]leftover character should be high surrogate");
826 
827                     // It has to be a high surrogate, which we don't support, so it has to be a fallback
828                     buffer.Fallback(charLeftOver);
829                 }
830             }
831 
832             while (buffer.MoreData)
833             {
834                 // Get our char
835                 char ch = buffer.GetNextChar();
836 
837                 // Get our bytes
838                 ushort sChar = mapUnicodeToBytes[ch];
839                 if (sChar == 0 && ch != 0)
840                 {
841                     // Wasn't a legal byte sequence, its a surrogate or fallback
842                     // Throws if recursive (knows because we called InternalGetNextChar)
843                     buffer.Fallback(ch);
844 
845                     // Done with our char, now process fallback
846                     continue;
847                 }
848 
849                 // Check for halfwidth bytes
850                 byte bLeadByte = (byte)(sChar >> 8);
851                 byte bTrailByte = (byte)(sChar & 0xff);
852 
853                 // If its a double byte, it has to fit in the lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe range
854                 // (including the 0x8080 that our codepage or's to the value)
855                 if ((bLeadByte != 0 &&
856                      (bLeadByte < 0xa1 || bLeadByte > 0xf7 || bTrailByte < 0xa1 || bTrailByte > 0xfe)) ||
857                     (bLeadByte == 0 && bTrailByte > 0x80 && bTrailByte != 0xff))
858                 {
859                     // Illegal character, in 936 code page, but not in HZ subset, get fallback for it
860                     buffer.Fallback(ch);
861                     continue;
862                 }
863 
864                 // sChar is now either ASCII or has an 0x8080 mask
865                 if (bLeadByte != 0)
866                 {
867                     // Its a double byte mode
868                     if (currentMode != ISO2022Modes.ModeHZ)
869                     {
870                         // Need to add the double byte mode marker
871                         if (!buffer.AddByte((byte)'~', (byte)'{', 2))
872                             break;                                      // Stop if no buffer space in convert
873 
874                         currentMode = ISO2022Modes.ModeHZ;
875                     }
876 
877                     // Go ahead and add the 2 bytes
878                     if (!buffer.AddByte(unchecked((byte)(bLeadByte & 0x7f)), unchecked((byte)(bTrailByte & 0x7f))))
879                         break;                                      // Stop if no buffer space in convert
880                 }
881                 else
882                 {
883                     // Its supposed to be ASCII
884                     if (currentMode != ISO2022Modes.ModeASCII)
885                     {
886                         // Need to add the ASCII mode marker
887                         // Will have 1 more byte (or 2 if ~)
888                         if (!buffer.AddByte((byte)'~', (byte)'}', bTrailByte == '~' ? 2:1))
889                             break;
890 
891                         currentMode = ISO2022Modes.ModeASCII;
892                     }
893 
894                     // If its a '~' we'll need an extra one
895                     if (bTrailByte == '~')
896                     {
897                         // Need to add the extra ~
898                         if (!buffer.AddByte((byte)'~', 1))
899                             break;
900                     }
901 
902                     // Need to add the character
903                     if (!buffer.AddByte(bTrailByte))
904                         break;
905                 }
906             }
907 
908             // Add ASCII shift out if we're at end of decoder
909             if (currentMode != ISO2022Modes.ModeASCII &&
910                 (encoder == null || encoder.MustFlush))
911             {
912                 // Need to add the ASCII mode marker
913                 // Only turn off other mode if this works
914                 if (buffer.AddByte((byte)'~',(byte)'}'))
915                     currentMode = ISO2022Modes.ModeASCII;
916                 else
917                     // If not successful, convert will maintain state for next time, also
918                     // AddByte will have decremented our char count, however we need it to remain the same
919                     buffer.GetNextChar();
920             }
921 
922             // Need to remember our mode
923             if (encoder != null && bytes != null)
924             {
925                 // This is ASCII if we had to flush
926                 encoder.currentMode = currentMode;
927 
928                 if (!buffer.fallbackBuffer.bUsedEncoder)
929                 {
930                     encoder.charLeftOver = (char)0;
931                 }
932 
933                 encoder.m_charsUsed = buffer.CharsUsed;
934             }
935 
936             // Return our length
937             return buffer.Count;
938         }
939 
940         [System.Security.SecurityCritical]  // auto-generated
GetCharsCP5022xJP(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)941         private unsafe int GetCharsCP5022xJP(byte* bytes, int byteCount,
942                                                   char* chars, int charCount, ISO2022Decoder decoder)
943         {
944             // Get our info.
945             Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
946                 this, decoder, chars, charCount, bytes, byteCount);
947 
948             // No mode information yet
949             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode
950             ISO2022Modes shiftInMode = ISO2022Modes.ModeASCII;      // Mode that we'll shift in to
951             byte[] escapeBytes = new byte[4];
952             int escapeCount = 0;
953 
954             if (decoder != null)
955             {
956                 currentMode = decoder.currentMode;
957                 shiftInMode = decoder.shiftInOutMode;
958 
959                 // See if we have leftover decoder buffer to use
960                 // Load our bytesLeftOver
961                 escapeCount = decoder.bytesLeftOverCount;
962 
963                 // Don't want to mess up decoder if we're counting or throw an exception
964                 for (int i = 0; i < escapeCount; i++)
965                     escapeBytes[i] = decoder.bytesLeftOver[i];
966             }
967 
968             // Do this until the end
969             while (buffer.MoreData || escapeCount > 0)
970             {
971                 byte ch;
972 
973                 if (escapeCount > 0)
974                 {
975                     // Get more escape sequences if necessary
976                     if (escapeBytes[0] == ESCAPE)
977                     {
978                         // Stop if no more input
979                         if (!buffer.MoreData)
980                         {
981                             if (decoder != null && !decoder.MustFlush)
982                                 break;
983                         }
984                         else
985                         {
986                             // Add it to the sequence we can check
987                             escapeBytes[escapeCount++] = buffer.GetNextByte();
988 
989                             // We have an escape sequence
990                             ISO2022Modes modeReturn =
991                                 CheckEscapeSequenceJP(escapeBytes, escapeCount);
992 
993                             if (modeReturn != ISO2022Modes.ModeInvalidEscape)
994                             {
995                                 if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
996                                 {
997                                     // Processed escape correctly
998                                     escapeCount = 0;
999 
1000                                     // We're now this mode
1001                                     currentMode = shiftInMode = modeReturn;
1002                                 }
1003 
1004                                 // Either way, continue to get next escape or real byte
1005                                 continue;
1006                             }
1007                         }
1008 
1009                         // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
1010                     }
1011 
1012                     // Read next escape byte and move them down one.
1013                     ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
1014                 }
1015                 else
1016                 {
1017                     // Get our next byte
1018                     ch = buffer.GetNextByte();
1019 
1020                     if (ch == ESCAPE)
1021                     {
1022                         // We'll have an escape sequence, use it if we don't have one buffered already
1023                         if (escapeCount == 0)
1024                         {
1025                             // Start this new escape sequence
1026                             escapeBytes[0] = ch;
1027                             escapeCount = 1;
1028                             continue;
1029                         }
1030 
1031                         // Flush the previous escape sequence, then reuse this escape byte
1032                         buffer.AdjustBytes(-1);
1033                     }
1034                 }
1035 
1036                 if (ch == SHIFT_OUT)
1037                 {
1038                    shiftInMode = currentMode;
1039                    currentMode = ISO2022Modes.ModeHalfwidthKatakana;
1040                    continue;
1041                 }
1042                 else if (ch == SHIFT_IN)
1043                 {
1044                    currentMode = shiftInMode;
1045                    continue;
1046                 }
1047 
1048                 // Get our full character
1049                 ushort iBytes = ch;
1050                 bool b2Bytes = false;
1051 
1052                 if (currentMode == ISO2022Modes.ModeJIS0208)
1053                 {
1054                     //
1055                     //  To handle errors, we need to check:
1056                     //    1. if trailbyte is there
1057                     //    2. if code is valid
1058                     //
1059                     if (escapeCount > 0)
1060                     {
1061                         // Let another escape fall through
1062                         if (escapeBytes[0] != ESCAPE)
1063                         {
1064                             // Move them down one & get the next data
1065                             iBytes <<= 8;
1066                             iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
1067                             b2Bytes = true;
1068                         }
1069                     }
1070                     else if (buffer.MoreData)
1071                     {
1072                         iBytes <<= 8;
1073                         iBytes |= buffer.GetNextByte();
1074                         b2Bytes = true;
1075                     }
1076                     else
1077                     {
1078                         // Not enough input, use decoder if possible
1079                         if (decoder == null || decoder.MustFlush)
1080                         {
1081                             // No decoder, do fallback for this byte
1082                             buffer.Fallback(ch);
1083                             break;
1084                         }
1085 
1086                         // Stick it in the decoder if we're not counting
1087                         if (chars != null)
1088                         {
1089                             escapeBytes[0] = ch;
1090                             escapeCount = 1;
1091                         }
1092                         break;
1093                     }
1094 
1095                     // MLang treated JIS 0208 '*' lead byte like a single halfwidth katakana
1096                     // escape, so use 0x8e00 as katakana lead byte and keep same trail byte.
1097                     // 0x2a lead byte range is normally unused in JIS 0208, so shouldn't have
1098                     // any wierd compatibility issues.
1099                     if ((b2Bytes == true) && ((iBytes & 0xff00) == 0x2a00))
1100                     {
1101                         iBytes = (ushort)(iBytes & 0xff);
1102                         iBytes |= (LEADBYTE_HALFWIDTH << 8);   // Put us in the halfwidth katakana range
1103                     }
1104                 }
1105                 else if (iBytes >= 0xA1 && iBytes <= 0xDF)
1106                 {
1107                     // Everett accidentally mapped Katakana like shift-jis (932),
1108                     // even though this is a 7 bit code page.  We keep that mapping
1109                     iBytes |= (LEADBYTE_HALFWIDTH << 8);    // Map to halfwidth katakana range
1110                     iBytes &= 0xff7f;                       // remove extra 0x80
1111                 }
1112                 else if (currentMode == ISO2022Modes.ModeHalfwidthKatakana )
1113                 {
1114                     // Add 0x10 lead byte that our encoding expects for Katakana:
1115                     iBytes |= (LEADBYTE_HALFWIDTH << 8);
1116                 }
1117 
1118                 // We have an iBytes to try to convert.
1119                 char c = mapBytesToUnicode[iBytes];
1120 
1121                 // See if it was unknown
1122                 if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
1123                 {
1124                     // Have to do fallback
1125                     if (b2Bytes)
1126                     {
1127                         if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
1128                             break;
1129                     }
1130                     else
1131                     {
1132                         if (!buffer.Fallback(ch))
1133                             break;
1134                     }
1135                 }
1136                 else
1137                 {
1138                     // If we were JIS 0208, then we consumed an extra byte
1139                     if (!buffer.AddChar(c, b2Bytes ? 2:1))
1140                         break;
1141                 }
1142             }
1143 
1144             // Make sure our decoder state matches our mode, if not counting
1145             if (chars != null && decoder != null)
1146             {
1147                 // Remember it if we don't flush
1148                 if (!decoder.MustFlush || escapeCount != 0)
1149                 {
1150                     // Either not flushing or had state (from convert)
1151                     Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
1152                         "[ISO2022Encoding.GetCharsCP5022xJP]Expected no state or not converting or not flushing");
1153 
1154                     decoder.currentMode = currentMode;
1155                     decoder.shiftInOutMode = shiftInMode;
1156 
1157                     // Remember escape buffer
1158                     decoder.bytesLeftOverCount = escapeCount;
1159                     decoder.bytesLeftOver = escapeBytes;
1160                 }
1161                 else
1162                 {
1163                     // We flush, clear buffer
1164                     decoder.currentMode = ISO2022Modes.ModeASCII;
1165                     decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
1166                     decoder.bytesLeftOverCount = 0;
1167                     // Slightly different if counting/not counting
1168                 }
1169 
1170                 decoder.m_bytesUsed = buffer.BytesUsed;
1171             }
1172 
1173             // Return # of characters we found
1174             return buffer.Count;
1175         }
1176 
1177         // We know we have an escape sequence, so check it starting with the byte after the escape
CheckEscapeSequenceJP( byte[] bytes, int escapeCount )1178         private ISO2022Modes CheckEscapeSequenceJP( byte[] bytes, int escapeCount )
1179         {
1180             // Have an escape sequence
1181             if (bytes[0] != ESCAPE)
1182                 return ISO2022Modes.ModeInvalidEscape;
1183 
1184             if (escapeCount < 3)
1185                 return ISO2022Modes.ModeIncompleteEscape;
1186 
1187             if (bytes[1] == '(')
1188             {
1189                 if (bytes[2] == 'B')       // <esc>(B
1190                 {
1191                     return ISO2022Modes.ModeASCII;
1192                 }
1193                 else if (bytes[2] == 'H')  // <esc>(H
1194                 {
1195                     // Actually this is supposed to be Swedish
1196                     // We treat it like ASCII though.
1197                     return ISO2022Modes.ModeASCII;
1198                 }
1199                 else if (bytes[2] == 'J')  // <esc>(J
1200                 {
1201                     // Actually this is supposed to be Roman
1202                     // 2 characters are different, but historically we treat it as ascii
1203                     return ISO2022Modes.ModeASCII;
1204                 }
1205                 else if (bytes[2] == 'I')  // <esc>(I
1206                 {
1207                     return ISO2022Modes.ModeHalfwidthKatakana;
1208                 }
1209             }
1210             else if (bytes[1] == '$')
1211             {
1212                 if (bytes[2] == '@' ||   // <esc>$@
1213                     bytes[2] == 'B')     // <esc>$B
1214                 {
1215                     return ISO2022Modes.ModeJIS0208;
1216                 }
1217                 else
1218                 {
1219                     // Looking for <esc>$(D
1220                     if (escapeCount < 4)
1221                         return ISO2022Modes.ModeIncompleteEscape;
1222 
1223                     if (bytes[2] == '(' && bytes[3] == 'D') // <esc>$(D
1224                     {
1225                         // Mlang treated 0208 like 0212 even though that's wrong
1226                         return ISO2022Modes.ModeJIS0208;
1227                     }
1228                 }
1229             }
1230             else if (bytes[1] == '&')
1231             {
1232                 if (bytes[2] == '@')            // <esc>&@
1233                 {
1234                     // Ignore ESC & @ (prefix to <esc>$B)
1235                     return ISO2022Modes.ModeNOOP;
1236                 }
1237             }
1238 
1239             // If we get here we fell through and have an invalid/unknown escape sequence
1240             return ISO2022Modes.ModeInvalidEscape;
1241         }
1242 
DecrementEscapeBytes(ref byte[] bytes, ref int count)1243         private byte DecrementEscapeBytes(ref byte[] bytes, ref int count)
1244         {
1245             Contract.Assert(count > 0, "[ISO2022Encoding.DecrementEscapeBytes]count > 0");
1246 
1247             // Decrement our count
1248             count--;
1249 
1250             // Remember the first one
1251             byte returnValue = bytes[0];
1252 
1253             // Move them down one.
1254             for (int i = 0; i < count; i++)
1255             {
1256                 bytes[i] = bytes[i+1];
1257             }
1258 
1259             // Clear out the last byte
1260             bytes[count] = 0;
1261 
1262             // Return the old 1st byte
1263             return returnValue;
1264         }
1265 
1266         // Note that in DBCS mode mlang passed through ' ', '\t' and '\n' as SBCS characters
1267         // probably to allow mailer formatting without too much extra work.
1268         [System.Security.SecurityCritical]  // auto-generated
GetCharsCP50225KR(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)1269         private unsafe int GetCharsCP50225KR(byte* bytes, int byteCount,
1270                                                    char* chars, int charCount, ISO2022Decoder decoder)
1271         {
1272             // Get our info.
1273             Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
1274                 this, decoder, chars, charCount, bytes, byteCount);
1275 
1276             // No mode information yet
1277             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;      // Our current Mode
1278 
1279             byte[] escapeBytes = new byte[4];
1280             int escapeCount = 0;
1281 
1282             if (decoder != null)
1283             {
1284                 currentMode = decoder.currentMode;
1285 
1286                 // See if we have leftover decoder buffer to use
1287                 // Load our bytesLeftOver
1288                 escapeCount = decoder.bytesLeftOverCount;
1289 
1290                 // Don't want to mess up decoder if we're counting or throw an exception
1291                 for (int i = 0; i < escapeCount; i++)
1292                     escapeBytes[i] = decoder.bytesLeftOver[i];
1293             }
1294 
1295             // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
1296             while (buffer.MoreData || escapeCount > 0)
1297             {
1298                 byte ch;
1299 
1300                 if (escapeCount > 0)
1301                 {
1302                     // Get more escape sequences if necessary
1303                     if (escapeBytes[0] == ESCAPE)
1304                     {
1305                         // Stop if no more input
1306                         if (!buffer.MoreData)
1307                         {
1308                             if (decoder != null && !decoder.MustFlush)
1309                                 break;
1310                         }
1311                         else
1312                         {
1313                             // Add it to the sequence we can check
1314                             escapeBytes[escapeCount++] = buffer.GetNextByte();
1315 
1316                             // We have an escape sequence
1317                             ISO2022Modes modeReturn =
1318                                 CheckEscapeSequenceKR(escapeBytes, escapeCount);
1319 
1320                             if (modeReturn != ISO2022Modes.ModeInvalidEscape)
1321                             {
1322                                 if (modeReturn != ISO2022Modes.ModeIncompleteEscape)
1323                                 {
1324                                     // Processed escape correctly, no effect (we know about KR mode)
1325                                     escapeCount = 0;
1326                                 }
1327 
1328                                 // Either way, continue to get next escape or real byte
1329                                 continue;
1330                             }
1331                         }
1332 
1333                         // If ModeInvalidEscape, or no input & must flush, then fall through to add escape.
1334                     }
1335 
1336                     // Still have something left over in escape buffer
1337                     // Get it and move them down one
1338                     ch = DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
1339                 }
1340                 else
1341                 {
1342                     // Get our next byte
1343                     ch = buffer.GetNextByte();
1344 
1345                     if (ch == ESCAPE)
1346                     {
1347                         // We'll have an escape sequence, use it if we don't have one buffered already
1348                         if (escapeCount == 0)
1349                         {
1350                             // Start this new escape sequence
1351                             escapeBytes[0] = ch;
1352                             escapeCount = 1;
1353                             continue;
1354                         }
1355 
1356                         // Flush previous escape sequence, then reuse this escape byte
1357                         buffer.AdjustBytes(-1);
1358                     }
1359                 }
1360 
1361                 if (ch == SHIFT_OUT)
1362                 {
1363                    currentMode = ISO2022Modes.ModeKR;
1364                    continue;
1365                 }
1366                 else if (ch == SHIFT_IN)
1367                 {
1368                    currentMode = ISO2022Modes.ModeASCII;
1369                    continue;
1370                 }
1371 
1372                 // Get our full character
1373                 ushort iBytes = ch;
1374                 bool b2Bytes = false;
1375 
1376                 // MLANG was passing through ' ', '\t' and '\n', so we do so as well, but I don't see that in the RFC.
1377                 if (currentMode == ISO2022Modes.ModeKR && ch != ' ' && ch != '\t' && ch != '\n')
1378                 {
1379                     //
1380                     //  To handle errors, we need to check:
1381                     //    1. if trailbyte is there
1382                     //    2. if code is valid
1383                     //
1384                     if (escapeCount > 0)
1385                     {
1386                         // Let another escape fall through
1387                         if (escapeBytes[0] != ESCAPE)
1388                         {
1389                             // Move them down one & get the next data
1390                             iBytes <<= 8;
1391                             iBytes |= DecrementEscapeBytes(ref escapeBytes, ref escapeCount);
1392                             b2Bytes = true;
1393                         }
1394                     }
1395                     else if (buffer.MoreData)
1396                     {
1397                         iBytes <<= 8;
1398                         iBytes |= buffer.GetNextByte();
1399                         b2Bytes = true;
1400                     }
1401                     else
1402                     {
1403                         // Not enough input, use decoder if possible
1404                         if (decoder == null || decoder.MustFlush)
1405                         {
1406                             // No decoder, do fallback for lonely 1st byte
1407                             buffer.Fallback(ch);
1408                             break;
1409                         }
1410 
1411                         // Stick it in the decoder if we're not counting
1412                         if (chars != null)
1413                         {
1414                             escapeBytes[0] = ch;
1415                             escapeCount = 1;
1416                         }
1417                         break;
1418                     }
1419                 }
1420 
1421                 // We have a iBytes to try to convert.
1422                 char c = mapBytesToUnicode[iBytes];
1423 
1424                 // See if it was unknown
1425                 if (c == UNKNOWN_CHAR_FLAG && iBytes != 0)
1426                 {
1427                     // Have to do fallback
1428                     if (b2Bytes)
1429                     {
1430                         if (!buffer.Fallback((byte)(iBytes >> 8), (byte)iBytes))
1431                             break;
1432                     }
1433                     else
1434                     {
1435                         if (!buffer.Fallback(ch))
1436                             break;
1437                     }
1438                 }
1439                 else
1440                 {
1441                     if (!buffer.AddChar(c, b2Bytes ? 2:1))
1442                         break;
1443                 }
1444             }
1445 
1446             // Make sure our decoder state matches our mode, if not counting
1447             if (chars != null && decoder != null)
1448             {
1449                 // Remember it if we don't flush
1450                 if (!decoder.MustFlush || escapeCount != 0)
1451                 {
1452                     // Either not flushing or had state (from convert)
1453                     Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
1454                         "[ISO2022Encoding.GetCharsCP50225KR]Expected no state or not converting or not flushing");
1455 
1456                     decoder.currentMode = currentMode;
1457 
1458                     // Remember escape buffer
1459                     decoder.bytesLeftOverCount = escapeCount;
1460                     decoder.bytesLeftOver = escapeBytes;
1461                 }
1462                 else
1463                 {
1464                     // We flush, clear buffer
1465                     decoder.currentMode = ISO2022Modes.ModeASCII;
1466                     decoder.shiftInOutMode = ISO2022Modes.ModeASCII;
1467                     decoder.bytesLeftOverCount = 0;
1468                 }
1469 
1470                 decoder.m_bytesUsed = buffer.BytesUsed;
1471             }
1472 
1473             // Return # of characters we found
1474             return buffer.Count;
1475         }
1476 
1477         // We know we have an escape sequence, so check it starting with the byte after the escape
CheckEscapeSequenceKR( byte[] bytes, int escapeCount )1478         private ISO2022Modes CheckEscapeSequenceKR( byte[] bytes, int escapeCount )
1479         {
1480             // Have an escape sequence
1481             if (bytes[0] != ESCAPE)
1482                 return ISO2022Modes.ModeInvalidEscape;
1483 
1484             if (escapeCount < 4)
1485                 return ISO2022Modes.ModeIncompleteEscape;
1486 
1487             if (bytes[1] == '$' && bytes[2] == ')' && bytes[3] == 'C') // <esc>$)C
1488                 return ISO2022Modes.ModeKR;
1489 
1490             // If we get here we fell through and have an invalid/unknown escape sequence
1491             return ISO2022Modes.ModeInvalidEscape;
1492         }
1493 
1494         // CP52936 is HZ Encoding
1495         // HZ Encoding has 4 shift sequences:
1496         // ~~       '~' (\u7e)
1497         // ~}       shift into 1 byte mode,
1498         // ~{       shift into 2 byte GB 2312-80
1499         // ~<NL>    Maintain 2 byte mode across new lines (ignore both ~ and <NL> characters)
1500         //          (This is for mailers that restrict to 70 or 80 or whatever character lines)
1501         //
1502         // According to comment in mlang, lead & trail byte ranges are described in RFC 1843
1503         // RFC 1843 => valid HZ code range: leading byte 0x21 - 0x77, 2nd byte 0x21 - 0x7e
1504         // Our 936 code points are or'd with 0x8080, so lead byte 0xa1 - 0xf7, trail byte 0xa1 - 0xfe
1505         //
1506         // This encoding is designed for transmission by e-mail and news.  No bytes should have high bit set.
1507         // (all bytes <= 0x7f)
             //
             // Decodes HZ bytes through an EncodingCharBuffer and returns the resulting char count (buffer.Count).
             //   bytes / byteCount - input to decode (asserted non-null / non-negative)
             //   chars / charCount - output buffer; the left over byte and the mode are only saved back to the
             //                       decoder when chars is not null
             //   decoder           - may be null; when present it supplies the starting mode and an optional
             //                       left over byte, and receives the final mode, left over byte and m_bytesUsed
             //
             // A trailing '~' or a trailing HZ lead byte is either sent to buffer.Fallback (no decoder, or the
             // decoder must flush) or remembered in decoder.bytesLeftOver[0] for the next call.
1508         [System.Security.SecurityCritical]  // auto-generated
GetCharsCP52936(byte* bytes, int byteCount, char* chars, int charCount, ISO2022Decoder decoder)1509         private unsafe int GetCharsCP52936(byte* bytes, int byteCount,
1510                                                 char* chars, int charCount, ISO2022Decoder decoder)
1511         {
1512             Contract.Assert(byteCount >=0, "[ISO2022Encoding.GetCharsCP52936]count >=0");
1513             Contract.Assert(bytes!=null, "[ISO2022Encoding.GetCharsCP52936]bytes!=null");
1514 
1515             // Get our info.
1516             Encoding.EncodingCharBuffer buffer = new Encoding.EncodingCharBuffer(
1517                 this, decoder, chars, charCount, bytes, byteCount);
1518 
1519             // No mode information yet
1520             ISO2022Modes currentMode = ISO2022Modes.ModeASCII;
                 // -1 means there is no pending byte; otherwise it holds the byte saved by a previous call
1521             int byteLeftOver = -1;
                 // Set when this call stores a byte in the decoder, so the cleanup at the end keeps it
1522             bool bUsedDecoder = false;
1523 
1524             if (decoder != null)
1525             {
1526                 currentMode = decoder.currentMode;
1527                 // See if we have leftover decoder buffer to use
1528                 // Don't want to mess up decoder if we're counting or throw an exception
1529                 if (decoder.bytesLeftOverCount != 0 )
1530                 {
1531                     // Load our bytesLeftOver
1532                     byteLeftOver = decoder.bytesLeftOver[0];
1533                 }
1534             }
1535 
1536             // Do this until the end, just do '?' replacement because we don't have fallbacks for decodings.
1537             while (buffer.MoreData || byteLeftOver >= 0)
1538             {
1539                 byte ch;
1540 
1541                 // May have a left over byte
1542                 if (byteLeftOver >= 0)
1543                 {
                         // Consume the pending byte first, and only once
1544                     ch = (byte)byteLeftOver;
1545                     byteLeftOver = -1;
1546                 }
1547                 else
1548                 {
1549                     ch = buffer.GetNextByte();
1550                 }
1551 
1552                 // We're in escape mode
1553                 if (ch == '~')
1554                 {
1555                     // Next char is type of switch
1556                     if (!buffer.MoreData)
1557                     {
1558                         // We don't have anything left, it'll be in decoder or a ?
1559                         // don't fail if we are allowing overflows
1560                         if (decoder == null || decoder.MustFlush)
1561                         {
1562                             // We'll be a '?'
1563                             buffer.Fallback(ch);
1564                             // break if we fail & break if we don't (because !MoreData)
1565                             // Add succeeded, continue
1566                             break;
1567                         }
1568 
1569                         // Stick it in decoder
                             // (decoder cannot be null here, the null case already broke out above)
1570                         if (decoder != null)
1571                             decoder.ClearMustFlush();
1572 
                             // Only remember the '~' when we are really converting, not when chars is null
1573                         if (chars != null)
1574                         {
1575                             decoder.bytesLeftOverCount = 1;
1576                             decoder.bytesLeftOver[0] = (byte)'~';
1577                             bUsedDecoder = true;
1578                         }
1579                         break;
1580                     }
1581 
1582                     // What type is it?, get 2nd byte
1583                     ch = buffer.GetNextByte();
1584 
                         // Note that ~~ is only treated as an escaped '~' while in ASCII mode
1585                     if (ch == '~' && currentMode == ISO2022Modes.ModeASCII)
1586                     {
1587                         // It's just a ~~ replacement for ~, add it
                             // (the 2 is presumably the number of input bytes this char represents - TODO confirm
                             // against EncodingCharBuffer.AddChar)
1588                         if (!buffer.AddChar((char)ch, 2))
1589                             // Add failed, break for converting
1590                             break;
1591 
1592                         // Add succeeded, continue
1593                         continue;
1594                     }
1595                     else if (ch == '{')
1596                     {
1597                         // Switching to Double Byte mode
1598                         currentMode = ISO2022Modes.ModeHZ;
1599                         continue;
1600                     }
1601                     else if (ch == '}')
1602                     {
1603                         // Switching to ASCII mode
1604                         currentMode = ISO2022Modes.ModeASCII;
1605                         continue;
1606                     }
1607                     else if (ch == '\n')
1608                     {
1609                         // Ignore ~\n sequence
1610                         continue;
1611                     }
1612                     else
1613                     {
1614                         // Unknown escape, back up and try the '~' as a "normal" byte or lead byte
                             // (the byte we just read is handed back so it is read again on a later pass)
1615                         buffer.AdjustBytes(-1);
1616                         ch = (byte)'~';
1617                     }
1618                 }
1619 
1620                 // go ahead and add our data
1621                 if (currentMode != ISO2022Modes.ModeASCII)
1622                 {
1623                     // Should be ModeHZ
1624                     Contract.Assert(currentMode == ISO2022Modes.ModeHZ, "[ISO2022Encoding.GetCharsCP52936]Expected ModeHZ");
1625                     char cm;
1626 
1627                     // Everett allowed characters < 0x20 to be passed as if they were ASCII
1628                     if (ch < 0x20)
1629                     {
1630                         // Emit it as ASCII
                             // (jumps out of the double byte branch to the single byte lookup at the bottom of the loop)
1631                         goto STOREASCII;
1632                     }
1633 
1634                     // It's multibyte, should have another byte
1635                     if (!buffer.MoreData)
1636                     {
1637                         // No bytes left
1638                         // don't fail if we are allowing overflows
1639                         if (decoder == null || decoder.MustFlush)
1640                         {
1641                             // Not enough bytes, fallback lead byte
1642                             buffer.Fallback(ch);
1643 
1644                             // Break if we fail & break because !MoreData
1645                             break;
1646                         }
1647 
                             // (decoder cannot be null here, the null case already broke out above)
1648                         if (decoder != null)
1649                             decoder.ClearMustFlush();
1650 
1651                         // Stick it in decoder
                             // Only remember the lead byte when we are really converting, not when chars is null
1652                         if (chars != null)
1653                         {
1654                             decoder.bytesLeftOverCount = 1;
1655                             decoder.bytesLeftOver[0] = ch;
1656                             bUsedDecoder = true;
1657                         }
1658                         break;
1659                     }
1660 
1661                     // Everett uses space as an escape character for single SBCS bytes
                         // iBytes is the lead byte in the high 8 bits and the trail byte in the low 8 bits
1662                     byte ch2 = buffer.GetNextByte();
1663                     ushort iBytes = (ushort)(ch << 8 | ch2);
1664 
1665                     if (ch == ' ' && ch2 != 0)
1666                     {
1667                         // Get next char and treat it like ASCII (Everett treated space like an escape
1668                         // allowing the next char to be just ascii)
                             // The trail byte is used as the char directly, skipping the table lookup
1669                         cm = (char)ch2;
1670                         goto STOREMULTIBYTE;
1671                     }
1672 
1673                     // Bytes should be in range: lead byte 0x21-0x77, trail byte: 0x21 - 0x7e
1674                     if ((ch < 0x21 || ch > 0x77 || ch2 < 0x21 || ch2 > 0x7e) &&
1675                     // Everett allowed high bit mappings for same characters (but only if both bits set)
1676                         (ch < 0xa1 || ch > 0xf7 || ch2 < 0xa1 || ch2 > 0xfe))
1677                     {
1678                         // For some reason Everett allowed XX20 to become unicode 3000... (ideo sp)
1679                         if (ch2 == 0x20 && 0x21 <= ch && ch <= 0x7d)
1680                         {
                                 // Substitute 0x2121 and look that up instead (becomes 0xa1a1 after the or below)
1681                             iBytes = 0x2121;
1682                             goto MULTIBYTE;
1683                         }
1684 
1685                         // Illegal char, use fallback.  If lead byte is 0 have to do it special and do it first
1686                         if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes)))
1687                             break;
1688                         continue;
1689                     }
1690 
1691                     MULTIBYTE:
                         // Set both high bits; per the header comment our 936 code points are or'd with 0x8080
1692                     iBytes |= 0x8080;
1693                     // Look up the multibyte char to stick it in our data
1694 
1695                     // We have a iBytes to try to convert.
1696                     cm = mapBytesToUnicode[iBytes];
1697 
1698                     STOREMULTIBYTE:
1699 
1700                     // See if it was unknown
1701                     if (cm == UNKNOWN_CHAR_FLAG && iBytes != 0)
1702                     {
1703                         // Fall back the unknown stuff
1704                         if (!buffer.Fallback((byte)(iBytes>>8), (byte)(iBytes)))
1705                             break;
1706                         continue;
1707                     }
1708 
1709                     if (!buffer.AddChar(cm, 2))
1710                         break;              // convert ran out of buffer, stop
1711                     continue;
1712                 }
1713 
1714                 // Just ASCII
1715                 // We allow some chars > 7f because everett did, so we have to look them up.
                     // (also the target of the goto used for bytes < 0x20 seen while in HZ mode)
1716                 STOREASCII:
1717                 char c = mapBytesToUnicode[ch];
1718 
1719                 // Check if it was unknown
1720                 if ((c == UNKNOWN_CHAR_FLAG || c == 0) && (ch != 0))
1721                 {
1722                     // fallback the unknown bytes
1723                     if (!buffer.Fallback((byte)ch))
1724                         break;
1725                     continue;
1726                 }
1727 
1728                 // Go ahead and add our ASCII character
1729                 if (!buffer.AddChar(c))
1730                     break;                  // convert ran out of buffer, stop
1731             }
1732 
1733             // Need to remember our state, IF we're not counting
1734             if (chars != null && decoder != null)
1735             {
1736                 if (!bUsedDecoder)
1737                 {
1738                     // If we didn't use it, clear the byte left over
1739                     decoder.bytesLeftOverCount = 0;
1740                 }
1741 
                     // Flushing with nothing pending puts the decoder back in ASCII mode,
                     // otherwise the mode we ended in is kept for the next call
1742                 if (decoder.MustFlush && decoder.bytesLeftOverCount == 0)
1743                 {
1744                     decoder.currentMode = ISO2022Modes.ModeASCII;
1745                 }
1746                 else
1747                 {
1748                     // Either not flushing or had state (from convert)
1749                     Contract.Assert(!decoder.MustFlush || !decoder.m_throwOnOverflow,
1750                         "[ISO2022Encoding.GetCharsCP52936]Expected no state or not converting or not flushing");
1751 
1752                     decoder.currentMode = currentMode;
1753                 }
                     // Report how many input bytes the buffer says were used
1754                 decoder.m_bytesUsed = buffer.BytesUsed;
1755             }
1756 
1757             // Return # of characters we found
1758             return buffer.Count;
1759         }
1760 
1761         // Note: These all end up with 1/2 bytes of average byte count, so unless we're 1 we're always
1762         // charCount/2 bytes too big.
GetMaxByteCount(int charCount)1763         public override int GetMaxByteCount(int charCount)
1764         {
1765             if (charCount < 0)
1766                throw new ArgumentOutOfRangeException("charCount",
1767                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1768             Contract.EndContractBlock();
1769 
1770             // Characters would be # of characters + 1 in case high surrogate is ? * max fallback
1771             long byteCount = (long)charCount + 1;
1772 
1773             if (EncoderFallback.MaxCharCount > 1)
1774                 byteCount *= EncoderFallback.MaxCharCount;
1775 
1776             // Start with just generic DBCS values (sort of).
1777             int perChar = 2;
1778             int extraStart = 0;
1779             int extraEnd = 0;
1780 
1781             switch (CodePage)
1782             {
1783                 case 50220:
1784                 case 50221:
1785                     // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
1786                     perChar = 5;        // 5 max (4.5 average)
1787                     extraEnd = 3;       // 3 bytes to shift back to ASCII
1788                     break;
1789                 case 50222:
1790                     // 2 bytes per char + 3 bytes switch to JIS 0208 or 1 byte + 3 bytes switch to 1 byte CP
1791                     perChar = 5;        // 5 max (4.5 average)
1792                     extraEnd = 4;       // 1 byte to shift from Katakana -> DBCS, 3 bytes to shift back to ASCII from DBCS
1793                     break;
1794                 case 50225:
1795                     // 2 bytes per char + 1 byte SO, or 1 byte per char + 1 byte SI.
1796                     perChar = 3;        // 3 max, (2.5 average)
1797                     extraStart = 4;     // EUC-KR marker appears at beginning of file.
1798                     extraEnd = 1;       // 1 byte to shift back to ascii if necessary.
1799                     break;
1800                 case 52936:
1801                     // 2 bytes per char + 2 byte shift, or 1 byte + 1 byte shift
1802                     // Worst case: left over surrogate with no low surrogate is extra ?, could have to switch to ASCII, then could have HZ and flush to ASCII mode
1803                     perChar = 4;        // 4 max, (3.5 average if every other char is HZ/ASCII)
1804                     extraEnd = 2;       // 2 if we have to shift back to ASCII
1805                     break;
1806             }
1807 
1808             // Return our surrogate and End plus perChar for each char.
1809             byteCount *= perChar;
1810             byteCount += extraStart + extraEnd;
1811 
1812             if (byteCount > 0x7fffffff)
1813                 throw new ArgumentOutOfRangeException("charCount", Environment.GetResourceString("ArgumentOutOfRange_GetByteCountOverflow"));
1814 
1815             return (int)byteCount;
1816         }
1817 
GetMaxCharCount(int byteCount)1818         public override int GetMaxCharCount(int byteCount)
1819         {
1820             if (byteCount < 0)
1821                throw new ArgumentOutOfRangeException("byteCount",
1822                     Environment.GetResourceString("ArgumentOutOfRange_NeedNonNegNum"));
1823             Contract.EndContractBlock();
1824 
1825             int perChar = 1;
1826             int extraDecoder = 1;
1827 
1828             switch (CodePage)
1829             {
1830                 case 50220:
1831                 case 50221:
1832                 case 50222:
1833                 case 50225:
1834                     perChar = 1;        // Worst case all ASCII
1835                     extraDecoder = 3;   // Could have left over 3 chars of 4 char escape sequence, that all become ?
1836                     break;
1837                 case 52936:
1838                     perChar = 1;        // Worst case all ASCII
1839                     extraDecoder = 1;   // sequences are 2 chars, so if next one is illegal, then previous 1 could be ?
1840                     break;
1841             }
1842 
1843             // Figure out our length, perchar * char + whatever extra our decoder could do to us.
1844             long charCount = ((long)byteCount * perChar) + extraDecoder;
1845 
1846             // Just in case we have to fall back unknown ones.
1847             if (DecoderFallback.MaxCharCount > 1)
1848                 charCount *= DecoderFallback.MaxCharCount;
1849 
1850             if (charCount > 0x7fffffff)
1851                 throw new ArgumentOutOfRangeException("byteCount", Environment.GetResourceString("ArgumentOutOfRange_GetCharCountOverflow"));
1852 
1853             return (int)charCount;
1854         }
1855 
GetEncoder()1856         public override Encoder GetEncoder()
1857         {
1858             return new ISO2022Encoder(this);
1859         }
1860 
GetDecoder()1861         public override Decoder GetDecoder()
1862         {
1863             return new ISO2022Decoder(this);
1864         }
1865 
1866         [Serializable]
1867         internal class ISO2022Encoder : System.Text.EncoderNLS
1868         {
1869             internal ISO2022Modes currentMode;
1870             internal ISO2022Modes shiftInOutMode;
1871 
ISO2022Encoder(EncodingNLS encoding)1872             internal ISO2022Encoder(EncodingNLS encoding) : base(encoding)
1873             {
1874                 // base calls reset
1875             }
1876 
Reset()1877             public override void Reset()
1878             {
1879                 // Reset
1880                 currentMode = ISO2022Modes.ModeASCII;
1881                 shiftInOutMode = ISO2022Modes.ModeASCII;
1882                 charLeftOver = (char)0;
1883                 if (m_fallbackBuffer != null)
1884                     m_fallbackBuffer.Reset();
1885             }
1886 
1887             // Anything left in our encoder?
1888             internal override bool HasState
1889             {
1890                 get
1891                 {
1892                     // Don't check shift-out mode, it may be ascii (JP) or not (KR)
1893                     return (this.charLeftOver != (char)0 ||
1894                             currentMode != ISO2022Modes.ModeASCII);
1895                 }
1896             }
1897         }
1898 
1899         [Serializable]
1900         internal class ISO2022Decoder : System.Text.DecoderNLS
1901         {
1902             internal byte[] bytesLeftOver;
1903             internal int bytesLeftOverCount;
1904             internal ISO2022Modes currentMode;
1905             internal ISO2022Modes shiftInOutMode;
1906 
ISO2022Decoder(EncodingNLS encoding)1907             internal ISO2022Decoder(EncodingNLS encoding) : base(encoding)
1908             {
1909                 // base calls reset
1910             }
1911 
Reset()1912             public override void Reset()
1913             {
1914                 // Reset
1915                 bytesLeftOverCount = 0;
1916                 bytesLeftOver = new byte[4];
1917                 currentMode = ISO2022Modes.ModeASCII;
1918                 shiftInOutMode = ISO2022Modes.ModeASCII;
1919                 if (m_fallbackBuffer != null)
1920                     m_fallbackBuffer.Reset();
1921             }
1922 
1923             // Anything left in our decoder?
1924             internal override bool HasState
1925             {
1926                 get
1927                 {
1928                     // If have bytes left over or not shifted back to ASCII then have problem
1929                     return (this.bytesLeftOverCount != 0 ||
1930                             currentMode != ISO2022Modes.ModeASCII);
1931                 }
1932             }
1933         }
1934 
1935         static ushort[] HalfToFullWidthKanaTable =
1936         {
1937             0xa1a3, // 0x8ea1 : Halfwidth Ideographic Period
1938             0xa1d6, // 0x8ea2 : Halfwidth Opening Corner Bracket
1939             0xa1d7, // 0x8ea3 : Halfwidth Closing Corner Bracket
1940             0xa1a2, // 0x8ea4 : Halfwidth Ideographic Comma
1941             0xa1a6, // 0x8ea5 : Halfwidth Katakana Middle Dot
1942             0xa5f2, // 0x8ea6 : Halfwidth Katakana Wo
1943             0xa5a1, // 0x8ea7 : Halfwidth Katakana Small A
1944             0xa5a3, // 0x8ea8 : Halfwidth Katakana Small I
1945             0xa5a5, // 0x8ea9 : Halfwidth Katakana Small U
1946             0xa5a7, // 0x8eaa : Halfwidth Katakana Small E
1947             0xa5a9, // 0x8eab : Halfwidth Katakana Small O
1948             0xa5e3, // 0x8eac : Halfwidth Katakana Small Ya
1949             0xa5e5, // 0x8ead : Halfwidth Katakana Small Yu
1950             0xa5e7, // 0x8eae : Halfwidth Katakana Small Yo
1951             0xa5c3, // 0x8eaf : Halfwidth Katakana Small Tu
1952             0xa1bc, // 0x8eb0 : Halfwidth Katakana-Hiragana Prolonged Sound Mark
1953             0xa5a2, // 0x8eb1 : Halfwidth Katakana A
1954             0xa5a4, // 0x8eb2 : Halfwidth Katakana I
1955             0xa5a6, // 0x8eb3 : Halfwidth Katakana U
1956             0xa5a8, // 0x8eb4 : Halfwidth Katakana E
1957             0xa5aa, // 0x8eb5 : Halfwidth Katakana O
1958             0xa5ab, // 0x8eb6 : Halfwidth Katakana Ka
1959             0xa5ad, // 0x8eb7 : Halfwidth Katakana Ki
1960             0xa5af, // 0x8eb8 : Halfwidth Katakana Ku
1961             0xa5b1, // 0x8eb9 : Halfwidth Katakana Ke
1962             0xa5b3, // 0x8eba : Halfwidth Katakana Ko
1963             0xa5b5, // 0x8ebb : Halfwidth Katakana Sa
1964             0xa5b7, // 0x8ebc : Halfwidth Katakana Si
1965             0xa5b9, // 0x8ebd : Halfwidth Katakana Su
1966             0xa5bb, // 0x8ebe : Halfwidth Katakana Se
1967             0xa5bd, // 0x8ebf : Halfwidth Katakana So
1968             0xa5bf, // 0x8ec0 : Halfwidth Katakana Ta
1969             0xa5c1, // 0x8ec1 : Halfwidth Katakana Ti
1970             0xa5c4, // 0x8ec2 : Halfwidth Katakana Tu
1971             0xa5c6, // 0x8ec3 : Halfwidth Katakana Te
1972             0xa5c8, // 0x8ec4 : Halfwidth Katakana To
1973             0xa5ca, // 0x8ec5 : Halfwidth Katakana Na
1974             0xa5cb, // 0x8ec6 : Halfwidth Katakana Ni
1975             0xa5cc, // 0x8ec7 : Halfwidth Katakana Nu
1976             0xa5cd, // 0x8ec8 : Halfwidth Katakana Ne
1977             0xa5ce, // 0x8ec9 : Halfwidth Katakana No
1978             0xa5cf, // 0x8eca : Halfwidth Katakana Ha
1979             0xa5d2, // 0x8ecb : Halfwidth Katakana Hi
1980             0xa5d5, // 0x8ecc : Halfwidth Katakana Hu
1981             0xa5d8, // 0x8ecd : Halfwidth Katakana He
1982             0xa5db, // 0x8ece : Halfwidth Katakana Ho
1983             0xa5de, // 0x8ecf : Halfwidth Katakana Ma
1984             0xa5df, // 0x8ed0 : Halfwidth Katakana Mi
1985             0xa5e0, // 0x8ed1 : Halfwidth Katakana Mu
1986             0xa5e1, // 0x8ed2 : Halfwidth Katakana Me
1987             0xa5e2, // 0x8ed3 : Halfwidth Katakana Mo
1988             0xa5e4, // 0x8ed4 : Halfwidth Katakana Ya
1989             0xa5e6, // 0x8ed5 : Halfwidth Katakana Yu
1990             0xa5e8, // 0x8ed6 : Halfwidth Katakana Yo
1991             0xa5e9, // 0x8ed7 : Halfwidth Katakana Ra
1992             0xa5ea, // 0x8ed8 : Halfwidth Katakana Ri
1993             0xa5eb, // 0x8ed9 : Halfwidth Katakana Ru
1994             0xa5ec, // 0x8eda : Halfwidth Katakana Re
1995             0xa5ed, // 0x8edb : Halfwidth Katakana Ro
1996             0xa5ef, // 0x8edc : Halfwidth Katakana Wa
1997             0xa5f3, // 0x8edd : Halfwidth Katakana N
1998             0xa1ab, // 0x8ede : Halfwidth Katakana Voiced Sound Mark
1999             0xa1ac  // 0x8edf : Halfwidth Katakana Semi-Voiced Sound Mark
2000         };
2001     }
2002 }
2003 #endif // FEATURE_CODEPAGES_FILE
2004 
2005