1 // Licensed to the .NET Foundation under one or more agreements.
2 // The .NET Foundation licenses this file to you under the MIT license.
3 // See the LICENSE file in the project root for more information.
4 
5 using System.Collections.Generic;
6 using System.Globalization;
7 using System.Linq;
8 using System.Runtime.Serialization.Formatters.Tests;
9 using Xunit;
10 
11 namespace System.Text.Tests
12 {
13     public partial class EncodingTest : IClassFixture<CultureSetup>
14     {
        // Constructor invoked by xUnit for this test class. The CultureSetup fixture is
        // received because the class declares IClassFixture<CultureSetup>; the parameter
        // itself is intentionally unused and the body is empty.
        public EncodingTest(CultureSetup setup)
        {
            // Setting up the culture happens externally, and only once, which is what we want.
            // xUnit will keep track of it, do nothing.
        }
20 
CodePageInfo()21         public static IEnumerable<object[]> CodePageInfo()
22         {
23             // The layout is code page, IANA(web) name, and query string.
24             // Query strings may be undocumented, and IANA names will be returned from Encoding objects.
25             // Entries are sorted by code page.
26             yield return new object[] { 37, "ibm037", "ibm037" };
27             yield return new object[] { 37, "ibm037", "cp037" };
28             yield return new object[] { 37, "ibm037", "csibm037" };
29             yield return new object[] { 37, "ibm037", "ebcdic-cp-ca" };
30             yield return new object[] { 37, "ibm037", "ebcdic-cp-nl" };
31             yield return new object[] { 37, "ibm037", "ebcdic-cp-us" };
32             yield return new object[] { 37, "ibm037", "ebcdic-cp-wt" };
33             yield return new object[] { 437, "ibm437", "ibm437" };
34             yield return new object[] { 437, "ibm437", "437" };
35             yield return new object[] { 437, "ibm437", "cp437" };
36             yield return new object[] { 437, "ibm437", "cspc8codepage437" };
37             yield return new object[] { 500, "ibm500", "ibm500" };
38             yield return new object[] { 500, "ibm500", "cp500" };
39             yield return new object[] { 500, "ibm500", "csibm500" };
40             yield return new object[] { 500, "ibm500", "ebcdic-cp-be" };
41             yield return new object[] { 500, "ibm500", "ebcdic-cp-ch" };
42             yield return new object[] { 708, "asmo-708", "asmo-708" };
43             yield return new object[] { 720, "dos-720", "dos-720" };
44             yield return new object[] { 737, "ibm737", "ibm737" };
45             yield return new object[] { 775, "ibm775", "ibm775" };
46             yield return new object[] { 850, "ibm850", "ibm850" };
47             yield return new object[] { 850, "ibm850", "cp850" };
48             yield return new object[] { 852, "ibm852", "ibm852" };
49             yield return new object[] { 852, "ibm852", "cp852" };
50             yield return new object[] { 855, "ibm855", "ibm855" };
51             yield return new object[] { 855, "ibm855", "cp855" };
52             yield return new object[] { 857, "ibm857", "ibm857" };
53             yield return new object[] { 857, "ibm857", "cp857" };
54             yield return new object[] { 858, "ibm00858", "ibm00858" };
55             yield return new object[] { 858, "ibm00858", "ccsid00858" };
56             yield return new object[] { 858, "ibm00858", "cp00858" };
57             yield return new object[] { 858, "ibm00858", "cp858" };
58             yield return new object[] { 858, "ibm00858", "pc-multilingual-850+euro" };
59             yield return new object[] { 860, "ibm860", "ibm860" };
60             yield return new object[] { 860, "ibm860", "cp860" };
61             yield return new object[] { 861, "ibm861", "ibm861" };
62             yield return new object[] { 861, "ibm861", "cp861" };
63             yield return new object[] { 862, "dos-862", "dos-862" };
64             yield return new object[] { 862, "dos-862", "cp862" };
65             yield return new object[] { 862, "dos-862", "ibm862" };
66             yield return new object[] { 863, "ibm863", "ibm863" };
67             yield return new object[] { 863, "ibm863", "cp863" };
68             yield return new object[] { 864, "ibm864", "ibm864" };
69             yield return new object[] { 864, "ibm864", "cp864" };
70             yield return new object[] { 865, "ibm865", "ibm865" };
71             yield return new object[] { 865, "ibm865", "cp865" };
72             yield return new object[] { 866, "cp866", "cp866" };
73             yield return new object[] { 866, "cp866", "ibm866" };
74             yield return new object[] { 869, "ibm869", "ibm869" };
75             yield return new object[] { 869, "ibm869", "cp869" };
76             yield return new object[] { 870, "ibm870", "ibm870" };
77             yield return new object[] { 870, "ibm870", "cp870" };
78             yield return new object[] { 870, "ibm870", "csibm870" };
79             yield return new object[] { 870, "ibm870", "ebcdic-cp-roece" };
80             yield return new object[] { 870, "ibm870", "ebcdic-cp-yu" };
81             yield return new object[] { 874, "windows-874", "windows-874" };
82             yield return new object[] { 874, "windows-874", "dos-874" };
83             yield return new object[] { 874, "windows-874", "iso-8859-11" };
84             yield return new object[] { 874, "windows-874", "tis-620" };
85             yield return new object[] { 875, "cp875", "cp875" };
86             yield return new object[] { 932, "shift_jis", "shift_jis" };
87             yield return new object[] { 932, "shift_jis", "csshiftjis" };
88             yield return new object[] { 932, "shift_jis", "cswindows31j" };
89             yield return new object[] { 932, "shift_jis", "ms_kanji" };
90             yield return new object[] { 932, "shift_jis", "shift-jis" };
91             yield return new object[] { 932, "shift_jis", "sjis" };
92             yield return new object[] { 932, "shift_jis", "x-ms-cp932" };
93             yield return new object[] { 932, "shift_jis", "x-sjis" };
94             yield return new object[] { 936, "gb2312", "gb2312" };
95             yield return new object[] { 936, "gb2312", "chinese" };
96             yield return new object[] { 936, "gb2312", "cn-gb" };
97             yield return new object[] { 936, "gb2312", "csgb2312" };
98             yield return new object[] { 936, "gb2312", "csgb231280" };
99             yield return new object[] { 936, "gb2312", "csiso58gb231280" };
100             yield return new object[] { 936, "gb2312", "gb_2312-80" };
101             yield return new object[] { 936, "gb2312", "gb231280" };
102             yield return new object[] { 936, "gb2312", "gb2312-80" };
103             yield return new object[] { 936, "gb2312", "gbk" };
104             yield return new object[] { 936, "gb2312", "iso-ir-58" };
105             yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601-1987" };
106             yield return new object[] { 949, "ks_c_5601-1987", "csksc56011987" };
107             yield return new object[] { 949, "ks_c_5601-1987", "iso-ir-149" };
108             yield return new object[] { 949, "ks_c_5601-1987", "korean" };
109             yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601" };
110             yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601_1987" };
111             yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601-1989" };
112             yield return new object[] { 949, "ks_c_5601-1987", "ksc_5601" };
113             yield return new object[] { 949, "ks_c_5601-1987", "ksc5601" };
114             yield return new object[] { 949, "ks_c_5601-1987", "ks-c5601" };
115             yield return new object[] { 949, "ks_c_5601-1987", "ks-c-5601" };
116             yield return new object[] { 950, "big5", "big5" };
117             yield return new object[] { 950, "big5", "big5-hkscs" };
118             yield return new object[] { 950, "big5", "cn-big5" };
119             yield return new object[] { 950, "big5", "csbig5" };
120             yield return new object[] { 950, "big5", "x-x-big5" };
121             yield return new object[] { 1026, "ibm1026", "ibm1026" };
122             yield return new object[] { 1026, "ibm1026", "cp1026" };
123             yield return new object[] { 1026, "ibm1026", "csibm1026" };
124             yield return new object[] { 1047, "ibm01047", "ibm01047" };
125             yield return new object[] { 1140, "ibm01140", "ibm01140" };
126             yield return new object[] { 1140, "ibm01140", "ccsid01140" };
127             yield return new object[] { 1140, "ibm01140", "cp01140" };
128             yield return new object[] { 1140, "ibm01140", "ebcdic-us-37+euro" };
129             yield return new object[] { 1141, "ibm01141", "ibm01141" };
130             yield return new object[] { 1141, "ibm01141", "ccsid01141" };
131             yield return new object[] { 1141, "ibm01141", "cp01141" };
132             yield return new object[] { 1141, "ibm01141", "ebcdic-de-273+euro" };
133             yield return new object[] { 1142, "ibm01142", "ibm01142" };
134             yield return new object[] { 1142, "ibm01142", "ccsid01142" };
135             yield return new object[] { 1142, "ibm01142", "cp01142" };
136             yield return new object[] { 1142, "ibm01142", "ebcdic-dk-277+euro" };
137             yield return new object[] { 1142, "ibm01142", "ebcdic-no-277+euro" };
138             yield return new object[] { 1143, "ibm01143", "ibm01143" };
139             yield return new object[] { 1143, "ibm01143", "ccsid01143" };
140             yield return new object[] { 1143, "ibm01143", "cp01143" };
141             yield return new object[] { 1143, "ibm01143", "ebcdic-fi-278+euro" };
142             yield return new object[] { 1143, "ibm01143", "ebcdic-se-278+euro" };
143             yield return new object[] { 1144, "ibm01144", "ibm01144" };
144             yield return new object[] { 1144, "ibm01144", "ccsid01144" };
145             yield return new object[] { 1144, "ibm01144", "cp01144" };
146             yield return new object[] { 1144, "ibm01144", "ebcdic-it-280+euro" };
147             yield return new object[] { 1145, "ibm01145", "ibm01145" };
148             yield return new object[] { 1145, "ibm01145", "ccsid01145" };
149             yield return new object[] { 1145, "ibm01145", "cp01145" };
150             yield return new object[] { 1145, "ibm01145", "ebcdic-es-284+euro" };
151             yield return new object[] { 1146, "ibm01146", "ibm01146" };
152             yield return new object[] { 1146, "ibm01146", "ccsid01146" };
153             yield return new object[] { 1146, "ibm01146", "cp01146" };
154             yield return new object[] { 1146, "ibm01146", "ebcdic-gb-285+euro" };
155             yield return new object[] { 1147, "ibm01147", "ibm01147" };
156             yield return new object[] { 1147, "ibm01147", "ccsid01147" };
157             yield return new object[] { 1147, "ibm01147", "cp01147" };
158             yield return new object[] { 1147, "ibm01147", "ebcdic-fr-297+euro" };
159             yield return new object[] { 1148, "ibm01148", "ibm01148" };
160             yield return new object[] { 1148, "ibm01148", "ccsid01148" };
161             yield return new object[] { 1148, "ibm01148", "cp01148" };
162             yield return new object[] { 1148, "ibm01148", "ebcdic-international-500+euro" };
163             yield return new object[] { 1149, "ibm01149", "ibm01149" };
164             yield return new object[] { 1149, "ibm01149", "ccsid01149" };
165             yield return new object[] { 1149, "ibm01149", "cp01149" };
166             yield return new object[] { 1149, "ibm01149", "ebcdic-is-871+euro" };
167             yield return new object[] { 1250, "windows-1250", "windows-1250" };
168             yield return new object[] { 1250, "windows-1250", "x-cp1250" };
169             yield return new object[] { 1251, "windows-1251", "windows-1251" };
170             yield return new object[] { 1251, "windows-1251", "x-cp1251" };
171             yield return new object[] { 1252, "windows-1252", "windows-1252" };
172             yield return new object[] { 1252, "windows-1252", "x-ansi" };
173             yield return new object[] { 1253, "windows-1253", "windows-1253" };
174             yield return new object[] { 1254, "windows-1254", "windows-1254" };
175             yield return new object[] { 1255, "windows-1255", "windows-1255" };
176             yield return new object[] { 1256, "windows-1256", "windows-1256" };
177             yield return new object[] { 1256, "windows-1256", "cp1256" };
178             yield return new object[] { 1257, "windows-1257", "windows-1257" };
179             yield return new object[] { 1258, "windows-1258", "windows-1258" };
180             yield return new object[] { 1361, "johab", "johab" };
181             yield return new object[] { 10000, "macintosh", "macintosh" };
182             yield return new object[] { 10001, "x-mac-japanese", "x-mac-japanese" };
183             yield return new object[] { 10002, "x-mac-chinesetrad", "x-mac-chinesetrad" };
184             yield return new object[] { 10003, "x-mac-korean", "x-mac-korean" };
185             yield return new object[] { 10004, "x-mac-arabic", "x-mac-arabic" };
186             yield return new object[] { 10005, "x-mac-hebrew", "x-mac-hebrew" };
187             yield return new object[] { 10006, "x-mac-greek", "x-mac-greek" };
188             yield return new object[] { 10007, "x-mac-cyrillic", "x-mac-cyrillic" };
189             yield return new object[] { 10008, "x-mac-chinesesimp", "x-mac-chinesesimp" };
190             yield return new object[] { 10010, "x-mac-romanian", "x-mac-romanian" };
191             yield return new object[] { 10017, "x-mac-ukrainian", "x-mac-ukrainian" };
192             yield return new object[] { 10021, "x-mac-thai", "x-mac-thai" };
193             yield return new object[] { 10029, "x-mac-ce", "x-mac-ce" };
194             yield return new object[] { 10079, "x-mac-icelandic", "x-mac-icelandic" };
195             yield return new object[] { 10081, "x-mac-turkish", "x-mac-turkish" };
196             yield return new object[] { 10082, "x-mac-croatian", "x-mac-croatian" };
197             yield return new object[] { 20000, "x-chinese-cns", "x-chinese-cns" };
198             yield return new object[] { 20001, "x-cp20001", "x-cp20001" };
199             yield return new object[] { 20002, "x-chinese-eten", "x-chinese-eten" };
200             yield return new object[] { 20003, "x-cp20003", "x-cp20003" };
201             yield return new object[] { 20004, "x-cp20004", "x-cp20004" };
202             yield return new object[] { 20005, "x-cp20005", "x-cp20005" };
203             yield return new object[] { 20105, "x-ia5", "x-ia5" };
204             yield return new object[] { 20105, "x-ia5", "irv" };
205             yield return new object[] { 20106, "x-ia5-german", "x-ia5-german" };
206             yield return new object[] { 20106, "x-ia5-german", "din_66003" };
207             yield return new object[] { 20106, "x-ia5-german", "german" };
208             yield return new object[] { 20107, "x-ia5-swedish", "x-ia5-swedish" };
209             yield return new object[] { 20107, "x-ia5-swedish", "sen_850200_b" };
210             yield return new object[] { 20107, "x-ia5-swedish", "swedish" };
211             yield return new object[] { 20108, "x-ia5-norwegian", "x-ia5-norwegian" };
212             yield return new object[] { 20108, "x-ia5-norwegian", "norwegian" };
213             yield return new object[] { 20108, "x-ia5-norwegian", "ns_4551-1" };
214             yield return new object[] { 20261, "x-cp20261", "x-cp20261" };
215             yield return new object[] { 20269, "x-cp20269", "x-cp20269" };
216             yield return new object[] { 20273, "ibm273", "ibm273" };
217             yield return new object[] { 20273, "ibm273", "cp273" };
218             yield return new object[] { 20273, "ibm273", "csibm273" };
219             yield return new object[] { 20277, "ibm277", "ibm277" };
220             yield return new object[] { 20277, "ibm277", "csibm277" };
221             yield return new object[] { 20277, "ibm277", "ebcdic-cp-dk" };
222             yield return new object[] { 20277, "ibm277", "ebcdic-cp-no" };
223             yield return new object[] { 20278, "ibm278", "ibm278" };
224             yield return new object[] { 20278, "ibm278", "cp278" };
225             yield return new object[] { 20278, "ibm278", "csibm278" };
226             yield return new object[] { 20278, "ibm278", "ebcdic-cp-fi" };
227             yield return new object[] { 20278, "ibm278", "ebcdic-cp-se" };
228             yield return new object[] { 20280, "ibm280", "ibm280" };
229             yield return new object[] { 20280, "ibm280", "cp280" };
230             yield return new object[] { 20280, "ibm280", "csibm280" };
231             yield return new object[] { 20280, "ibm280", "ebcdic-cp-it" };
232             yield return new object[] { 20284, "ibm284", "ibm284" };
233             yield return new object[] { 20284, "ibm284", "cp284" };
234             yield return new object[] { 20284, "ibm284", "csibm284" };
235             yield return new object[] { 20284, "ibm284", "ebcdic-cp-es" };
236             yield return new object[] { 20285, "ibm285", "ibm285" };
237             yield return new object[] { 20285, "ibm285", "cp285" };
238             yield return new object[] { 20285, "ibm285", "csibm285" };
239             yield return new object[] { 20285, "ibm285", "ebcdic-cp-gb" };
240             yield return new object[] { 20290, "ibm290", "ibm290" };
241             yield return new object[] { 20290, "ibm290", "cp290" };
242             yield return new object[] { 20290, "ibm290", "csibm290" };
243             yield return new object[] { 20290, "ibm290", "ebcdic-jp-kana" };
244             yield return new object[] { 20297, "ibm297", "ibm297" };
245             yield return new object[] { 20297, "ibm297", "cp297" };
246             yield return new object[] { 20297, "ibm297", "csibm297" };
247             yield return new object[] { 20297, "ibm297", "ebcdic-cp-fr" };
248             yield return new object[] { 20420, "ibm420", "ibm420" };
249             yield return new object[] { 20420, "ibm420", "cp420" };
250             yield return new object[] { 20420, "ibm420", "csibm420" };
251             yield return new object[] { 20420, "ibm420", "ebcdic-cp-ar1" };
252             yield return new object[] { 20423, "ibm423", "ibm423" };
253             yield return new object[] { 20423, "ibm423", "cp423" };
254             yield return new object[] { 20423, "ibm423", "csibm423" };
255             yield return new object[] { 20423, "ibm423", "ebcdic-cp-gr" };
256             yield return new object[] { 20424, "ibm424", "ibm424" };
257             yield return new object[] { 20424, "ibm424", "cp424" };
258             yield return new object[] { 20424, "ibm424", "csibm424" };
259             yield return new object[] { 20424, "ibm424", "ebcdic-cp-he" };
260             yield return new object[] { 20833, "x-ebcdic-koreanextended", "x-ebcdic-koreanextended" };
261             yield return new object[] { 20838, "ibm-thai", "ibm-thai" };
262             yield return new object[] { 20838, "ibm-thai", "csibmthai" };
263             yield return new object[] { 20866, "koi8-r", "koi8-r" };
264             yield return new object[] { 20866, "koi8-r", "cskoi8r" };
265             yield return new object[] { 20866, "koi8-r", "koi" };
266             yield return new object[] { 20866, "koi8-r", "koi8" };
267             yield return new object[] { 20866, "koi8-r", "koi8r" };
268             yield return new object[] { 20871, "ibm871", "ibm871" };
269             yield return new object[] { 20871, "ibm871", "cp871" };
270             yield return new object[] { 20871, "ibm871", "csibm871" };
271             yield return new object[] { 20871, "ibm871", "ebcdic-cp-is" };
272             yield return new object[] { 20880, "ibm880", "ibm880" };
273             yield return new object[] { 20880, "ibm880", "cp880" };
274             yield return new object[] { 20880, "ibm880", "csibm880" };
275             yield return new object[] { 20880, "ibm880", "ebcdic-cyrillic" };
276             yield return new object[] { 20905, "ibm905", "ibm905" };
277             yield return new object[] { 20905, "ibm905", "cp905" };
278             yield return new object[] { 20905, "ibm905", "csibm905" };
279             yield return new object[] { 20905, "ibm905", "ebcdic-cp-tr" };
280             yield return new object[] { 20924, "ibm00924", "ibm00924" };
281             yield return new object[] { 20924, "ibm00924", "ccsid00924" };
282             yield return new object[] { 20924, "ibm00924", "cp00924" };
283             yield return new object[] { 20924, "ibm00924", "ebcdic-latin9--euro" };
284             yield return new object[] { 20932, "euc-jp", "euc-jp" };
285             yield return new object[] { 20936, "x-cp20936", "x-cp20936" };
286             yield return new object[] { 20949, "x-cp20949", "x-cp20949" };
287             yield return new object[] { 21025, "cp1025", "cp1025" };
288             yield return new object[] { 21866, "koi8-u", "koi8-u" };
289             yield return new object[] { 21866, "koi8-u", "koi8-ru" };
290             yield return new object[] { 28592, "iso-8859-2", "iso-8859-2" };
291             yield return new object[] { 28592, "iso-8859-2", "csisolatin2" };
292             yield return new object[] { 28592, "iso-8859-2", "iso_8859-2" };
293             yield return new object[] { 28592, "iso-8859-2", "iso_8859-2:1987" };
294             yield return new object[] { 28592, "iso-8859-2", "iso8859-2" };
295             yield return new object[] { 28592, "iso-8859-2", "iso-ir-101" };
296             yield return new object[] { 28592, "iso-8859-2", "l2" };
297             yield return new object[] { 28592, "iso-8859-2", "latin2" };
298             yield return new object[] { 28593, "iso-8859-3", "iso-8859-3" };
299             yield return new object[] { 28593, "iso-8859-3", "csisolatin3" };
300             yield return new object[] { 28593, "iso-8859-3", "iso_8859-3" };
301             yield return new object[] { 28593, "iso-8859-3", "iso_8859-3:1988" };
302             yield return new object[] { 28593, "iso-8859-3", "iso-ir-109" };
303             yield return new object[] { 28593, "iso-8859-3", "l3" };
304             yield return new object[] { 28593, "iso-8859-3", "latin3" };
305             yield return new object[] { 28594, "iso-8859-4", "iso-8859-4" };
306             yield return new object[] { 28594, "iso-8859-4", "csisolatin4" };
307             yield return new object[] { 28594, "iso-8859-4", "iso_8859-4" };
308             yield return new object[] { 28594, "iso-8859-4", "iso_8859-4:1988" };
309             yield return new object[] { 28594, "iso-8859-4", "iso-ir-110" };
310             yield return new object[] { 28594, "iso-8859-4", "l4" };
311             yield return new object[] { 28594, "iso-8859-4", "latin4" };
312             yield return new object[] { 28595, "iso-8859-5", "iso-8859-5" };
313             yield return new object[] { 28595, "iso-8859-5", "csisolatincyrillic" };
314             yield return new object[] { 28595, "iso-8859-5", "cyrillic" };
315             yield return new object[] { 28595, "iso-8859-5", "iso_8859-5" };
316             yield return new object[] { 28595, "iso-8859-5", "iso_8859-5:1988" };
317             yield return new object[] { 28595, "iso-8859-5", "iso-ir-144" };
318             yield return new object[] { 28596, "iso-8859-6", "iso-8859-6" };
319             yield return new object[] { 28596, "iso-8859-6", "arabic" };
320             yield return new object[] { 28596, "iso-8859-6", "csisolatinarabic" };
321             yield return new object[] { 28596, "iso-8859-6", "ecma-114" };
322             yield return new object[] { 28596, "iso-8859-6", "iso_8859-6" };
323             yield return new object[] { 28596, "iso-8859-6", "iso_8859-6:1987" };
324             yield return new object[] { 28596, "iso-8859-6", "iso-ir-127" };
325             yield return new object[] { 28597, "iso-8859-7", "iso-8859-7" };
326             yield return new object[] { 28597, "iso-8859-7", "csisolatingreek" };
327             yield return new object[] { 28597, "iso-8859-7", "ecma-118" };
328             yield return new object[] { 28597, "iso-8859-7", "elot_928" };
329             yield return new object[] { 28597, "iso-8859-7", "greek" };
330             yield return new object[] { 28597, "iso-8859-7", "greek8" };
331             yield return new object[] { 28597, "iso-8859-7", "iso_8859-7" };
332             yield return new object[] { 28597, "iso-8859-7", "iso_8859-7:1987" };
333             yield return new object[] { 28597, "iso-8859-7", "iso-ir-126" };
334             yield return new object[] { 28598, "iso-8859-8", "iso-8859-8" };
335             yield return new object[] { 28598, "iso-8859-8", "csisolatinhebrew" };
336             yield return new object[] { 28598, "iso-8859-8", "hebrew" };
337             yield return new object[] { 28598, "iso-8859-8", "iso_8859-8" };
338             yield return new object[] { 28598, "iso-8859-8", "iso_8859-8:1988" };
339             yield return new object[] { 28598, "iso-8859-8", "iso-8859-8 visual" };
340             yield return new object[] { 28598, "iso-8859-8", "iso-ir-138" };
341             yield return new object[] { 28598, "iso-8859-8", "logical" };
342             yield return new object[] { 28598, "iso-8859-8", "visual" };
343             yield return new object[] { 28599, "iso-8859-9", "iso-8859-9" };
344             yield return new object[] { 28599, "iso-8859-9", "csisolatin5" };
345             yield return new object[] { 28599, "iso-8859-9", "iso_8859-9" };
346             yield return new object[] { 28599, "iso-8859-9", "iso_8859-9:1989" };
347             yield return new object[] { 28599, "iso-8859-9", "iso-ir-148" };
348             yield return new object[] { 28599, "iso-8859-9", "l5" };
349             yield return new object[] { 28599, "iso-8859-9", "latin5" };
350             yield return new object[] { 28603, "iso-8859-13", "iso-8859-13" };
351             yield return new object[] { 28605, "iso-8859-15", "iso-8859-15" };
352             yield return new object[] { 28605, "iso-8859-15", "csisolatin9" };
353             yield return new object[] { 28605, "iso-8859-15", "iso_8859-15" };
354             yield return new object[] { 28605, "iso-8859-15", "l9" };
355             yield return new object[] { 28605, "iso-8859-15", "latin9" };
356             yield return new object[] { 29001, "x-europa", "x-europa" };
357             yield return new object[] { 38598, "iso-8859-8-i", "iso-8859-8-i" };
358             yield return new object[] { 50220, "iso-2022-jp", "iso-2022-jp" };
359             yield return new object[] { 50221, "csiso2022jp", "csiso2022jp" };
360             yield return new object[] { 50222, "iso-2022-jp", "iso-2022-jp" };
361             yield return new object[] { 50225, "iso-2022-kr", "iso-2022-kr" };
362             yield return new object[] { 50225, "iso-2022-kr", "csiso2022kr" };
363             yield return new object[] { 50225, "iso-2022-kr", "iso-2022-kr-7" };
364             yield return new object[] { 50225, "iso-2022-kr", "iso-2022-kr-7bit" };
365             yield return new object[] { 50227, "x-cp50227", "x-cp50227" };
366             yield return new object[] { 50227, "x-cp50227", "cp50227" };
367             yield return new object[] { 51932, "euc-jp", "euc-jp" };
368             yield return new object[] { 51932, "euc-jp", "cseucpkdfmtjapanese" };
369             yield return new object[] { 51932, "euc-jp", "extended_unix_code_packed_format_for_japanese" };
370             yield return new object[] { 51932, "euc-jp", "iso-2022-jpeuc" };
371             yield return new object[] { 51932, "euc-jp", "x-euc" };
372             yield return new object[] { 51932, "euc-jp", "x-euc-jp" };
373             yield return new object[] { 51936, "euc-cn", "euc-cn" };
374             yield return new object[] { 51936, "euc-cn", "x-euc-cn" };
375             yield return new object[] { 51949, "euc-kr", "euc-kr" };
376             yield return new object[] { 51949, "euc-kr", "cseuckr" };
377             yield return new object[] { 51949, "euc-kr", "iso-2022-kr-8" };
378             yield return new object[] { 51949, "euc-kr", "iso-2022-kr-8bit" };
379             yield return new object[] { 52936, "hz-gb-2312", "hz-gb-2312" };
380             yield return new object[] { 54936, "gb18030", "gb18030" };
381             yield return new object[] { 57002, "x-iscii-de", "x-iscii-de" };
382             yield return new object[] { 57003, "x-iscii-be", "x-iscii-be" };
383             yield return new object[] { 57004, "x-iscii-ta", "x-iscii-ta" };
384             yield return new object[] { 57005, "x-iscii-te", "x-iscii-te" };
385             yield return new object[] { 57006, "x-iscii-as", "x-iscii-as" };
386             yield return new object[] { 57007, "x-iscii-or", "x-iscii-or" };
387             yield return new object[] { 57008, "x-iscii-ka", "x-iscii-ka" };
388             yield return new object[] { 57009, "x-iscii-ma", "x-iscii-ma" };
389             yield return new object[] { 57010, "x-iscii-gu", "x-iscii-gu" };
390             yield return new object[] { 57011, "x-iscii-pa", "x-iscii-pa" };
391         }
392 
SpecificCodepageEncodings()393         public static IEnumerable<object[]> SpecificCodepageEncodings()
394         {
395             // Layout is codepage encoding, bytes, and matching unicode string.
396             yield return new object[] { "Windows-1256", new byte[] { 0xC7, 0xE1, 0xE1, 0xE5, 0x20, 0xC7, 0xCD, 0xCF }, "\x0627\x0644\x0644\x0647\x0020\x0627\x062D\x062F" };
397             yield return new object[] {"Windows-1252", new byte[] { 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF } ,
398                     "\x00D0\x00D1\x00D2\x00D3\x00D4\x00D5\x00D6\x00D7\x00D8\x00D9\x00DA\x00DB\x00DC\x00DD\x00DE\x00DF"};
399             yield return new object[] { "GB2312", new byte[] { 0xCD, 0xE2, 0xCD, 0xE3, 0xCD, 0xE4 }, "\x5916\x8C4C\x5F2F" };
400             yield return new object[] {"GB18030", new byte[] { 0x81, 0x30, 0x89, 0x37, 0x81, 0x30, 0x89, 0x38, 0xA8, 0xA4, 0xA8, 0xA2, 0x81, 0x30, 0x89, 0x39, 0x81, 0x30, 0x8A, 0x30 } ,
401                     "\x00DE\x00DF\x00E0\x00E1\x00E2\x00E3"};
402         }
403 
MultibyteCharacterEncodings()404         public static IEnumerable<object[]> MultibyteCharacterEncodings()
405         {
406             // Layout is the encoding, bytes, and expected result.
407             yield return new object[] { "iso-2022-jp",
408                 new byte[] { 0xA,
409                     0x1B, 0x24, 0x42, 0x25, 0x4A, 0x25, 0x4A,
410                     0x1B, 0x28, 0x42,
411                     0x1B, 0x24, 0x42, 0x25, 0x4A,
412                     0x1B, 0x28, 0x42,
413                     0x1B, 0x24, 0x42, 0x25, 0x4A,
414                     0x1B, 0x28, 0x42,
415                     0x1B, 0x1, 0x2, 0x3, 0x4,
416                     0x1B, 0x24, 0x42, 0x25, 0x4A, 0x0E, 0x25, 0x4A,
417                     0x1B, 0x28, 0x42, 0x41, 0x42, 0x0E, 0x25, 0x0F, 0x43 },
418                 new int[] { 0xA, 0x30CA, 0x30CA, 0x30CA, 0x30CA, 0x1B, 0x1, 0x2, 0x3, 0x4,
419                     0x30CA, 0xFF65, 0xFF8A, 0x41, 0x42, 0xFF65, 0x43}
420             };
421 
422             yield return new object[] { "GB18030",
423                 new byte[] { 0x41, 0x42, 0x43, 0x81, 0x40, 0x82, 0x80, 0x81, 0x30, 0x82, 0x31, 0x81, 0x20 },
424                  new int[] { 0x41, 0x42, 0x43, 0x4E02, 0x500B, 0x8B, 0x3F, 0x20 }
425             };
426 
427             yield return new object[] { "shift_jis",
428                 new byte[] { 0x41, 0x42, 0x43, 0x81, 0x42, 0xE0, 0x43, 0x44, 0x45 },
429                 new int[] { 0x41, 0x42, 0x43, 0x3002, 0x6F86, 0x44, 0x45 }
430             };
431 
432             yield return new object[] { "iso-2022-kr",
433                 new byte[] { 0x0E, 0x21, 0x7E, 0x1B, 0x24, 0x29, 0x43, 0x21, 0x7E, 0x0F, 0x21, 0x7E, 0x1B, 0x24, 0x29, 0x43, 0x21, 0x7E },
434                 new int[] { 0xFFE2, 0xFFE2, 0x21, 0x7E, 0x21, 0x7E }
435             };
436 
437             yield return new object[] { "hz-gb-2312",
438                 new byte[] { 0x7E, 0x42, 0x7E, 0x7E, 0x7E, 0x7B, 0x21, 0x7E, 0x7E, 0x7D, 0x42, 0x42, 0x7E, 0xA, 0x43, 0x43 },
439                 new int[] { 0x7E, 0x42, 0x7E, 0x3013, 0x42, 0x42, 0x43, 0x43, }
440             };
441         }
442 
CrossplatformDefaultEncodings()443         private static IEnumerable<KeyValuePair<int, string>> CrossplatformDefaultEncodings()
444         {
445             yield return Map(1200, "utf-16");
446             yield return Map(12000, "utf-32");
447             yield return Map(20127, "us-ascii");
448             yield return Map(65000, "utf-7");
449             yield return Map(65001, "utf-8");
450         }
451 
Map(int codePage, string webName)452         private static KeyValuePair<int, string> Map(int codePage, string webName)
453         {
454             return new KeyValuePair<int, string>(codePage, webName);
455         }
456 
457         [Fact]
TestDefaultEncodings()458         public static void TestDefaultEncodings()
459         {
460             ValidateDefaultEncodings();
461 
462             // The default encoding should be something from the known list.
463             Encoding defaultEncoding = Encoding.GetEncoding(0);
464             Assert.NotNull(defaultEncoding);
465             KeyValuePair<int, string> mappedEncoding = Map(defaultEncoding.CodePage, defaultEncoding.WebName);
466 
467             if (defaultEncoding.CodePage == Encoding.UTF8.CodePage)
468             {
469                 // if the default encoding is not UTF8 that means either we are running on the full framework
470                 // or the encoding provider is registered throw the call Encoding.RegisterProvider.
471                 // at that time we shouldn't expect exceptions when creating the following encodings.
472                 foreach (object[] mapping in CodePageInfo())
473                 {
474                     Assert.Throws<NotSupportedException>(() => Encoding.GetEncoding((int)mapping[0]));
475                     AssertExtensions.Throws<ArgumentException>("name", () => Encoding.GetEncoding((string)mapping[2]));
476                 }
477 
478                 // Currently the class EncodingInfo isn't present in corefx, so this checks none of the code pages are present.
479                 // When it is, comment out this line and remove the previous foreach/assert.
480                 // Assert.Equal(CrossplatformDefaultEncodings, Encoding.GetEncodings().OrderBy(i => i.CodePage).Select(i => Map(i.CodePage, i.WebName)));
481 
482                 Assert.Contains(mappedEncoding, CrossplatformDefaultEncodings());
483             }
484 
485             // Add the code page provider.
486             Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
487 
488             // Make sure added code pages are identical between the provider and the Encoding class.
489             foreach (object[] mapping in CodePageInfo())
490             {
491                 Encoding encoding = Encoding.GetEncoding((int)mapping[0]);
492 
493                 Encoding codePageEncoding = CodePagesEncodingProvider.Instance.GetEncoding((int)mapping[0]);
494                 Assert.Equal(encoding, codePageEncoding);
495                 Assert.Equal(encoding.CodePage, (int)mapping[0]);
496                 Assert.Equal(encoding.WebName, (string)mapping[1]);
497 
498                 // If available, validate serializing and deserializing with BinaryFormatter
499                 ValidateSerializeDeserialize(encoding);
500 
501                 // Get encoding via query string.
502                 Assert.Equal(Encoding.GetEncoding((string)mapping[2]), CodePagesEncodingProvider.Instance.GetEncoding((string)mapping[2]));
503             }
504             // Adding the code page provider should keep the originals, too.
505             ValidateDefaultEncodings();
506             // Currently the class EncodingInfo isn't present in corefx, so this checks the complete list
507             // When it is, comment out this line and remove the previous foreach/assert.
508             // Assert.Equal(CrossplatformDefaultEncodings().Union(CodePageInfo().Select(i => Map((int)i[0], (string)i[1])).OrderBy(i => i.Key)),
509             //               Encoding.GetEncodings().OrderBy(i => i.CodePage).Select(i => Map(i.CodePage, i.WebName)));
510 
511             // Default encoding may have changed, should still be something on the combined list.
512             defaultEncoding = Encoding.GetEncoding(0);
513             Assert.NotNull(defaultEncoding);
514             mappedEncoding = Map(defaultEncoding.CodePage, defaultEncoding.WebName);
515             Assert.Contains(mappedEncoding, CrossplatformDefaultEncodings().Union(CodePageInfo().Select(i => Map((int)i[0], (string)i[1]))));
516 
517             TestRegister1252();
518         }
519 
ValidateSerializeDeserialize(Encoding e)520         static void ValidateSerializeDeserialize(Encoding e)
521         {
522             // Make sure the Encoding roundtrips
523             Assert.Equal(e, BinaryFormatterHelpers.Clone(e));
524 
525             // Get an encoder and decoder from the encoding, and clone them
526             Encoder origEncoder = e.GetEncoder();
527             Decoder origDecoder = e.GetDecoder();
528             Encoder clonedEncoder = BinaryFormatterHelpers.Clone(origEncoder);
529             Decoder clonedDecoder = BinaryFormatterHelpers.Clone(origDecoder);
530 
531             // Encode and decode some text with each pairing
532             const string InputText = "abcdefghijklmnopqrstuvwxyz";
533             char[] inputTextChars = InputText.ToCharArray();
534             var pairs = new[]
535             {
536                 Tuple.Create(origEncoder, origDecoder),
537                 Tuple.Create(origEncoder, clonedDecoder),
538                 Tuple.Create(clonedEncoder, origDecoder),
539                 Tuple.Create(clonedEncoder, clonedDecoder),
540             };
541             var results = new List<char[]>();
542             foreach (Tuple<Encoder, Decoder> pair in pairs)
543             {
544                 byte[] encodedBytes = new byte[pair.Item1.GetByteCount(inputTextChars, 0, inputTextChars.Length, true)];
545                 Assert.Equal(encodedBytes.Length, pair.Item1.GetBytes(inputTextChars, 0, inputTextChars.Length, encodedBytes, 0, true));
546                 char[] decodedChars = new char[pair.Item2.GetCharCount(encodedBytes, 0, encodedBytes.Length)];
547                 Assert.Equal(decodedChars.Length, pair.Item2.GetChars(encodedBytes, 0, encodedBytes.Length, decodedChars, 0));
548                 results.Add(decodedChars);
549             }
550 
551             // Validate that all of the pairings produced the same results
552             foreach (char[] a in results)
553             {
554                 foreach (char[] b in results)
555                 {
556                     Assert.Equal(a, b);
557                 }
558             }
559         }
560 
ValidateDefaultEncodings()561         private static void ValidateDefaultEncodings()
562         {
563             foreach (var mapping in CrossplatformDefaultEncodings())
564             {
565                 Encoding encoding = Encoding.GetEncoding(mapping.Key);
566                 Assert.NotNull(encoding);
567                 Assert.Equal(encoding, Encoding.GetEncoding(mapping.Value));
568                 Assert.Equal(mapping.Value, encoding.WebName);
569             }
570         }
571 
572         [Theory]
573         [MemberData(nameof(SpecificCodepageEncodings))]
TestRoundtrippingSpecificCodepageEncoding(string encodingName, byte[] bytes, string expected)574         public static void TestRoundtrippingSpecificCodepageEncoding(string encodingName, byte[] bytes, string expected)
575         {
576             Encoding encoding = CodePagesEncodingProvider.Instance.GetEncoding(encodingName);
577             string encoded = encoding.GetString(bytes, 0, bytes.Length);
578             Assert.Equal(expected, encoded);
579             Assert.Equal(bytes, encoding.GetBytes(encoded));
580             byte[] resultBytes = encoding.GetBytes(encoded);
581         }
582 
583         [Theory]
584         [MemberData(nameof(CodePageInfo))]
TestCodepageEncoding(int codePage, string webName, string queryString)585         public static void TestCodepageEncoding(int codePage, string webName, string queryString)
586         {
587             Encoding encoding;
588             // There are two names that have duplicate associated CodePages. For those two names,
589             // we have to test with the expectation that querying the name will always return the
590             // same codepage.
591             if (codePage != 20932 && codePage != 50222)
592             {
593                 encoding = CodePagesEncodingProvider.Instance.GetEncoding(queryString);
594                 Assert.Equal(encoding, CodePagesEncodingProvider.Instance.GetEncoding(codePage));
595                 Assert.Equal(encoding, CodePagesEncodingProvider.Instance.GetEncoding(webName));
596             }
597             else
598             {
599                 encoding = CodePagesEncodingProvider.Instance.GetEncoding(codePage);
600                 Assert.NotEqual(encoding, CodePagesEncodingProvider.Instance.GetEncoding(queryString));
601                 Assert.NotEqual(encoding, CodePagesEncodingProvider.Instance.GetEncoding(webName));
602             }
603 
604             Assert.NotNull(encoding);
605             Assert.Equal(codePage, encoding.CodePage);
606             Assert.Equal(webName, encoding.WebName);
607 
608             // Small round-trip for ASCII alphanumeric range (some code pages use different punctuation!)
609             // Start with space.
610             string asciiPrintable = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
611             char[] traveled = encoding.GetChars(encoding.GetBytes(asciiPrintable));
612             Assert.Equal(asciiPrintable.ToCharArray(), traveled);
613         }
614 
615         [Theory]
616         [MemberData(nameof(MultibyteCharacterEncodings))]
TestSpecificMultibyteCharacterEncodings(string codepageName, byte[] bytes, int[] expected)617         public static void TestSpecificMultibyteCharacterEncodings(string codepageName, byte[] bytes, int[] expected)
618         {
619             Decoder decoder = CodePagesEncodingProvider.Instance.GetEncoding(codepageName).GetDecoder();
620             char[] buffer = new char[expected.Length];
621 
622             for (int byteIndex = 0, charIndex = 0, charCount = 0; byteIndex < bytes.Length; byteIndex++, charIndex += charCount)
623             {
624                 charCount = decoder.GetChars(bytes, byteIndex, 1, buffer, charIndex);
625             }
626 
627             Assert.Equal(expected, buffer.Select(c => (int)c));
628         }
629 
630         [Theory]
631         [MemberData(nameof(CodePageInfo))]
TestEncodingDisplayNames(int codePage, string webName, string queryString)632         public static void TestEncodingDisplayNames(int codePage, string webName, string queryString)
633         {
634             var encoding = CodePagesEncodingProvider.Instance.GetEncoding(codePage);
635 
636             string name = encoding.EncodingName;
637 
638             // Names can't be empty, and must be printable characters.
639             Assert.False(string.IsNullOrWhiteSpace(name));
640             Assert.All(name, c => Assert.True(c >= ' ' && c < '~' + 1, "Name: " + name + " contains character: " + c));
641         }
642 
643         // This test is run as part of the default mappings test, since it modifies global state which that test
644         // depends on.
TestRegister1252()645         public static void TestRegister1252()
646         {
647             // This test case ensure we can map all 1252 codepage codepoints without any exception.
648             string s1252Result =
649             "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f" +
650             "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f" +
651             "\u0020\u0021\u0022\u0023\u0024\u0025\u0026\u0027\u0028\u0029\u002a\u002b\u002c\u002d\u002e\u002f" +
652             "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039\u003a\u003b\u003c\u003d\u003e\u003f" +
653             "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f" +
654             "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005a\u005b\u005c\u005d\u005e\u005f" +
655             "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f" +
656             "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007a\u007b\u007c\u007d\u007e\u007f" +
657             "\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f" +
658             "\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178" +
659             "\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af" +
660             "\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf" +
661             "\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf" +
662             "\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df" +
663             "\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef" +
664             "\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff";
665 
666             Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);
667             Encoding win1252 = Encoding.GetEncoding("windows-1252", EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback);
668             byte[] enc = new byte[256];
669             for (int j = 0; j < 256; j++)
670             {
671                 enc[j] = (byte)j;
672             }
673 
674             Assert.Equal(s1252Result, win1252.GetString(enc));
675         }
676 
677     }
678 
679     public class CultureSetup : IDisposable
680     {
681         private readonly CultureInfo _originalUICulture;
682 
CultureSetup()683         public CultureSetup()
684         {
685             _originalUICulture = CultureInfo.CurrentUICulture;
686             CultureInfo.CurrentUICulture = new CultureInfo("en-US");
687         }
688 
Dispose()689         public void Dispose()
690         {
691             CultureInfo.CurrentUICulture = _originalUICulture;
692         }
693     }
694 }
695