// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.
// See the LICENSE file in the project root for more information.

using System.Collections.Generic;
using System.Globalization;
using System.Linq;
using System.Runtime.Serialization.Formatters.Tests;
using Xunit;

namespace System.Text.Tests
{
    public partial class EncodingTest : IClassFixture<CultureSetup>
    {
        /// <summary>
        /// Receives the shared <see cref="CultureSetup"/> fixture. The fixture itself is not used here.
        /// </summary>
        public EncodingTest(CultureSetup setup)
        {
            // Setting up the culture happens externally, and only once, which is what we want.
            // xUnit will keep track of it, do nothing.
        }

        /// <summary>
        /// Theory data describing the code pages exercised by these tests.
        /// Each row is { code page (int), expected web name (string), name used to query the encoding (string) }.
        /// </summary>
        public static IEnumerable<object[]> CodePageInfo()
        {
            // The layout is code page, IANA(web) name, and query string.
            // Query strings may be undocumented, and IANA names will be returned from Encoding objects.
            // Entries are sorted by code page.
            yield return new object[] { 37, "ibm037", "ibm037" };
            yield return new object[] { 37, "ibm037", "cp037" };
            yield return new object[] { 37, "ibm037", "csibm037" };
            yield return new object[] { 37, "ibm037", "ebcdic-cp-ca" };
            yield return new object[] { 37, "ibm037", "ebcdic-cp-nl" };
            yield return new object[] { 37, "ibm037", "ebcdic-cp-us" };
            yield return new object[] { 37, "ibm037", "ebcdic-cp-wt" };
            yield return new object[] { 437, "ibm437", "ibm437" };
            yield return new object[] { 437, "ibm437", "437" };
            yield return new object[] { 437, "ibm437", "cp437" };
            yield return new object[] { 437, "ibm437", "cspc8codepage437" };
            yield return new object[] { 500, "ibm500", "ibm500" };
            yield return new object[] { 500, "ibm500", "cp500" };
            yield return new object[] { 500, "ibm500", "csibm500" };
            yield return new object[] { 500, "ibm500", "ebcdic-cp-be" };
            yield return new object[] { 500, "ibm500", "ebcdic-cp-ch" };
            yield return new object[] { 708, "asmo-708", "asmo-708" };
            yield return new object[] { 720, "dos-720", "dos-720" };
            yield return new object[] { 737, "ibm737", "ibm737" };
            yield return new object[] { 775, "ibm775", "ibm775" };
            yield return new object[] { 850, "ibm850", "ibm850" };
            yield return new object[] { 850, "ibm850", "cp850" };
            yield return new object[] { 852, "ibm852", "ibm852" };
            yield return new object[] { 852, "ibm852", "cp852" };
            yield return new object[] { 855, "ibm855", "ibm855" };
            yield return new object[] { 855, "ibm855", "cp855" };
            yield return new object[] { 857, "ibm857", "ibm857" };
            yield return new object[] { 857, "ibm857", "cp857" };
            yield return new object[] { 858, "ibm00858", "ibm00858" };
            yield return new object[] { 858, "ibm00858", "ccsid00858" };
            yield return new object[] { 858, "ibm00858", "cp00858" };
            yield return new object[] { 858, "ibm00858", "cp858" };
            yield return new object[] { 858, "ibm00858", "pc-multilingual-850+euro" };
            yield return new object[] { 860, "ibm860", "ibm860" };
            yield return new object[] { 860, "ibm860", "cp860" };
            yield return new object[] { 861, "ibm861", "ibm861" };
            yield return new object[] { 861, "ibm861", "cp861" };
            yield return new object[] { 862, "dos-862", "dos-862" };
            yield return new object[] { 862, "dos-862", "cp862" };
            yield return new object[] { 862, "dos-862", "ibm862" };
            yield return new object[] { 863, "ibm863", "ibm863" };
            yield return new object[] { 863, "ibm863", "cp863" };
            yield return new object[] { 864, "ibm864", "ibm864" };
            yield return new object[] { 864, "ibm864", "cp864" };
            yield return new object[] { 865, "ibm865", "ibm865" };
            yield return new object[] { 865, "ibm865", "cp865" };
            yield return new object[] { 866, "cp866", "cp866" };
            yield return new object[] { 866, "cp866", "ibm866" };
            yield return new object[] { 869, "ibm869", "ibm869" };
            yield return new object[] { 869, "ibm869", "cp869" };
            yield return new object[] { 870, "ibm870", "ibm870" };
            yield return new object[] { 870, "ibm870", "cp870" };
            yield return new object[] { 870, "ibm870", "csibm870" };
            yield return new object[] { 870, "ibm870", "ebcdic-cp-roece" };
            yield return new object[] { 870, "ibm870", "ebcdic-cp-yu" };
            yield return new object[] { 874, "windows-874", "windows-874" };
            yield return new object[] { 874, "windows-874", "dos-874" };
            yield return new object[] { 874, "windows-874", "iso-8859-11" };
            yield return new object[] { 874, "windows-874", "tis-620" };
            yield return new object[] { 875, "cp875", "cp875" };
            yield return new object[] { 932, "shift_jis", "shift_jis" };
            yield return new object[] { 932, "shift_jis", "csshiftjis" };
            yield return new object[] { 932, "shift_jis", "cswindows31j" };
            yield return new object[] { 932, "shift_jis", "ms_kanji" };
            yield return new object[] { 932, "shift_jis", "shift-jis" };
            yield return new object[] { 932, "shift_jis", "sjis" };
            yield return new object[] { 932, "shift_jis", "x-ms-cp932" };
            yield return new object[] { 932, "shift_jis", "x-sjis" };
            yield return new object[] { 936, "gb2312", "gb2312" };
            yield return new object[] { 936, "gb2312", "chinese" };
            yield return new object[] { 936, "gb2312", "cn-gb" };
            yield return new object[] { 936, "gb2312", "csgb2312" };
            yield return new object[] { 936, "gb2312", "csgb231280" };
            yield return new object[] { 936, "gb2312", "csiso58gb231280" };
            yield return new object[] { 936, "gb2312", "gb_2312-80" };
            yield return new object[] { 936, "gb2312", "gb231280" };
            yield return new object[] { 936, "gb2312", "gb2312-80" };
            yield return new object[] { 936, "gb2312", "gbk" };
            yield return new object[] { 936, "gb2312", "iso-ir-58" };
            yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601-1987" };
            yield return new object[] { 949, "ks_c_5601-1987", "csksc56011987" };
            yield return new object[] { 949, "ks_c_5601-1987", "iso-ir-149" };
            yield return new object[] { 949, "ks_c_5601-1987", "korean" };
            yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601" };
            yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601_1987" };
            yield return new object[] { 949, "ks_c_5601-1987", "ks_c_5601-1989" };
            yield return new object[] { 949, "ks_c_5601-1987", "ksc_5601" };
            yield return new object[] { 949, "ks_c_5601-1987", "ksc5601" };
            yield return new object[] { 949, "ks_c_5601-1987", "ks-c5601" };
            yield return new object[] { 949, "ks_c_5601-1987", "ks-c-5601" };
            yield return new object[] { 950, "big5", "big5" };
            yield return new object[] { 950, "big5", "big5-hkscs" };
            yield return new object[] { 950, "big5", "cn-big5" };
            yield return new object[] { 950, "big5", "csbig5" };
            yield return new object[] { 950, "big5", "x-x-big5" };
            yield return new object[] { 1026, "ibm1026", "ibm1026" };
            yield return new object[] { 1026, "ibm1026", "cp1026" };
            yield return new object[] { 1026, "ibm1026", "csibm1026" };
            yield return new object[] { 1047, "ibm01047", "ibm01047" };
            yield return new object[] { 1140, "ibm01140", "ibm01140" };
            yield return new object[] { 1140, "ibm01140", "ccsid01140" };
            yield return new object[] { 1140, "ibm01140", "cp01140" };
            yield return new object[] { 1140, "ibm01140", "ebcdic-us-37+euro" };
            yield return new object[] { 1141, "ibm01141", "ibm01141" };
            yield return new object[] { 1141, "ibm01141", "ccsid01141" };
            yield return new object[] { 1141, "ibm01141", "cp01141" };
            yield return new object[] { 1141, "ibm01141", "ebcdic-de-273+euro" };
            yield return new object[] { 1142, "ibm01142", "ibm01142" };
            yield return new object[] { 1142, "ibm01142", "ccsid01142" };
            yield return new object[] { 1142, "ibm01142", "cp01142" };
            yield return new object[] { 1142, "ibm01142", "ebcdic-dk-277+euro" };
            yield return new object[] { 1142, "ibm01142", "ebcdic-no-277+euro" };
            yield return new object[] { 1143, "ibm01143", "ibm01143" };
            yield return new object[] { 1143, "ibm01143", "ccsid01143" };
            yield return new object[] { 1143, "ibm01143", "cp01143" };
            yield return new object[] { 1143, "ibm01143", "ebcdic-fi-278+euro" };
            yield return new object[] { 1143, "ibm01143", "ebcdic-se-278+euro" };
            yield return new object[] { 1144, "ibm01144", "ibm01144" };
            yield return new object[] { 1144, "ibm01144", "ccsid01144" };
            yield return new object[] { 1144, "ibm01144", "cp01144" };
            yield return new object[] { 1144, "ibm01144", "ebcdic-it-280+euro" };
            yield return new object[] { 1145, "ibm01145", "ibm01145" };
            yield return new object[] { 1145, "ibm01145", "ccsid01145" };
            yield return new object[] { 1145, "ibm01145", "cp01145" };
            yield return new object[] { 1145, "ibm01145", "ebcdic-es-284+euro" };
            yield return new object[] { 1146, "ibm01146", "ibm01146" };
            yield return new object[] { 1146, "ibm01146", "ccsid01146" };
            yield return new object[] { 1146, "ibm01146", "cp01146" };
            yield return new object[] { 1146, "ibm01146", "ebcdic-gb-285+euro" };
            yield return new object[] { 1147, "ibm01147", "ibm01147" };
            yield return new object[] { 1147, "ibm01147", "ccsid01147" };
            yield return new object[] { 1147, "ibm01147", "cp01147" };
            yield return new object[] { 1147, "ibm01147", "ebcdic-fr-297+euro" };
            yield return new object[] { 1148, "ibm01148", "ibm01148" };
            yield return new object[] { 1148, "ibm01148", "ccsid01148" };
            yield return new object[] { 1148, "ibm01148", "cp01148" };
            yield return new object[] { 1148, "ibm01148", "ebcdic-international-500+euro" };
            yield return new object[] { 1149, "ibm01149", "ibm01149" };
            yield return new object[] { 1149, "ibm01149", "ccsid01149" };
            yield return new object[] { 1149, "ibm01149", "cp01149" };
            yield return new object[] { 1149, "ibm01149", "ebcdic-is-871+euro" };
            yield return new object[] { 1250, "windows-1250", "windows-1250" };
            yield return new object[] { 1250, "windows-1250", "x-cp1250" };
            yield return new object[] { 1251, "windows-1251", "windows-1251" };
            yield return new object[] { 1251, "windows-1251", "x-cp1251" };
            yield return new object[] { 1252, "windows-1252", "windows-1252" };
            yield return new object[] { 1252, "windows-1252", "x-ansi" };
            yield return new object[] { 1253, "windows-1253", "windows-1253" };
            yield return new object[] { 1254, "windows-1254", "windows-1254" };
            yield return new object[] { 1255, "windows-1255", "windows-1255" };
            yield return new object[] { 1256, "windows-1256", "windows-1256" };
            yield return new object[] { 1256, "windows-1256", "cp1256" };
            yield return new object[] { 1257, "windows-1257", "windows-1257" };
            yield return new object[] { 1258, "windows-1258", "windows-1258" };
            yield return new object[] { 1361, "johab", "johab" };
            yield return new object[] { 10000, "macintosh", "macintosh" };
            yield return new object[] { 10001, "x-mac-japanese", "x-mac-japanese" };
            yield return new object[] { 10002, "x-mac-chinesetrad", "x-mac-chinesetrad" };
            yield return new object[] { 10003, "x-mac-korean", "x-mac-korean" };
            yield return new object[] { 10004, "x-mac-arabic", "x-mac-arabic" };
            yield return new object[] { 10005, "x-mac-hebrew", "x-mac-hebrew" };
            yield return new object[] { 10006, "x-mac-greek", "x-mac-greek" };
            yield return new object[] { 10007, "x-mac-cyrillic", "x-mac-cyrillic" };
            yield return new object[] { 10008, "x-mac-chinesesimp", "x-mac-chinesesimp" };
            yield return new object[] { 10010, "x-mac-romanian", "x-mac-romanian" };
            yield return new object[] { 10017, "x-mac-ukrainian", "x-mac-ukrainian" };
            yield return new object[] { 10021, "x-mac-thai", "x-mac-thai" };
            yield return new object[] { 10029, "x-mac-ce", "x-mac-ce" };
            yield return new object[] { 10079, "x-mac-icelandic", "x-mac-icelandic" };
            yield return new object[] { 10081, "x-mac-turkish", "x-mac-turkish" };
            yield return new object[] { 10082, "x-mac-croatian", "x-mac-croatian" };
            yield return new object[] { 20000, "x-chinese-cns", "x-chinese-cns" };
            yield return new object[] { 20001, "x-cp20001", "x-cp20001" };
            yield return new object[] { 20002, "x-chinese-eten", "x-chinese-eten" };
            yield return new object[] { 20003, "x-cp20003", "x-cp20003" };
            yield return new object[] { 20004, "x-cp20004", "x-cp20004" };
            yield return new object[] { 20005, "x-cp20005", "x-cp20005" };
            yield return new object[] { 20105, "x-ia5", "x-ia5" };
            yield return new object[] { 20105, "x-ia5", "irv" };
            yield return new object[] { 20106, "x-ia5-german", "x-ia5-german" };
            yield return new object[] { 20106, "x-ia5-german", "din_66003" };
            yield return new object[] { 20106, "x-ia5-german", "german" };
            yield return new object[] { 20107, "x-ia5-swedish", "x-ia5-swedish" };
            yield return new object[] { 20107, "x-ia5-swedish", "sen_850200_b" };
            yield return new object[] { 20107, "x-ia5-swedish", "swedish" };
            yield return new object[] { 20108, "x-ia5-norwegian", "x-ia5-norwegian" };
            yield return new object[] { 20108, "x-ia5-norwegian", "norwegian" };
            yield return new object[] { 20108, "x-ia5-norwegian", "ns_4551-1" };
            yield return new object[] { 20261, "x-cp20261", "x-cp20261" };
            yield return new object[] { 20269, "x-cp20269", "x-cp20269" };
            yield return new object[] { 20273, "ibm273", "ibm273" };
            yield return new object[] { 20273, "ibm273", "cp273" };
            yield return new object[] { 20273, "ibm273", "csibm273" };
            yield return new object[] { 20277, "ibm277", "ibm277" };
            yield return new object[] { 20277, "ibm277", "csibm277" };
            yield return new object[] { 20277, "ibm277", "ebcdic-cp-dk" };
            yield return new object[] { 20277, "ibm277", "ebcdic-cp-no" };
            yield return new object[] { 20278, "ibm278", "ibm278" };
            yield return new object[] { 20278, "ibm278", "cp278" };
            yield return new object[] { 20278, "ibm278", "csibm278" };
            yield return new object[] { 20278, "ibm278", "ebcdic-cp-fi" };
            yield return new object[] { 20278, "ibm278", "ebcdic-cp-se" };
            yield return new object[] { 20280, "ibm280", "ibm280" };
            yield return new object[] { 20280, "ibm280", "cp280" };
            yield return new object[] { 20280, "ibm280", "csibm280" };
            yield return new object[] { 20280, "ibm280", "ebcdic-cp-it" };
            yield return new object[] { 20284, "ibm284", "ibm284" };
            yield return new object[] { 20284, "ibm284", "cp284" };
            yield return new object[] { 20284, "ibm284", "csibm284" };
            yield return new object[] { 20284, "ibm284", "ebcdic-cp-es" };
            yield return new object[] { 20285, "ibm285", "ibm285" };
            yield return new object[] { 20285, "ibm285", "cp285" };
            yield return new object[] { 20285, "ibm285", "csibm285" };
            yield return new object[] { 20285, "ibm285", "ebcdic-cp-gb" };
            yield return new object[] { 20290, "ibm290", "ibm290" };
            yield return new object[] { 20290, "ibm290", "cp290" };
            yield return new object[] { 20290, "ibm290", "csibm290" };
            yield return new object[] { 20290, "ibm290", "ebcdic-jp-kana" };
            yield return new object[] { 20297, "ibm297", "ibm297" };
            yield return new object[] { 20297, "ibm297", "cp297" };
            yield return new object[] { 20297, "ibm297", "csibm297" };
            yield return new object[] { 20297, "ibm297", "ebcdic-cp-fr" };
            yield return new object[] { 20420, "ibm420", "ibm420" };
            yield return new object[] { 20420, "ibm420", "cp420" };
            yield return new object[] { 20420, "ibm420", "csibm420" };
            yield return new object[] { 20420, "ibm420", "ebcdic-cp-ar1" };
            yield return new object[] { 20423, "ibm423", "ibm423" };
            yield return new object[] { 20423, "ibm423", "cp423" };
            yield return new object[] { 20423, "ibm423", "csibm423" };
            yield return new object[] { 20423, "ibm423", "ebcdic-cp-gr" };
            yield return new object[] { 20424, "ibm424", "ibm424" };
            yield return new object[] { 20424, "ibm424", "cp424" };
            yield return new object[] { 20424, "ibm424", "csibm424" };
            yield return new object[] { 20424, "ibm424", "ebcdic-cp-he" };
            yield return new object[] { 20833, "x-ebcdic-koreanextended", "x-ebcdic-koreanextended" };
            yield return new object[] { 20838, "ibm-thai", "ibm-thai" };
            yield return new object[] { 20838, "ibm-thai", "csibmthai" };
            yield return new object[] { 20866, "koi8-r", "koi8-r" };
            yield return new object[] { 20866, "koi8-r", "cskoi8r" };
            yield return new object[] { 20866, "koi8-r", "koi" };
            yield return new object[] { 20866, "koi8-r", "koi8" };
            yield return new object[] { 20866, "koi8-r", "koi8r" };
            yield return new object[] { 20871, "ibm871", "ibm871" };
            yield return new object[] { 20871, "ibm871", "cp871" };
            yield return new object[] { 20871, "ibm871", "csibm871" };
            yield return new object[] { 20871, "ibm871", "ebcdic-cp-is" };
            yield return new object[] { 20880, "ibm880", "ibm880" };
            yield return new object[] { 20880, "ibm880", "cp880" };
            yield return new object[] { 20880, "ibm880", "csibm880" };
            yield return new object[] { 20880, "ibm880", "ebcdic-cyrillic" };
            yield return new object[] { 20905, "ibm905", "ibm905" };
            yield return new object[] { 20905, "ibm905", "cp905" };
            yield return new object[] { 20905, "ibm905", "csibm905" };
            yield return new object[] { 20905, "ibm905", "ebcdic-cp-tr" };
            yield return new object[] { 20924, "ibm00924", "ibm00924" };
            yield return new object[] { 20924, "ibm00924", "ccsid00924" };
            yield return new object[] { 20924, "ibm00924", "cp00924" };
            yield return new object[] { 20924, "ibm00924", "ebcdic-latin9--euro" };
            yield return new object[] { 20932, "euc-jp", "euc-jp" };
            yield return new object[] { 20936, "x-cp20936", "x-cp20936" };
            yield return new object[] { 20949, "x-cp20949", "x-cp20949" };
            yield return new object[] { 21025, "cp1025", "cp1025" };
            yield return new object[] { 21866, "koi8-u", "koi8-u" };
            yield return new object[] { 21866, "koi8-u", "koi8-ru" };
            yield return new object[] { 28592, "iso-8859-2", "iso-8859-2" };
            yield return new object[] { 28592, "iso-8859-2", "csisolatin2" };
            yield return new object[] { 28592, "iso-8859-2", "iso_8859-2" };
            yield return new object[] { 28592, "iso-8859-2", "iso_8859-2:1987" };
            yield return new object[] { 28592, "iso-8859-2", "iso8859-2" };
            yield return new object[] { 28592, "iso-8859-2", "iso-ir-101" };
            yield return new object[] { 28592, "iso-8859-2", "l2" };
            yield return new object[] { 28592, "iso-8859-2", "latin2" };
            yield return new object[] { 28593, "iso-8859-3", "iso-8859-3" };
            yield return new object[] { 28593, "iso-8859-3", "csisolatin3" };
            yield return new object[] { 28593, "iso-8859-3", "iso_8859-3" };
            yield return new object[] { 28593, "iso-8859-3", "iso_8859-3:1988" };
            yield return new object[] { 28593, "iso-8859-3", "iso-ir-109" };
            yield return new object[] { 28593, "iso-8859-3", "l3" };
            yield return new object[] { 28593, "iso-8859-3", "latin3" };
            yield return new object[] { 28594, "iso-8859-4", "iso-8859-4" };
            yield return new object[] { 28594, "iso-8859-4", "csisolatin4" };
            yield return new object[] { 28594, "iso-8859-4", "iso_8859-4" };
            yield return new object[] { 28594, "iso-8859-4", "iso_8859-4:1988" };
            yield return new object[] { 28594, "iso-8859-4", "iso-ir-110" };
            yield return new object[] { 28594, "iso-8859-4", "l4" };
            yield return new object[] { 28594, "iso-8859-4", "latin4" };
            yield return new object[] { 28595, "iso-8859-5", "iso-8859-5" };
            yield return new object[] { 28595, "iso-8859-5", "csisolatincyrillic" };
            yield return new object[] { 28595, "iso-8859-5", "cyrillic" };
            yield return new object[] { 28595, "iso-8859-5", "iso_8859-5" };
            yield return new object[] { 28595, "iso-8859-5", "iso_8859-5:1988" };
            yield return new object[] { 28595, "iso-8859-5", "iso-ir-144" };
            yield return new object[] { 28596, "iso-8859-6", "iso-8859-6" };
            yield return new object[] { 28596, "iso-8859-6", "arabic" };
            yield return new object[] { 28596, "iso-8859-6", "csisolatinarabic" };
            yield return new object[] { 28596, "iso-8859-6", "ecma-114" };
            yield return new object[] { 28596, "iso-8859-6", "iso_8859-6" };
            yield return new object[] { 28596, "iso-8859-6", "iso_8859-6:1987" };
            yield return new object[] { 28596, "iso-8859-6", "iso-ir-127" };
            yield return new object[] { 28597, "iso-8859-7", "iso-8859-7" };
            yield return new object[] { 28597, "iso-8859-7", "csisolatingreek" };
            yield return new object[] { 28597, "iso-8859-7", "ecma-118" };
            yield return new object[] { 28597, "iso-8859-7", "elot_928" };
            yield return new object[] { 28597, "iso-8859-7", "greek" };
            yield return new object[] { 28597, "iso-8859-7", "greek8" };
            yield return new object[] { 28597, "iso-8859-7", "iso_8859-7" };
            yield return new object[] { 28597, "iso-8859-7", "iso_8859-7:1987" };
            yield return new object[] { 28597, "iso-8859-7", "iso-ir-126" };
            yield return new object[] { 28598, "iso-8859-8", "iso-8859-8" };
            yield return new object[] { 28598, "iso-8859-8", "csisolatinhebrew" };
            yield return new object[] { 28598, "iso-8859-8", "hebrew" };
            yield return new object[] { 28598, "iso-8859-8", "iso_8859-8" };
            yield return new object[] { 28598, "iso-8859-8", "iso_8859-8:1988" };
            yield return new object[] { 28598, "iso-8859-8", "iso-8859-8 visual" };
            yield return new object[] { 28598, "iso-8859-8", "iso-ir-138" };
            yield return new object[] { 28598, "iso-8859-8", "logical" };
            yield return new object[] { 28598, "iso-8859-8", "visual" };
            yield return new object[] { 28599, "iso-8859-9", "iso-8859-9" };
            yield return new object[] { 28599, "iso-8859-9", "csisolatin5" };
            yield return new object[] { 28599, "iso-8859-9", "iso_8859-9" };
            yield return new object[] { 28599, "iso-8859-9", "iso_8859-9:1989" };
            yield return new object[] { 28599, "iso-8859-9", "iso-ir-148" };
            yield return new object[] { 28599, "iso-8859-9", "l5" };
            yield return new object[] { 28599, "iso-8859-9", "latin5" };
            yield return new object[] { 28603, "iso-8859-13", "iso-8859-13" };
            yield return new object[] { 28605, "iso-8859-15", "iso-8859-15" };
            yield return new object[] { 28605, "iso-8859-15", "csisolatin9" };
            yield return new object[] { 28605, "iso-8859-15", "iso_8859-15" };
            yield return new object[] { 28605, "iso-8859-15", "l9" };
            yield return new object[] { 28605, "iso-8859-15", "latin9" };
            yield return new object[] { 29001, "x-europa", "x-europa" };
            yield return new object[] { 38598, "iso-8859-8-i", "iso-8859-8-i" };
            yield return new object[] { 50220, "iso-2022-jp", "iso-2022-jp" };
            yield return new object[] { 50221, "csiso2022jp", "csiso2022jp" };
            yield return new object[] { 50222, "iso-2022-jp", "iso-2022-jp" };
            yield return new object[] { 50225, "iso-2022-kr", "iso-2022-kr" };
            yield return new object[] { 50225, "iso-2022-kr", "csiso2022kr" };
            yield return new object[] { 50225, "iso-2022-kr", "iso-2022-kr-7" };
            yield return new object[] { 50225, "iso-2022-kr", "iso-2022-kr-7bit" };
            yield return new object[] { 50227, "x-cp50227", "x-cp50227" };
            yield return new object[] { 50227, "x-cp50227", "cp50227" };
            yield return new object[] { 51932, "euc-jp", "euc-jp" };
            yield return new object[] { 51932, "euc-jp", "cseucpkdfmtjapanese" };
            yield return new object[] { 51932, "euc-jp", "extended_unix_code_packed_format_for_japanese" };
            yield return new object[] { 51932, "euc-jp", "iso-2022-jpeuc" };
            yield return new object[] { 51932, "euc-jp", "x-euc" };
            yield return new object[] { 51932, "euc-jp", "x-euc-jp" };
            yield return new object[] { 51936, "euc-cn", "euc-cn" };
            yield return new object[] { 51936, "euc-cn", "x-euc-cn" };
            yield return new object[] { 51949, "euc-kr", "euc-kr" };
            yield return new object[] { 51949, "euc-kr", "cseuckr" };
            yield return new object[] { 51949, "euc-kr", "iso-2022-kr-8" };
            yield return new object[] { 51949, "euc-kr", "iso-2022-kr-8bit" };
            yield return new object[] { 52936, "hz-gb-2312", "hz-gb-2312" };
            yield return new object[] { 54936, "gb18030", "gb18030" };
            yield return new object[] { 57002, "x-iscii-de", "x-iscii-de" };
            yield return new object[] { 57003, "x-iscii-be", "x-iscii-be" };
            yield return new object[] { 57004, "x-iscii-ta", "x-iscii-ta" };
            yield return new object[] { 57005, "x-iscii-te", "x-iscii-te" };
            yield return new object[] { 57006, "x-iscii-as", "x-iscii-as" };
            yield return new object[] { 57007, "x-iscii-or", "x-iscii-or" };
            yield return new object[] { 57008, "x-iscii-ka", "x-iscii-ka" };
            yield return new object[] { 57009, "x-iscii-ma", "x-iscii-ma" };
            yield return new object[] { 57010, "x-iscii-gu", "x-iscii-gu" };
            yield return new object[] { 57011, "x-iscii-pa", "x-iscii-pa" };
        }

        /// <summary>
        /// Theory data for <see cref="TestRoundtrippingSpecificCodepageEncoding"/>.
        /// Each row is { encoding name (string), encoded bytes (byte[]), matching Unicode text (string) }.
        /// </summary>
        public static IEnumerable<object[]> SpecificCodepageEncodings()
        {
            // Layout is codepage encoding, bytes, and matching unicode string.
            yield return new object[] { "Windows-1256", new byte[] { 0xC7, 0xE1, 0xE1, 0xE5, 0x20, 0xC7, 0xCD, 0xCF }, "\x0627\x0644\x0644\x0647\x0020\x0627\x062D\x062F" };
            yield return new object[] {"Windows-1252", new byte[] { 0xD0, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6, 0xD7, 0xD8, 0xD9, 0xDA, 0xDB, 0xDC, 0xDD, 0xDE, 0xDF } ,
                "\x00D0\x00D1\x00D2\x00D3\x00D4\x00D5\x00D6\x00D7\x00D8\x00D9\x00DA\x00DB\x00DC\x00DD\x00DE\x00DF"};
            yield return new object[] { "GB2312", new byte[] { 0xCD, 0xE2, 0xCD, 0xE3, 0xCD, 0xE4 }, "\x5916\x8C4C\x5F2F" };
            yield return new object[] {"GB18030", new byte[] { 0x81, 0x30, 0x89, 0x37, 0x81, 0x30, 0x89, 0x38, 0xA8, 0xA4, 0xA8, 0xA2, 0x81, 0x30, 0x89, 0x39, 0x81, 0x30, 0x8A, 0x30 } ,
                "\x00DE\x00DF\x00E0\x00E1\x00E2\x00E3"};
        }

        /// <summary>
        /// Theory data for <see cref="TestSpecificMultibyteCharacterEncodings"/>.
        /// Each row is { encoding name (string), input bytes (byte[]), expected decoded UTF-16 code units (int[]) }.
        /// </summary>
        public static IEnumerable<object[]> MultibyteCharacterEncodings()
        {
            // Layout is the encoding, bytes, and expected result.
            yield return new object[] { "iso-2022-jp",
                new byte[] { 0xA,
                    0x1B, 0x24, 0x42, 0x25, 0x4A, 0x25, 0x4A,
                    0x1B, 0x28, 0x42,
                    0x1B, 0x24, 0x42, 0x25, 0x4A,
                    0x1B, 0x28, 0x42,
                    0x1B, 0x24, 0x42, 0x25, 0x4A,
                    0x1B, 0x28, 0x42,
                    0x1B, 0x1, 0x2, 0x3, 0x4,
                    0x1B, 0x24, 0x42, 0x25, 0x4A, 0x0E, 0x25, 0x4A,
                    0x1B, 0x28, 0x42, 0x41, 0x42, 0x0E, 0x25, 0x0F, 0x43 },
                new int[] { 0xA, 0x30CA, 0x30CA, 0x30CA, 0x30CA, 0x1B, 0x1, 0x2, 0x3, 0x4,
                    0x30CA, 0xFF65, 0xFF8A, 0x41, 0x42, 0xFF65, 0x43}
            };

            yield return new object[] { "GB18030",
                new byte[] { 0x41, 0x42, 0x43, 0x81, 0x40, 0x82, 0x80, 0x81, 0x30, 0x82, 0x31, 0x81, 0x20 },
                new int[] { 0x41, 0x42, 0x43, 0x4E02, 0x500B, 0x8B, 0x3F, 0x20 }
            };

            yield return new object[] { "shift_jis",
                new byte[] { 0x41, 0x42, 0x43, 0x81, 0x42, 0xE0, 0x43, 0x44, 0x45 },
                new int[] { 0x41, 0x42, 0x43, 0x3002, 0x6F86, 0x44, 0x45 }
            };

            yield return new object[] { "iso-2022-kr",
                new byte[] { 0x0E, 0x21, 0x7E, 0x1B, 0x24, 0x29, 0x43, 0x21, 0x7E, 0x0F, 0x21, 0x7E, 0x1B, 0x24, 0x29, 0x43, 0x21, 0x7E },
                new int[] { 0xFFE2, 0xFFE2, 0x21, 0x7E, 0x21, 0x7E }
            };

            yield return new object[] { "hz-gb-2312",
                new byte[] { 0x7E, 0x42, 0x7E, 0x7E, 0x7E, 0x7B, 0x21, 0x7E, 0x7E, 0x7D, 0x42, 0x42, 0x7E, 0xA, 0x43, 0x43 },
                new int[] { 0x7E, 0x42, 0x7E, 0x3013, 0x42, 0x42, 0x43, 0x43, }
            };
        }

        /// <summary>
        /// Code page / web name pairs that <see cref="ValidateDefaultEncodings"/> expects to resolve
        /// both before and after the code pages provider is registered.
        /// </summary>
        private static IEnumerable<KeyValuePair<int, string>> CrossplatformDefaultEncodings()
        {
            yield return Map(1200, "utf-16");
            yield return Map(12000, "utf-32");
            yield return Map(20127, "us-ascii");
            yield return Map(65000, "utf-7");
            yield return Map(65001, "utf-8");
        }

        /// <summary>Pairs a code page with its web name.</summary>
        private static KeyValuePair<int, string> Map(int codePage, string webName)
        {
            return new KeyValuePair<int, string>(codePage, webName);
        }

        /// <summary>
        /// Checks the default encodings, registers <see cref="CodePagesEncodingProvider"/>, and then verifies
        /// that every entry of <see cref="CodePageInfo"/> resolves identically through <see cref="Encoding"/>
        /// and through the provider, while the default encodings keep working.
        /// </summary>
        [Fact]
        public static void TestDefaultEncodings()
        {
            ValidateDefaultEncodings();

            // The default encoding should be something from the known list.
            Encoding defaultEncoding = Encoding.GetEncoding(0);
            Assert.NotNull(defaultEncoding);
            KeyValuePair<int, string> mappedEncoding = Map(defaultEncoding.CodePage, defaultEncoding.WebName);

            if (defaultEncoding.CodePage == Encoding.UTF8.CodePage)
            {
                // A non-UTF8 default encoding means we are either running on the full framework or an
                // encoding provider has already been registered through Encoding.RegisterProvider; in those
                // cases creating the encodings below is not expected to throw. The "must throw" checks
                // therefore only run when the default encoding is UTF8.
                foreach (object[] mapping in CodePageInfo())
                {
                    Assert.Throws<NotSupportedException>(() => Encoding.GetEncoding((int)mapping[0]));
                    AssertExtensions.Throws<ArgumentException>("name", () => Encoding.GetEncoding((string)mapping[2]));
                }

                // Currently the class EncodingInfo isn't present in corefx, so this checks none of the code pages are present.
                // When it is, uncomment the line below and remove the previous foreach/assert.
                // Assert.Equal(CrossplatformDefaultEncodings, Encoding.GetEncodings().OrderBy(i => i.CodePage).Select(i => Map(i.CodePage, i.WebName)));

                Assert.Contains(mappedEncoding, CrossplatformDefaultEncodings());
            }

            // Add the code page provider.
            Encoding.RegisterProvider(CodePagesEncodingProvider.Instance);

            // Make sure added code pages are identical between the provider and the Encoding class.
            foreach (object[] mapping in CodePageInfo())
            {
                Encoding encoding = Encoding.GetEncoding((int)mapping[0]);

                Encoding codePageEncoding = CodePagesEncodingProvider.Instance.GetEncoding((int)mapping[0]);
                Assert.Equal(encoding, codePageEncoding);

                // xUnit takes (expected, actual); keeping that order makes failure messages read correctly.
                Assert.Equal((int)mapping[0], encoding.CodePage);
                Assert.Equal((string)mapping[1], encoding.WebName);

                // If available, validate serializing and deserializing with BinaryFormatter
                ValidateSerializeDeserialize(encoding);

                // Get encoding via query string.
                Assert.Equal(Encoding.GetEncoding((string)mapping[2]), CodePagesEncodingProvider.Instance.GetEncoding((string)mapping[2]));
            }

            // Adding the code page provider should keep the originals, too.
            ValidateDefaultEncodings();

            // Currently the class EncodingInfo isn't present in corefx, so this checks the complete list
            // When it is, uncomment the lines below and remove the previous foreach/assert.
            // Assert.Equal(CrossplatformDefaultEncodings().Union(CodePageInfo().Select(i => Map((int)i[0], (string)i[1])).OrderBy(i => i.Key)),
            //              Encoding.GetEncodings().OrderBy(i => i.CodePage).Select(i => Map(i.CodePage, i.WebName)));

            // Default encoding may have changed, should still be something on the combined list.
            defaultEncoding = Encoding.GetEncoding(0);
            Assert.NotNull(defaultEncoding);
            mappedEncoding = Map(defaultEncoding.CodePage, defaultEncoding.WebName);
            Assert.Contains(mappedEncoding, CrossplatformDefaultEncodings().Union(CodePageInfo().Select(i => Map((int)i[0], (string)i[1]))));

            TestRegister1252();
        }

        /// <summary>
        /// Clones <paramref name="e"/>, its encoder and its decoder through <c>BinaryFormatterHelpers.Clone</c>
        /// and asserts that every original/clone encoder-decoder pairing produces the same decoded text.
        /// </summary>
        /// <param name="e">The encoding to round-trip.</param>
        private static void ValidateSerializeDeserialize(Encoding e)
        {
            // Make sure the Encoding roundtrips
            Assert.Equal(e, BinaryFormatterHelpers.Clone(e));

            // Get an encoder and decoder from the encoding, and clone them
            Encoder origEncoder = e.GetEncoder();
            Decoder origDecoder = e.GetDecoder();
            Encoder clonedEncoder = BinaryFormatterHelpers.Clone(origEncoder);
            Decoder clonedDecoder = BinaryFormatterHelpers.Clone(origDecoder);

            // Encode and decode some text with each pairing
            const string InputText = "abcdefghijklmnopqrstuvwxyz";
            char[] inputTextChars = InputText.ToCharArray();
            var pairs = new[]
            {
                Tuple.Create(origEncoder, origDecoder),
                Tuple.Create(origEncoder, clonedDecoder),
                Tuple.Create(clonedEncoder, origDecoder),
                Tuple.Create(clonedEncoder, clonedDecoder),
            };
            var results = new List<char[]>();
            foreach (Tuple<Encoder, Decoder> pair in pairs)
            {
                byte[] encodedBytes = new byte[pair.Item1.GetByteCount(inputTextChars, 0, inputTextChars.Length, true)];
                Assert.Equal(encodedBytes.Length, pair.Item1.GetBytes(inputTextChars, 0, inputTextChars.Length, encodedBytes, 0, true));
                char[] decodedChars = new char[pair.Item2.GetCharCount(encodedBytes, 0, encodedBytes.Length)];
                Assert.Equal(decodedChars.Length, pair.Item2.GetChars(encodedBytes, 0, encodedBytes.Length, decodedChars, 0));
                results.Add(decodedChars);
            }

            // Validate that all of the pairings produced the same results
            foreach (char[] a in results)
            {
                foreach (char[] b in results)
                {
                    Assert.Equal(a, b);
                }
            }
        }

        /// <summary>
        /// Asserts that each entry of <see cref="CrossplatformDefaultEncodings"/> resolves by code page and
        /// by name to the same encoding, and that the encoding reports the expected web name.
        /// </summary>
        private static void ValidateDefaultEncodings()
        {
            foreach (KeyValuePair<int, string> mapping in CrossplatformDefaultEncodings())
            {
                Encoding encoding = Encoding.GetEncoding(mapping.Key);
                Assert.NotNull(encoding);
                Assert.Equal(encoding, Encoding.GetEncoding(mapping.Value));
                Assert.Equal(mapping.Value, encoding.WebName);
            }
        }

        /// <summary>
        /// Decodes <paramref name="bytes"/> with the named provider encoding, checks the text against
        /// <paramref name="expected"/>, then encodes the text again and checks it yields the original bytes.
        /// </summary>
        [Theory]
        [MemberData(nameof(SpecificCodepageEncodings))]
        public static void TestRoundtrippingSpecificCodepageEncoding(string encodingName, byte[] bytes, string expected)
        {
            Encoding encoding = CodePagesEncodingProvider.Instance.GetEncoding(encodingName);
            string decoded = encoding.GetString(bytes, 0, bytes.Length);
            Assert.Equal(expected, decoded);
            Assert.Equal(bytes, encoding.GetBytes(decoded));
        }

        /// <summary>
        /// Verifies that the provider returns consistent encodings for a code page, its web name and a query
        /// name, and that the encoding round-trips a small ASCII alphanumeric string.
        /// </summary>
        [Theory]
        [MemberData(nameof(CodePageInfo))]
        public static void TestCodepageEncoding(int codePage, string webName, string queryString)
        {
            Encoding encoding;
            // Code pages 20932 and 50222 share their web name with another code page in CodePageInfo
            // ("euc-jp" with 51932, "iso-2022-jp" with 50220). For those two, querying by name is expected
            // to always return the other code page's encoding, so the lookups must NOT be equal.
            if (codePage != 20932 && codePage != 50222)
            {
                encoding = CodePagesEncodingProvider.Instance.GetEncoding(queryString);
                Assert.Equal(encoding, CodePagesEncodingProvider.Instance.GetEncoding(codePage));
                Assert.Equal(encoding, CodePagesEncodingProvider.Instance.GetEncoding(webName));
            }
            else
            {
                encoding = CodePagesEncodingProvider.Instance.GetEncoding(codePage);
                Assert.NotEqual(encoding, CodePagesEncodingProvider.Instance.GetEncoding(queryString));
                Assert.NotEqual(encoding, CodePagesEncodingProvider.Instance.GetEncoding(webName));
            }

            Assert.NotNull(encoding);
            Assert.Equal(codePage, encoding.CodePage);
            Assert.Equal(webName, encoding.WebName);

            // Small round-trip for ASCII alphanumeric range (some code pages use different punctuation!)
            // Start with space.
            string asciiPrintable = " 0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
            char[] traveled = encoding.GetChars(encoding.GetBytes(asciiPrintable));
            Assert.Equal(asciiPrintable.ToCharArray(), traveled);
        }

        /// <summary>
        /// Feeds <paramref name="bytes"/> to a decoder of the named encoding one byte at a time and checks
        /// that the accumulated characters match <paramref name="expected"/>.
        /// </summary>
        [Theory]
        [MemberData(nameof(MultibyteCharacterEncodings))]
        public static void TestSpecificMultibyteCharacterEncodings(string codepageName, byte[] bytes, int[] expected)
        {
            Decoder decoder = CodePagesEncodingProvider.Instance.GetEncoding(codepageName).GetDecoder();
            char[] buffer = new char[expected.Length];

            // A single decoder instance is reused across the one-byte calls; charIndex advances by
            // however many chars each call wrote.
            for (int byteIndex = 0, charIndex = 0, charCount = 0; byteIndex < bytes.Length; byteIndex++, charIndex += charCount)
            {
                charCount = decoder.GetChars(bytes, byteIndex, 1, buffer, charIndex);
            }

            Assert.Equal(expected, buffer.Select(c => (int)c));
        }

        [Theory]
        [MemberData(nameof(CodePageInfo))]
        public static void TestEncodingDisplayNames(int codePage, string webName, string queryString)
633 { 634 var encoding = CodePagesEncodingProvider.Instance.GetEncoding(codePage); 635 636 string name = encoding.EncodingName; 637 638 // Names can't be empty, and must be printable characters. 639 Assert.False(string.IsNullOrWhiteSpace(name)); 640 Assert.All(name, c => Assert.True(c >= ' ' && c < '~' + 1, "Name: " + name + " contains character: " + c)); 641 } 642 643 // This test is run as part of the default mappings test, since it modifies global state which that test 644 // depends on. TestRegister1252()645 public static void TestRegister1252() 646 { 647 // This test case ensure we can map all 1252 codepage codepoints without any exception. 648 string s1252Result = 649 "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\u0008\u0009\u000a\u000b\u000c\u000d\u000e\u000f" + 650 "\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f" + 651 "\u0020\u0021\u0022\u0023\u0024\u0025\u0026\u0027\u0028\u0029\u002a\u002b\u002c\u002d\u002e\u002f" + 652 "\u0030\u0031\u0032\u0033\u0034\u0035\u0036\u0037\u0038\u0039\u003a\u003b\u003c\u003d\u003e\u003f" + 653 "\u0040\u0041\u0042\u0043\u0044\u0045\u0046\u0047\u0048\u0049\u004a\u004b\u004c\u004d\u004e\u004f" + 654 "\u0050\u0051\u0052\u0053\u0054\u0055\u0056\u0057\u0058\u0059\u005a\u005b\u005c\u005d\u005e\u005f" + 655 "\u0060\u0061\u0062\u0063\u0064\u0065\u0066\u0067\u0068\u0069\u006a\u006b\u006c\u006d\u006e\u006f" + 656 "\u0070\u0071\u0072\u0073\u0074\u0075\u0076\u0077\u0078\u0079\u007a\u007b\u007c\u007d\u007e\u007f" + 657 "\u20ac\u0081\u201a\u0192\u201e\u2026\u2020\u2021\u02c6\u2030\u0160\u2039\u0152\u008d\u017d\u008f" + 658 "\u0090\u2018\u2019\u201c\u201d\u2022\u2013\u2014\u02dc\u2122\u0161\u203a\u0153\u009d\u017e\u0178" + 659 "\u00a0\u00a1\u00a2\u00a3\u00a4\u00a5\u00a6\u00a7\u00a8\u00a9\u00aa\u00ab\u00ac\u00ad\u00ae\u00af" + 660 "\u00b0\u00b1\u00b2\u00b3\u00b4\u00b5\u00b6\u00b7\u00b8\u00b9\u00ba\u00bb\u00bc\u00bd\u00be\u00bf" + 661 
"\u00c0\u00c1\u00c2\u00c3\u00c4\u00c5\u00c6\u00c7\u00c8\u00c9\u00ca\u00cb\u00cc\u00cd\u00ce\u00cf" + 662 "\u00d0\u00d1\u00d2\u00d3\u00d4\u00d5\u00d6\u00d7\u00d8\u00d9\u00da\u00db\u00dc\u00dd\u00de\u00df" + 663 "\u00e0\u00e1\u00e2\u00e3\u00e4\u00e5\u00e6\u00e7\u00e8\u00e9\u00ea\u00eb\u00ec\u00ed\u00ee\u00ef" + 664 "\u00f0\u00f1\u00f2\u00f3\u00f4\u00f5\u00f6\u00f7\u00f8\u00f9\u00fa\u00fb\u00fc\u00fd\u00fe\u00ff"; 665 666 Encoding.RegisterProvider(CodePagesEncodingProvider.Instance); 667 Encoding win1252 = Encoding.GetEncoding("windows-1252", EncoderFallback.ExceptionFallback, DecoderFallback.ExceptionFallback); 668 byte[] enc = new byte[256]; 669 for (int j = 0; j < 256; j++) 670 { 671 enc[j] = (byte)j; 672 } 673 674 Assert.Equal(s1252Result, win1252.GetString(enc)); 675 } 676 677 } 678 679 public class CultureSetup : IDisposable 680 { 681 private readonly CultureInfo _originalUICulture; 682 CultureSetup()683 public CultureSetup() 684 { 685 _originalUICulture = CultureInfo.CurrentUICulture; 686 CultureInfo.CurrentUICulture = new CultureInfo("en-US"); 687 } 688 Dispose()689 public void Dispose() 690 { 691 CultureInfo.CurrentUICulture = _originalUICulture; 692 } 693 } 694 } 695