#
# test_codecencodings_cn.py
#   Codec encoding tests for PRC encodings.
#
# Every ``codectests`` entry below is an (input, error handler, expected)
# triple.  The triples are consumed by multibytecodec_support.TestBase, which
# is not defined in this file.
# NOTE(review): presumably bytes input is decoded and str input is encoded
# with the class's ``encoding``, and an expected value of None means the
# operation must fail -- confirm against TestBase.
#

from test import multibytecodec_support
import unittest


class Test_GB2312(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb2312' encoding."""

    encoding = 'gb2312'
    # Reference text for this codec, loaded by the shared support module.
    tstring = multibytecodec_support.load_teststring(encoding)
    codectests = (
        # invalid bytes
        (b"abc\x81\x81\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x81\x81\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (
            b"abc\x81\x81\xc1\xc4\xc8",
            "replace",
            "abc\ufffd\ufffd\u804a\ufffd",
        ),
        (b"abc\x81\x81\xc1\xc4", "ignore", "abc\u804a"),
        (b"\xc1\x64", "strict", None),
    )


class Test_GBK(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gbk' encoding."""

    encoding = 'gbk'
    # Reference text for this codec, loaded by the shared support module.
    tstring = multibytecodec_support.load_teststring(encoding)
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (
            b"abc\x80\x80\xc1\xc4\xc8",
            "replace",
            "abc\ufffd\ufffd\u804a\ufffd",
        ),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (b"\x83\x34\x83\x31", "strict", None),
        ("\u30fb", "strict", None),
    )


class Test_GB18030(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'gb18030' encoding."""

    encoding = 'gb18030'
    # Reference text for this codec, loaded by the shared support module.
    tstring = multibytecodec_support.load_teststring(encoding)
    codectests = (
        # invalid bytes
        (b"abc\x80\x80\xc1\xc4", "strict", None),
        (b"abc\xc8", "strict", None),
        (b"abc\x80\x80\xc1\xc4", "replace", "abc\ufffd\ufffd\u804a"),
        (
            b"abc\x80\x80\xc1\xc4\xc8",
            "replace",
            "abc\ufffd\ufffd\u804a\ufffd",
        ),
        (b"abc\x80\x80\xc1\xc4", "ignore", "abc\u804a"),
        (
            b"abc\x84\x39\x84\x39\xc1\xc4",
            "replace",
            "abc\ufffd9\ufffd9\u804a",
        ),
        ("\u30fb", "strict", b"\x819\xa79"),
        (b"abc\x84\x32\x80\x80def", "replace", 'abc\ufffd2\ufffd\ufffddef'),
        (b"abc\x81\x30\x81\x30def", "strict", 'abc\x80def'),
        (b"abc\x86\x30\x81\x30def", "replace", 'abc\ufffd0\ufffd0def'),
        # issue29990
        (b"\xff\x30\x81\x30", "strict", None),
        (b"\x81\x30\xff\x30", "strict", None),
        (
            b"abc\x81\x39\xff\x39\xc1\xc4",
            "replace",
            "abc\ufffd\x39\ufffd\x39\u804a",
        ),
        (b"abc\xab\x36\xff\x30def", "replace", 'abc\ufffd\x36\ufffd\x30def'),
        (b"abc\xbf\x38\xff\x32\xc1\xc4", "ignore", "abc\x38\x32\u804a"),
    )
    # Flag read by the shared TestBase machinery (not visible here).
    # NOTE(review): presumably enables extra ISO 10646 checks -- confirm.
    has_iso10646 = True


class Test_HZ(multibytecodec_support.TestBase, unittest.TestCase):
    """Codec test data for the 'hz' encoding."""

    encoding = 'hz'
    # Reference text for this codec, loaded by the shared support module.
    tstring = multibytecodec_support.load_teststring(encoding)
    codectests = (
        # test '~\n' (3 lines)
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~{<:Ky2;S{#,~}~\n'
            b'~{NpJ)l6HK!#~}Bye.\n',
            'strict',
            'This sentence is in ASCII.\n'
            'The next sentence is in GB.'
            '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            'Bye.\n',
        ),
        # test '~\n' (4 lines)
        (
            b'This sentence is in ASCII.\n'
            b'The next sentence is in GB.~\n'
            b'~{<:Ky2;S{#,NpJ)l6HK!#~}~\n'
            b'Bye.\n',
            'strict',
            'This sentence is in ASCII.\n'
            'The next sentence is in GB.'
            '\u5df1\u6240\u4e0d\u6b32\uff0c\u52ff\u65bd\u65bc\u4eba\u3002'
            'Bye.\n',
        ),
        # invalid bytes
        (b'ab~cd', 'replace', 'ab\uFFFDcd'),
        (b'ab\xffcd', 'replace', 'ab\uFFFDcd'),
        (b'ab~{\x81\x81\x41\x44~}cd', 'replace', 'ab\uFFFD\uFFFD\u804Acd'),
        (b'ab~{\x41\x44~}cd', 'replace', 'ab\u804Acd'),
        (b"ab~{\x79\x79\x41\x44~}cd", "replace", "ab\ufffd\ufffd\u804acd"),
        # issue 30003
        ('ab~cd', 'strict', b'ab~~cd'),  # escape ~
        (b'~{Dc~~:C~}', 'strict', None),  # ~~ only in ASCII mode
        (b'~{Dc~\n:C~}', 'strict', None),  # ~\n only in ASCII mode
    )


if __name__ == "__main__":
    unittest.main()