import unittest

from precis_i18n.codepointset import CodepointSet


class TestCodepointSet(unittest.TestCase):
    """Tests for CodepointSet: membership, length, equality, repr, parsing."""

    def test_contains(self):
        # Probing -1..3 checks both sides of each small set's boundaries.
        probes = range(-1, 4)

        single = CodepointSet('0000\n')
        self.assertEqual(
            [p in single for p in probes],
            [False, True, False, False, False],
        )
        self.assertNotIn(0x010FFFF, single)

        small_cases = (
            ('0000..0001\n', [False, True, True, False, False]),
            ('0000\n0001\n0002', [False, True, True, True, False]),
            ('0000\n0002', [False, True, False, True, False]),
        )
        for spec, expected in small_cases:
            members = CodepointSet(spec)
            self.assertEqual([p in members for p in probes], expected)

        # Upper edge: 0x10FFFF is inside the range, 0x110000 is not.
        upper = CodepointSet('10000..10FFFF')
        self.assertIn(0x10FFFF, upper)
        self.assertNotIn(0x110000, upper)

    def test_len(self):
        # len() counts individual code points, not range entries.
        expectations = (
            ('0000\n', 1),
            ('0000..0001\n', 2),
            ('0000\n0001\n0002', 3),
            ('0000\n0002', 2),
            ('10000..10FFFF', 0x10FFFF - 0x10000 + 1),
        )
        for spec, size in expectations:
            self.assertEqual(len(CodepointSet(spec)), size)

    def test_equals(self):
        base = CodepointSet('0000..00FF')
        same = CodepointSet('0000..00FF')
        shorter = CodepointSet('0000..00FE')
        self.assertEqual(base, same)
        self.assertNotEqual(base, shorter)

        # Non-CodepointSet always not-equal.
        self.assertFalse(base == 'what?')

    def test_repr(self):
        # Each pair is (constructor input, expected repr() text).
        expectations = (
            ('', "CodepointSet('')"),
            ('0000', "CodepointSet('0000')"),
            ('0000..00FF', "CodepointSet('0000..00FF')"),
            ('0001..FFFF\n100000..10FFFF',
             r"CodepointSet('0001..FFFF\n100000..10FFFF')"),
            ('FFFF..1FFFF', "CodepointSet('FFFF..1FFFF')"),
            ('10000..1FFFF', "CodepointSet('10000..1FFFF')"),
            ('FFFE\n10000..1FFFF', r"CodepointSet('FFFE\n10000..1FFFF')"),
        )
        for spec, text in expectations:
            self.assertEqual(repr(CodepointSet(spec)), text)

    def test_coalesce(self):
        # Adjacent entries compare equal to the merged range form.
        pairs = (
            ('0000\n0001\n0002', '0000..0002'),
            ('0000\n0002\n0003..0004', '0000\n0002..0004'),
        )
        for expanded, merged in pairs:
            self.assertEqual(CodepointSet(expanded), CodepointSet(merged))

    def test_malformed_range(self):
        # Every one of these inputs is expected to raise ValueError.
        rejected = (
            '0002..0000\n0001',
            '0000..0001\n0000..0001\n0002',
            '0000\n0002\n0002..0004',
            '110000',
            '0000\n000G',
        )
        for spec in rejected:
            with self.assertRaises(ValueError):
                CodepointSet(spec)

    def test_even_odd(self):
        # Build a set of the even values below 10000; membership must then
        # match evenness for every value in that span.
        evens = '\n'.join("%04X" % cp for cp in range(0, 10000, 2))
        members = CodepointSet(evens)
        for value in range(10000):
            self.assertEqual(value in members, (value % 2) == 0)

    def test_parse(self):
        # Short hex values come back zero-padded in repr(); blank lines and
        # a comment-only line yield an empty set.
        expectations = (
            ('A\nBB\n', r"CodepointSet('000A\n00BB')"),
            ('AAA\nBBB..CCC\n', r"CodepointSet('0AAA\n0BBB..0CCC')"),
            ('\n \n # comment \n \n', "CodepointSet('')"),
        )
        for spec, text in expectations:
            self.assertEqual(repr(CodepointSet(spec)), text)