1#!/usr/local/bin/python3.8
2# vim:fileencoding=utf-8
3
4
5__license__ = 'GPL v3'
6__copyright__ = '2014, Kovid Goyal <kovid at kovidgoyal.net>'
7
8import unittest, sys
9from contextlib import contextmanager
10
11import calibre.utils.icu as icu
12from polyglot.builtins import iteritems, cmp
13
14
@contextmanager
def make_collation_func(name, locale, numeric=True, maker=icu.make_sort_key_func, func='strcmp'):
    """Context manager yielding a collation callable for ``locale``.

    ``maker`` is called with a zero-argument collator factory and ``func``;
    whatever it returns is the value yielded to the ``with`` block.

    :param name: Accepted for the benefit of callers; not used in the body.
    :param locale: Locale identifier passed to ``icu._icu.Collator``.
    :param numeric: Value assigned to the ``numeric`` attribute of each
        collator the factory creates.
    :param maker: Factory such as ``icu.make_sort_key_func`` or
        ``icu.make_two_arg_func``.
    :param func: Second argument forwarded to ``maker``.
    """
    def build_collator():
        # A new collator is created on every call of the factory
        collator = icu._icu.Collator(locale)
        collator.numeric = numeric
        return collator

    collation_func = maker(build_collator, func)
    yield collation_func
23
24
class TestICU(unittest.TestCase):
    ''' Tests for the sorting, case changing, searching, collation and word
    breaking helpers reached through the ``icu`` module '''

    # Short aliases for the two assertions used most often below
    ae = unittest.TestCase.assertEqual
    ane = unittest.TestCase.assertNotEqual

    def setUp(self):
        # Select the English locale before each test
        icu.change_locale('en')

    def test_sorting(self):
        ' Test the various sorting APIs '
        german = '''Sonntag Montag Dienstag Januar Februar März Fuße Fluße Flusse flusse fluße flüße flüsse'''.split()
        german_good = '''Dienstag Februar flusse Flusse fluße Fluße flüsse flüße Fuße Januar März Montag Sonntag'''.split()
        french = '''dimanche lundi mardi janvier février mars déjà Meme deja même dejà bpef bœg Boef Mémé bœf boef bnef pêche pèché pêché pêche pêché'''.split()
        french_good = '''bnef boef Boef bœf bœg bpef deja dejà déjà dimanche février janvier lundi mardi mars Meme Mémé même pèché pêche pêche pêché pêché'''.split()  # noqa

        # Test corner cases
        sort_key = icu.sort_key
        s = '\U0001f431'
        self.ae(sort_key(s), sort_key(s.encode(sys.getdefaultencoding())), 'UTF-8 encoded object not correctly decoded to generate sort key')
        # Pass the bytes through sort_key(): comparing the encoded bytes with
        # themselves would be vacuously true and check nothing
        undecodable = s.encode('utf-16')
        self.ae(undecodable, sort_key(undecodable), 'Undecodable bytestring not returned as itself')
        self.ae(b'', sort_key(None))
        self.ae(0, icu.strcmp(None, b''))
        self.ae(0, icu.strcmp(s, s.encode(sys.getdefaultencoding())))

        # Test locales: the sort key ordering and the two argument comparison
        # must agree for every pair of words
        with make_collation_func('dsk', 'de', maker=icu.make_sort_key_func, func='sort_key') as dsk:
            self.ae(german_good, sorted(german, key=dsk))
            with make_collation_func('dcmp', 'de', maker=icu.make_two_arg_func, func='strcmp') as dcmp:
                for x in german:
                    for y in german:
                        self.ae(cmp(dsk(x), dsk(y)), dcmp(x, y))

        with make_collation_func('fsk', 'fr', maker=icu.make_sort_key_func, func='sort_key') as fsk:
            self.ae(french_good, sorted(french, key=fsk))
            with make_collation_func('fcmp', 'fr', maker=icu.make_two_arg_func) as fcmp:
                for x in french:
                    for y in french:
                        self.ae(cmp(fsk(x), fsk(y)), fcmp(x, y))

        with make_collation_func('ssk', 'es', maker=icu.make_sort_key_func, func='sort_key') as ssk:
            self.ane(ssk('peña'), ssk('pena'))
            with make_collation_func('scmp', 'es', maker=icu.make_two_arg_func) as scmp:
                self.ane(0, scmp('pena', 'peña'))

        for k, v in iteritems({'pèché': 'peche', 'flüße':'Flusse', 'Štepánek':'ŠtepaneK'}):
            self.ae(0, icu.primary_strcmp(k, v))

        # Test different types of collation
        self.ae(icu.primary_sort_key('Aä'), icu.primary_sort_key('aa'))
        self.assertLess(icu.numeric_sort_key('something 2'), icu.numeric_sort_key('something 11'))
        self.assertLess(icu.case_sensitive_sort_key('A'), icu.case_sensitive_sort_key('a'))
        self.ae(0, icu.strcmp('a', 'A'))
        self.ae(cmp('a', 'A'), icu.case_sensitive_strcmp('a', 'A'))
        self.ae(0, icu.primary_strcmp('ä', 'A'))

    def test_change_case(self):
        ' Test the various ways of changing the case '
        from calibre.utils.titlecase import titlecase
        # Test corner cases
        self.ae('A', icu.upper(b'a'))
        # Empty and non-string values are expected to come back unchanged
        for x in ('', None, False, 1):
            self.ae(x, icu.capitalize(x))

        for x in ('a', 'Alice\'s code', 'macdonald\'s machIne', '02 the wars'):
            self.ae(icu.upper(x), x.upper())
            self.ae(icu.lower(x), x.lower())
            # ICU's title case algorithm is different from ours, when there are
            # capitals inside words
            self.ae(icu.title_case(x), titlecase(x).replace('machIne', 'Machine'))
            self.ae(icu.capitalize(x), x[0].upper() + x[1:].lower())
            self.ae(icu.swapcase(x), x.swapcase())

    def test_find(self):
        ' Test searching for substrings '
        # icu.find()/icu.primary_find() take the needle first, then the
        # haystack, and the result is compared against a pair of integers
        self.ae((1, 1), icu.find(b'a', b'1ab'))
        self.ae((1, 1), icu.find('\U0001f431', 'x\U0001f431x'))
        self.ae((1, 1), icu.find('y', '\U0001f431y'))
        self.ae((0, 4), icu.primary_find('pena', 'peña'))
        for k, v in iteritems({'pèché': 'peche', 'flüße':'Flusse', 'Štepánek':'ŠtepaneK'}):
            self.ae((1, len(k)), icu.primary_find(v, ' ' + k), 'Failed to find %s in %s' % (v, k))
        self.assertTrue(icu.startswith(b'abc', b'ab'))
        self.assertTrue(icu.startswith('abc', 'abc'))
        self.assertFalse(icu.startswith('xyz', 'a'))
        self.assertTrue(icu.startswith('xxx', ''))
        self.assertTrue(icu.primary_startswith('pena', 'peña'))
        self.assertTrue(icu.contains('\U0001f431', '\U0001f431'))
        self.assertTrue(icu.contains('something', 'some other something else'))
        self.assertTrue(icu.contains('', 'a'))
        self.assertTrue(icu.contains('', ''))
        self.assertFalse(icu.contains('xxx', 'xx'))
        self.assertTrue(icu.primary_contains('pena', 'peña'))

    def test_collation_order(self):
        'Testing collation ordering'
        # Each entry pairs an assertion with a sequence of strings; the
        # assertion is applied to the collation order of every consecutive
        # pair of strings in the sequence
        for assert_func, words in [
            (self.ae,  ('Šaa', 'Smith', 'Solženicyn', 'Štepánek')),
            (self.ae,  ('11', '011')),
            (self.ane, ('2', '1')),
            (self.ae,  ('100 Smith', '0100 Smith')),
        ]:
            last = None
            for x in words:
                order, _ = icu.numeric_collator().collation_order(x)
                if last is not None:
                    assert_func(last, order, 'Order for %s not correct: %s != %s' % (x, last, order))
                last = order

        self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),
                {' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']})

    def test_collation_order_for_partitioning(self):
        'Testing collation ordering for partitioning'
        # Same scheme as test_collation_order, but using the non numeric
        # collator, for which the expectations on digit strings differ
        for assert_func, words in [
            (self.ae, ('Smith', 'Šaa', 'Solženicyn', 'Štepánek')),
            (self.ane, ('11', '011')),
            (self.ae, ('102 Smith', '100 Smith')),
            (self.ane, ('100 Smith', '0100 Smith')),
        ]:
            last = None
            for x in words:
                order, _ = icu.non_numeric_sort_collator().collation_order(x)
                if last is not None:
                    assert_func(last, order, 'Order for %s not correct: %s != %s' % (x, last, order))
                last = order

        self.ae(dict(icu.partition_by_first_letter(['A1', '', 'a1', '\U0001f431', '\U0001f431x'])),
                {' ':[''], 'A':['A1', 'a1'], '\U0001f431':['\U0001f431', '\U0001f431x']})

    def test_roundtrip(self):
        ' Test roundtripping '
        for r in ('xxx\0\u2219\U0001f431xxx', '\0', '', 'simple'):
            self.ae(r, icu._icu.roundtrip(r))
        # Lone surrogates are expected to come back as U+FFFD
        self.ae(icu._icu.roundtrip('\ud8e81'), '\ufffd1')
        self.ae(icu._icu.roundtrip('\udc01\ud8e8'), '\ufffd\ufffd')
        for x, l in [('', 0), ('a', 1), ('\U0001f431', 1)]:
            self.ae(icu._icu.string_length(x), l)
        # A character outside the BMP counts as two UTF-16 units
        for x, l in [('', 0), ('a', 1), ('\U0001f431', 2)]:
            self.ae(icu._icu.utf16_length(x), l)
        self.ae(icu._icu.chr(0x1f431), '\U0001f431')
        self.ae(icu._icu.ord_string('abc'*100), tuple(map(ord, 'abc'*100)))
        self.ae(icu._icu.ord_string('\U0001f431'), (0x1f431,))

    def test_character_name(self):
        ' Test character naming '
        from calibre.utils.unicode_names import character_name_from_code
        for q, e in {
                '\U0001f431': 'CAT FACE'
                }.items():
            self.ae(icu.character_name(q), e)
            self.ae(character_name_from_code(icu.ord_string(q)[0]), e)

    def test_contractions(self):
        ' Test contractions '
        self.skipTest('Skipping as this depends too much on ICU version')
        # skipTest() raises, so nothing below runs; the expectations are kept
        # for reference
        c = icu._icu.Collator('cs')
        self.ae(icu.contractions(c), frozenset({'Z\u030c', 'z\u030c', 'Ch',
            'C\u030c', 'ch', 'cH', 'c\u030c', 's\u030c', 'r\u030c', 'CH',
            'S\u030c', 'R\u030c'}))

    def test_break_iterator(self):
        ' Test the break iterator '
        from calibre.spell.break_iterator import split_into_words as split, index_of, split_into_words_and_positions, count_words
        for q in ('one two three', ' one two three', 'one\ntwo  three ', ):
            self.ae(split(q), ['one', 'two', 'three'], 'Failed to split: %r' % q)
        self.ae(split('I I\'m'), ['I', "I'm"])
        self.ae(split('out-of-the-box'), ['out-of-the-box'])
        self.ae(split('-one two-'), ['-one', 'two-'])
        self.ae(split('-one a-b-c-d e'), ['-one', 'a-b-c-d', 'e'])
        self.ae(split('-one -a-b-c-d- e'), ['-one', '-a-b-c-d-', 'e'])
        self.ae(split_into_words_and_positions('one \U0001f431 three'), [(0, 3), (6, 5)])
        self.ae(count_words('a b c d e f'), 6)
        # (needle, haystack, expected result of index_of), -1 meaning that
        # the needle is not expected to be found
        for needle, haystack, pos in (
                ('word', 'a word b', 2),
                ('word', 'a word', 2),
                ('one-two', 'a one-two punch', 2),
                ('one-two', 'one-two punch', 0),
                ('one-two', 'one-two', 0),
                ('one', 'one-two one', 8),
                ('one-two', 'one-two-three one-two', 14),
                ('one', 'onet one', 5),
                ('two', 'one-two two', 8),
                ('two', 'two-one two', 8),
                ('-two', 'one-two -two', 8),
                ('-two', 'two', -1),
                ('i', 'i', 0),
                ('i', 'six i', 4),
                ('i', '', -1), ('', '', -1), ('', 'i', -1),
                ('i', 'six clicks', -1),
                ('i', '\U0001f431 i', 2),
                ('-a', 'b -a', 2),
                ('a-', 'a-b a- d', 4),
                ('-a-', 'b -a -a-', 5),
                ('-a-', '-a-', 0),
                ('-a-', 'a-', -1),
                ('-a-', '-a', -1),
                ('-a-', 'a', -1),
                ('a-', 'a-', 0),
                ('-a', '-a', 0),
                ('a-b-c-', 'a-b-c-d', -1),
                ('a-b-c-', 'a-b-c-.', 0),
                ('a-b-c-', 'a-b-c-d a-b-c- d', 8),
        ):
            fpos = index_of(needle, haystack)
            self.ae(pos, fpos, 'Failed to find index of %r in %r (%d != %d)' % (needle, haystack, pos, fpos))
231
232
def find_tests():
    ''' Return the suite built from the test methods of TestICU '''
    loader = unittest.defaultTestLoader
    return loader.loadTestsFromTestCase(TestICU)
235
236
class TestRunner(unittest.main):
    ''' unittest.main variant whose tests always come from find_tests() '''

    def createTests(self, from_discovery=False, Loader=None):
        # The base class can call this hook with from_discovery/Loader
        # keyword arguments. They are accepted, and ignored, so that such a
        # call does not raise TypeError.
        self.test = find_tests()
241
242
def run(verbosity=4):
    ''' Run the tests via TestRunner, without exiting when they finish '''
    options = {'verbosity': verbosity, 'exit': False}
    TestRunner(**options)
245
246
def test_build():
    ''' Run the tests quietly, stopping at the first failure, and raise
    SystemExit(1) unless the run was successful '''
    runner = TestRunner(
        verbosity=0, buffer=True, catchbreak=True, failfast=True,
        # Only the program name is forwarded; the rest of the command line is dropped
        argv=sys.argv[:1], exit=False)
    if runner.result.wasSuccessful():
        return
    raise SystemExit(1)
251
252
if __name__ == '__main__':
    # Executed as a script: run the tests with verbosity 4
    run(4)
255