1# Copyright 2014 Christoph Reiter
2#
3# This program is free software; you can redistribute it and/or modify
4# it under the terms of the GNU General Public License as published by
5# the Free Software Foundation; either version 2 of the License, or
6# (at your option) any later version.
7
8import re
9import unicodedata
10
11from tests import TestCase
12
13from quodlibet.unisearch import compile
14from quodlibet.unisearch.db import diacritic_for_letters
15from quodlibet.unisearch.parser import re_replace_literals, re_add_variants
16
17
class TUniSearch(TestCase):
    """Tests for the diacritic table and the regexp rewriting helpers."""

    def test_mapping(self):
        """The mappings returned for ``False`` and ``True`` are equal."""
        from_false, from_true = (
            diacritic_for_letters(flag) for flag in (False, True))
        assert sorted(from_false.items()) == sorted(from_true.items())

    def test_normalize_input(self):
        """An NFD-normalized input still yields a pattern matching the text."""
        decomposed = unicodedata.normalize("NFD", u"ö")
        pattern = re_add_variants(decomposed)
        assert re.match(pattern, u"ö")

    def test_re_replace(self):
        """Each of the two letters is expanded into a character set."""
        pattern = re_add_variants(u"aa")
        assert u"[" in pattern
        assert u"]" in pattern
        assert pattern.count(u"ä") == 2

    def test_re_replace_multi(self):
        """Two-letter sequences also get single-character alternatives."""
        expectations = [
            (u"ae",
             u"(?:[aàáâãäåāăąǎǟǡǻȁȃȧḁạảấầẩẫậắằẳẵặ]"
             u"[eèéêëēĕėęěȅȇȩḕḗḙḛḝẹẻẽếềểễệ]|[æǣǽ])"),
            (u"SS", u"(?:[SŚŜŞŠȘṠṢṤṦṨꞄ][SŚŜŞŠȘṠṢṤṦṨꞄ]|ẞ)"),
            (u"ss", u"(?:[sśŝşšșṡṣṥṧṩꞅ][sśŝşšșṡṣṥṧṩꞅ]|ß)"),
        ]
        for text, expected in expectations:
            assert re_add_variants(text) == expected

    def test_punct(self):
        """Quote characters gain their variants; an escaped star stays literal."""
        apostrophe = re_add_variants(u"'")
        for variant in ("`", "'"):
            assert variant in apostrophe

        doubled_apostrophe = re_add_variants(u"''")
        assert "\"" in doubled_apostrophe

        double_quote = re_add_variants(u'"')
        for variant in ("”", "“"):
            assert variant in double_quote

        escaped_star = re_add_variants(u'\\*')
        assert re.match(escaped_star, "*")

    def test_re_replace_multi_fixme(self):
        """Pin the current output for three consecutive ``L`` characters."""
        # we don't handle overlapping sequences, so this doesn't match "LỺ"
        expected = u"(?:[LĹĻĽḶḸḺḼŁ][LĹĻĽḶḸḺḼŁ]|Ỻ)[LĹĻĽḶḸḺḼŁ]"
        assert re_add_variants(u"LLL") == expected

    def test_re_replace_multi_nested(self):
        """Sequences inside a group are replaced, with or without a repeat."""
        expectations = [
            (u"(եւ)", u"((?:եւ|և))"),
            (u"(եւ)+", u"((?:եւ|և))+"),
        ]
        for text, expected in expectations:
            assert re_add_variants(text) == expected

    def test_re_replace_escape(self):
        """Only the plain letter is expanded, not the escape that follows."""
        # The input is "n", a backslash and "n"; the expected output ends
        # with a real newline character.
        rewritten = re_add_variants(u"n\\n")
        assert rewritten == u"[nñńņňǹṅṇṉṋʼn]\n"

    def test_construct_regexp(self):
        """Patterns round-trip through ``re_replace_literals``.

        Each case is ``(pattern, expected)``; ``None`` as the expected value
        means the output must equal the input pattern.
        """
        # NOTE: "\a" and "\1" below are not double-escaped, so the string
        # literals contain the control characters U+0007 and U+0001.
        cases = [
            (u"\\.", None),
            (u"..", None),
            (u"\\.", None),
            (u"^a\aa[ha-z]k{1,3}h*h+h?(x|yy)(a+b|cd)$", None),
            (u"(?=Asimov)", None),
            (u"(?!Asimov)", None),
            (u"(?<=abc)def", None),
            (u"(?<!foo)", None),
            (u"(?#foo)", u""),
            (u"(.+) \1", None),
            (u"\\A\\b\\B\\d\\D\\s\\S\\w\\W\\Z\a",
             u"\\A\\b\\B[\\d][\\D][\\s][\\S][\\w][\\W]\\Z\a"),
            (u"a{3,5}?a+?a*?a??", None),
            (u"^foo$", None),
            (u"[-+]?(\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?",
             u"[\\-\\+]?([\\d]+(\\.[\\d]*)?|\\.[\\d]+)([eE][\\-\\+]?[\\d]+)?"),
            (u"(\\$\\d*)", u"(\\$[\\d]*)"),
            (u"\\$\\.\\^\\[\\]\\:\\-\\+\\?\\\\", None),
            (u"[^a][^ab]", None),
            (u"[ab][abc]", None),
            (u"[.]", u"\\."),
            (u"[^a-z]", None),
            (u"[^a-z\\w]", None),
            ("(x|yy)", None),
        ]

        for pattern, expected in cases:
            wanted = pattern if expected is None else expected
            assert re_replace_literals(pattern, {}) == wanted

    def test_construct_regexp_37_change(self):
        """Non-capturing groups: accept both the old and the new output."""
        # Starting with 3.7 the parser throws out some subpattern
        # nodes. We try to recover them or test against the old and new result.
        cases = [
            (u"(?:foo)", ("(?:foo)", "foo")),
            (u"(?:foo)x", ("(?:foo)x", "foox")),
            (u"(?:foo)(?:bar)", ("(?:foo)(?:bar)", "foobar")),
            (u"(?:foo|bla)", None),
            (u"(?:foo|bla)x", None),
        ]

        for pattern, accepted in cases:
            result = re_replace_literals(pattern, {})
            if accepted is None:
                # No alternatives listed: only the unchanged pattern is valid.
                accepted = (pattern,)
            assert result in accepted

    def test_construct_regexp_broken(self):
        """Invalid and unsupported patterns raise the expected exceptions."""
        with self.assertRaises(re.error):
            re_replace_literals(u"[", {})

        with self.assertRaises(NotImplementedError):
            re_replace_literals(u"(?P<quote>['\"]).*?(?P=quote)", {})

    def test_seq(self):
        """Character sets and ranges are extended with their variants."""
        expectations = [
            (u"[x-y]", u"[ẋẍýÿŷȳẏẙỳỵỷỹx-y]"),
            (u"[f-gm]", u"[ḟꝼĝğġģǧǵḡᵹf-gmḿṁṃ]"),
            (u"[^m]", u"[^mḿṁṃ]"),
            (u"[^m-m\\w]", u"[^ḿṁṃm-m\\w]"),
            (u"[^m-m]", "[^ḿṁṃm-m]"),
            (u"[^ö]", u"[^ö]"),
            (u"[LLL]", u"[LĹĻĽḶḸḺḼŁ]"),
        ]
        for pattern, expected in expectations:
            assert re_add_variants(pattern) == expected

    def test_literal(self):
        """Single letters expand to sets that include their variants."""
        lower_f = re_add_variants(u"f")
        assert lower_f == u"[fḟꝼ]"

        lower_o = re_add_variants(u"o")
        assert u"ø" in lower_o

        upper_o = re_add_variants(u"O")
        assert u"Ø" in upper_o

        negated_f = re_add_variants(u"[^f]")
        assert negated_f == u"[^fḟꝼ]"
144
145
class TCompileMatch(TestCase):
    """Tests for the matcher callables returned by ``compile``."""

    def test_basics_default(self):
        """By default ``foo`` matches ``foo`` and ``fooo`` but not ``fo``."""
        outcomes = ((u"foo", True), (u"fooo", True), (u"fo", False))
        for text, should_match in outcomes:
            assert bool(compile(u"foo")(text)) is should_match

    def test_ignore_case(self):
        """``ignore_case`` decides whether ``foo`` matches ``Foo``."""
        insensitive = compile(u"foo", ignore_case=True)
        assert insensitive(u"Foo")

        sensitive = compile(u"foo", ignore_case=False)
        assert not sensitive(u"Foo")

    def test_assert_dot_all(self):
        """``dot_all`` decides whether ``.`` matches a newline."""
        outcomes = (
            (True, u"a\nb", True),
            (False, u"a\nb", False),
            (False, u"a b", True),
        )
        for dot_all, text, should_match in outcomes:
            matched = compile(u"a.b", dot_all=dot_all)(text)
            assert bool(matched) is should_match

    def test_unicode_equivalence(self):
        """Different code point spellings of a character match each other."""
        pairs = [
            (u"\u212B", u"\u00C5"),
            (u"\u00C5", u"\u212B"),
            (u"A\u030a", u"\u00C5"),
            (u"A\u030a", u"\u212B"),
            (u"o\u0308", u"o\u0308"),
            (u"o\u0308", u"\xf6"),
            (u"\xf6", u"o\u0308"),
        ]
        for pattern, text in pairs:
            assert compile(pattern)(text)

    def test_assert_asym(self):
        """``asym`` decides whether a plain ``o`` matches ``ö``."""
        with_variants = compile(u"o", asym=True)
        assert with_variants(u"ö")

        without_variants = compile(u"o", asym=False)
        assert not without_variants(u"ö")

    def test_assert_asym_unicode_equivalence(self):
        """With ``asym=True`` equivalent spellings still match each other."""
        pairs = [
            (u"A", u"\u00C5"),
            (u"A\u030a", u"\u212B"),
            (u"\u00C5", u"\u212B"),
            (u"\u212B", u"\u00C5"),
        ]
        for pattern, text in pairs:
            assert compile(pattern, asym=True)(text)

    def test_invalid(self):
        """An unbalanced pattern raises ``ValueError`` for either ``asym``."""
        for asym in (False, True):
            with self.assertRaises(ValueError):
                compile(u"(F", asym=asym)
187