# Copyright 2014 Christoph Reiter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.

import re
import unicodedata

from tests import TestCase

from quodlibet.unisearch import compile
from quodlibet.unisearch.db import diacritic_for_letters
from quodlibet.unisearch.parser import re_replace_literals, re_add_variants


class TUniSearch(TestCase):
    """Tests for the regexp rewriting helpers of quodlibet.unisearch."""

    def test_mapping(self):
        # The mapping obtained with False must equal the one obtained
        # with True.
        cached = diacritic_for_letters(False)
        regenerated = diacritic_for_letters(True)
        self.assertEqual(sorted(cached.items()), sorted(regenerated.items()))

    def test_normalize_input(self):
        # An NFD-normalized pattern still has to match the plain literal.
        decomposed = unicodedata.normalize("NFD", u"ö")
        assert re.match(re_add_variants(decomposed), u"ö")

    def test_re_replace(self):
        # Each of the two letters becomes its own character class.
        pattern = re_add_variants(u"aa")
        assert u"[" in pattern
        assert u"]" in pattern
        assert pattern.count(u"ä") == 2

    def test_re_replace_multi(self):
        # Two-letter sequences additionally get a single-character
        # alternative.
        pattern = re_add_variants(u"ae")
        assert pattern == (
            u"(?:[aàáâãäåāăąǎǟǡǻȁȃȧḁạảấầẩẫậắằẳẵặ]"
            u"[eèéêëēĕėęěȅȇȩḕḗḙḛḝẹẻẽếềểễệ]|[æǣǽ])")

        pattern = re_add_variants(u"SS")
        assert pattern == u"(?:[SŚŜŞŠȘṠṢṤṦṨꞄ][SŚŜŞŠȘṠṢṤṦṨꞄ]|ẞ)"

        pattern = re_add_variants(u"ss")
        assert pattern == u"(?:[sśŝşšșṡṣṥṧṩꞅ][sśŝşšșṡṣṥṧṩꞅ]|ß)"

    def test_punct(self):
        single_quote = re_add_variants(u"'")
        assert "`" in single_quote
        assert "'" in single_quote

        # Two apostrophes in a row also stand for a double quote.
        two_quotes = re_add_variants(u"''")
        assert "\"" in two_quotes

        double_quote = re_add_variants(u'"')
        assert "”" in double_quote
        assert "“" in double_quote

        # An escaped asterisk has to stay a literal asterisk.
        escaped_star = re_add_variants(u'\\*')
        assert re.match(escaped_star, "*")

    def test_re_replace_multi_fixme(self):
        # we don't handle overlapping sequences, so this doesn't match "LỺ"
        pattern = re_add_variants(u"LLL")
        assert pattern == u"(?:[LĹĻĽḶḸḺḼŁ][LĹĻĽḶḸḺḼŁ]|Ỻ)[LĹĻĽḶḸḺḼŁ]"

    def test_re_replace_multi_nested(self):
        # Sequence replacement also applies inside a group, with and without
        # a trailing repeat.
        grouped = re_add_variants(u"(եւ)")
        assert grouped == u"((?:եւ|և))"

        repeated = re_add_variants(u"(եւ)+")
        assert repeated == u"((?:եւ|և))+"

    def test_re_replace_escape(self):
        # The input holds a backslash followed by "n"; the expected output
        # holds a real newline character.
        pattern = re_add_variants(u"n\\n")
        assert pattern == u"[nñńņňǹṅṇṉṋʼn]\n"

    def test_construct_regexp(self):
        # (input pattern, expected output); None means the pattern has to
        # come back unchanged.
        #
        # NOTE: the strings are not raw, so "\a" is the BEL character and
        # "\1" is the character U+0001 rather than regexp escapes.
        cases = [
            (u"\\.", None),
            (u"..", None),
            (u"\\.", None),
            (u"^a\aa[ha-z]k{1,3}h*h+h?(x|yy)(a+b|cd)$", None),
            (u"(?=Asimov)", None),
            (u"(?!Asimov)", None),
            (u"(?<=abc)def", None),
            (u"(?<!foo)", None),
            (u"(?#foo)", u""),
            (u"(.+) \1", None),
            (u"\\A\\b\\B\\d\\D\\s\\S\\w\\W\\Z\a",
             u"\\A\\b\\B[\\d][\\D][\\s][\\S][\\w][\\W]\\Z\a"),
            (u"a{3,5}?a+?a*?a??", None),
            (u"^foo$", None),
            (u"[-+]?(\\d+(\\.\\d*)?|\\.\\d+)([eE][-+]?\\d+)?",
             u"[\\-\\+]?([\\d]+(\\.[\\d]*)?|\\.[\\d]+)([eE][\\-\\+]?[\\d]+)?"),
            (u"(\\$\\d*)", u"(\\$[\\d]*)"),
            (u"\\$\\.\\^\\[\\]\\:\\-\\+\\?\\\\", None),
            (u"[^a][^ab]", None),
            (u"[ab][abc]", None),
            (u"[.]", u"\\."),
            (u"[^a-z]", None),
            (u"[^a-z\\w]", None),
            ("(x|yy)", None),
        ]

        for pattern, expected in cases:
            if expected is None:
                expected = pattern
            self.assertEqual(re_replace_literals(pattern, {}), expected)

    def test_construct_regexp_37_change(self):
        # Starting with 3.7 the parser throws out some subpattern
        # nodes. We try to recover them or test against the old and new
        # result.
        #
        # A tuple lists every acceptable output; None means "unchanged".
        cases = [
            (u"(?:foo)", ("(?:foo)", "foo")),
            (u"(?:foo)x", ("(?:foo)x", "foox")),
            (u"(?:foo)(?:bar)", ("(?:foo)(?:bar)", "foobar")),
            (u"(?:foo|bla)", None),
            (u"(?:foo|bla)x", None),
        ]

        for pattern, expected in cases:
            result = re_replace_literals(pattern, {})
            if expected is None:
                expected = pattern
            if isinstance(expected, tuple):
                assert result in expected
            else:
                assert result == expected

    def test_construct_regexp_broken(self):
        # An unterminated character set is a regexp error.
        with self.assertRaises(re.error):
            re_replace_literals(u"[", {})

        # A named group with a named backreference is expected to raise
        # NotImplementedError.
        with self.assertRaises(NotImplementedError):
            re_replace_literals(u"(?P<quote>['\"]).*?(?P=quote)", {})

    def test_seq(self):
        # Character sets, including ranges and negated sets.
        assert re_add_variants(u"[x-y]") == u"[ẋẍýÿŷȳẏẙỳỵỷỹx-y]"
        assert re_add_variants(u"[f-gm]") == u"[ḟꝼĝğġģǧǵḡᵹf-gmḿṁṃ]"
        assert re_add_variants(u"[^m]") == u"[^mḿṁṃ]"
        assert re_add_variants(u"[^m-m\\w]") == u"[^ḿṁṃm-m\\w]"
        assert re_add_variants(u"[^m-m]") == "[^ḿṁṃm-m]"
        assert re_add_variants(u"[^ö]") == u"[^ö]"
        assert re_add_variants(u"[LLL]") == u"[LĹĻĽḶḸḺḼŁ]"

    def test_literal(self):
        # Single literals, alone and inside a negated set.
        assert re_add_variants(u"f") == u"[fḟꝼ]"
        assert u"ø" in re_add_variants(u"o")
        assert u"Ø" in re_add_variants(u"O")
        assert re_add_variants(u"[^f]") == u"[^fḟꝼ]"


class TCompileMatch(TestCase):
    """Tests for the matcher callables returned by unisearch.compile()."""

    def test_basics_default(self):
        # The pattern matches the exact text and a longer one, but not a
        # shorter one.
        for text in (u"foo", u"fooo"):
            assert compile(u"foo")(text)
        assert not compile(u"foo")(u"fo")

    def test_ignore_case(self):
        case_insensitive = compile(u"foo", ignore_case=True)
        assert case_insensitive(u"Foo")

        case_sensitive = compile(u"foo", ignore_case=False)
        assert not case_sensitive(u"Foo")

    def test_assert_dot_all(self):
        # "." only matches a newline when dot_all is enabled.
        assert compile(u"a.b", dot_all=True)(u"a\nb")
        assert not compile(u"a.b", dot_all=False)(u"a\nb")
        assert compile(u"a.b", dot_all=False)(u"a b")

    def test_unicode_equivalence(self):
        # (pattern, text) pairs using different code point spellings of
        # the same character; each pattern has to match its text.
        pairs = [
            (u"\u212B", u"\u00C5"),
            (u"\u00C5", u"\u212B"),
            (u"A\u030a", u"\u00C5"),
            (u"A\u030a", u"\u212B"),
            (u"o\u0308", u"o\u0308"),
            (u"o\u0308", u"\xf6"),
            (u"\xf6", u"o\u0308"),
        ]

        for pattern, text in pairs:
            assert compile(pattern)(text)

    def test_assert_asym(self):
        # With asym=True a plain letter also matches its accented form.
        assert compile(u"o", asym=True)(u"ö")
        assert not compile(u"o", asym=False)(u"ö")

    def test_assert_asym_unicode_equivalence(self):
        # Same kind of (pattern, text) pairs, this time with asym=True.
        pairs = [
            (u"A", u"\u00C5"),
            (u"A\u030a", u"\u212B"),
            (u"\u00C5", u"\u212B"),
            (u"\u212B", u"\u00C5"),
        ]

        for pattern, text in pairs:
            assert compile(pattern, asym=True)(text)

    def test_invalid(self):
        # An unbalanced parenthesis raises ValueError in both modes.
        for asym in (False, True):
            with self.assertRaises(ValueError):
                compile(u"(F", asym=asym)