# Simple test to ensure that we can load the xapian module and exercise basic
# functionality successfully.
#
# Copyright (C) 2004,2005,2006,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
# Copyright (C) 2007 Lemur Consulting Ltd
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
# USA

import sys
import re
import xapian

from testsuite import *

# Ids of the MyStemmer objects which have been created and not yet deleted.
mystemmers = set()
# Id which will be given to the next MyStemmer object created.
mystemmer_id = 0
# Ids of MyStemmer objects whose __del__ ran more than once.  Python ignores
# exceptions raised inside __del__ (it only prints a warning to stderr), so
# the problem is recorded here and reported by test_zz9_check_leaks().
mystemmer_double_deletes = []

# Stemmer which strips English vowels.
class MyStemmer(xapian.StemImplementation):
    def __init__(self):
        global mystemmer_id
        super(MyStemmer, self).__init__()
        mystemmers.add(mystemmer_id)
        self._id = mystemmer_id
        mystemmer_id += 1

    def __call__(self, s):
        # s is a bytes object, so use a bytes pattern and replacement.
        return re.sub(br'[aeiou]', b'', s)

    def __del__(self):
        if self._id not in mystemmers:
            # Record the failure so that it can actually fail the test run,
            # then still raise so the diagnostic shows up on stderr.
            mystemmer_double_deletes.append(self._id)
            raise TestFail("MyStemmer #%d deleted more than once" % self._id)
        mystemmers.remove(self._id)

# Exercise a broad range of the wrapped API against an in-memory database.
def test_all():
    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')

    def access_cvar():
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    #
    # Python 3.5 generates a different exception message here to earlier
    # versions, so we need a check which matches both.
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'cvar'") != -1,
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):

    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'open_stub'") != -1,
                     lambda : xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'open_stub'") != -1,
                     lambda : xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))

    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'chert_open'") != -1,
                     lambda : xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'chert_open'") != -1,
                     lambda : xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))

    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_OPEN|xapian.DB_BACKEND_STUB))

    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_GLASS))

    expect_exception(xapian.DatabaseNotFoundError, None,
                     lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.DatabaseCreateError, None,
                     lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)

    # Check Xapian::BAD_VALUENO is wrapped suitably.
    enq.set_collapse_key(xapian.BAD_VALUENO)

    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected len(mset)")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    term = doc.termlist()
    term.skip_to(b'n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM o OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM outside OR Zoutsid@2))")

    # Test supplying unicode strings
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])


    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam2', testfieldprocessor(), False) # Old-style
    qp.add_boolean_prefix('boolspam3', testfieldprocessor(), '')
    qp.add_boolean_prefix('boolspam4', testfieldprocessor(), 'group')
    qp.add_boolean_prefix('boolspam5', testfieldprocessor(), None)
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug fixed in 1.4.4:
    # https://bugs.debian.org/849722
    oqparser.add_boolean_prefix('tag', 'K', '')
    # Make sure other cases also work:
    oqparser.add_boolean_prefix('zag', 'XR', False) # Old-style
    oqparser.add_boolean_prefix('rag', 'XR', None)
    oqparser.add_boolean_prefix('nag', 'XB', '')
    oqparser.add_boolean_prefix('bag', 'XB', 'blergh')
    oqparser.add_boolean_prefix('jag', 'XB', b'blergh')

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo')
    # Repeat the get_metadata check after the rejected set_metadata call.
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
                 "5 * foo")

# Check that a stemmer implemented in Python (MyStemmer) can be used via
# xapian.Stem directly, by TermGenerator and by QueryParser.
def test_userstem():
    mystem = MyStemmer()
    stem = xapian.Stem(mystem)
    expect(stem(b'test'), b'tst')
    # The same MyStemmer object is wrapped by a second xapian.Stem.
    stem2 = xapian.Stem(mystem)
    expect(stem2(b'toastie'), b'tst')

    indexer = xapian.TermGenerator()
    indexer.set_stemmer(xapian.Stem(MyStemmer()))

    doc = xapian.Document()
    indexer.set_document(doc)
    indexer.index_text(b'hello world')

    s = '/'
    for t in doc.termlist():
        s += t.term.decode('utf-8')
        s += '/'
    expect(s, '/Zhll/Zwrld/hello/world/')

    parser = xapian.QueryParser()
    parser.set_stemmer(xapian.Stem(MyStemmer()))
    parser.set_stemming_strategy(xapian.QueryParser.STEM_ALL)
    expect_query(parser.parse_query(b'color television'), '(clr@1 OR tlvsn@2)')

# Check that xapian.Query has no attributes whose names start "LEAF_".
def test_internal_enums_not_wrapped():
    leaf_constants = [c for c in dir(xapian.Query) if c.startswith('LEAF_')]
    expect(leaf_constants, [])

# Check that no module-level name, and no member of a wrapped class, has a
# name ending in an underscore.
def test_internals_not_wrapped():
    internals = []
    for c in dir(xapian):
        # Skip Python stuff like __file__ and __version__.
        if c.startswith('__'): continue
        if c.endswith('_'): internals.append(c)
        # Skip non-classes
        if not c[0].isupper(): continue
        # c came from dir(xapian), so a plain attribute lookup is all that is
        # needed here - there's no need to eval() a constructed expression.
        cls = getattr(xapian, c)
        # Exact check (not isinstance): only objects whose type is precisely
        # `type` are inspected.
        if type(cls) is not type: continue
        for m in dir(cls):
            if m.startswith('__'): continue
            if m.endswith('_'): internals.append(c + '.' + m)

    expect(internals, [])

# Check that every MyStemmer object created has been deleted exactly once.
# (Presumably named "zz9" so that it is run after the other tests - this
# relies on the order in which runtests() calls them.)
def test_zz9_check_leaks():
    import gc
    gc.collect()
    if mystemmer_double_deletes:
        # MyStemmer.__del__ can't fail the test itself because Python ignores
        # exceptions raised in __del__, so report what it recorded here.
        raise TestFail("MyStemmer #%d deleted more than once" % mystemmer_double_deletes[0])
    if mystemmers:
        raise TestFail("%d MyStemmer objects not deleted" % len(mystemmers))

# Run all tests (ie, callables with names starting "test_").
if not runtests(globals()):
    sys.exit(1)

# vim:syntax=python:set expandtab: