1# Simple test to ensure that we can load the xapian module and exercise basic
2# functionality successfully.
3#
4# Copyright (C) 2004,2005,2006,2007,2008,2010,2011,2012,2013,2014,2015,2016,2017,2019 Olly Betts
5# Copyright (C) 2007 Lemur Consulting Ltd
6#
7# This program is free software; you can redistribute it and/or
8# modify it under the terms of the GNU General Public License as
9# published by the Free Software Foundation; either version 2 of the
10# License, or (at your option) any later version.
11#
12# This program is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with this program; if not, write to the Free Software
19# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
20# USA
21
22import sys
23import re
24import xapian
25
26from testsuite import *
27
# Bookkeeping used to detect MyStemmer objects which are leaked or deleted
# twice: the ids of the instances currently alive, and the id which the next
# instance to be constructed will receive.
mystemmers = set()
mystemmer_id = 0

# Stemmer which strips English vowels.
#
# Each instance registers a unique id in mystemmers on construction and
# removes it again on deletion, so a later check can tell whether any
# instance was never deleted.
class MyStemmer(xapian.StemImplementation):
    def __init__(self):
        # Only mystemmer_id is rebound here; mystemmers is just mutated.
        global mystemmer_id
        super().__init__()
        self._id = mystemmer_id
        mystemmers.add(self._id)
        mystemmer_id = self._id + 1

    def __call__(self, s):
        # s is handled as a byte string: every ASCII lowercase vowel is
        # removed from it.
        return re.sub(br'[aeiou]', b'', s)

    def __del__(self):
        # NOTE: Python reports an exception raised inside __del__ on stderr
        # rather than propagating it to the caller.
        if self._id in mystemmers:
            mystemmers.remove(self._id)
            return
        raise TestFail("MyStemmer #%d deleted more than once" % self._id)
48
def test_all():
    # Broad smoke test of the wrapped API.  Most sections below operate on the
    # same in-memory database and reuse objects created by earlier sections
    # (stem, db, enq, qp, ...), so the order of the sections matters.

    # Test the version number reporting functions give plausible results.
    v = "%d.%d.%d" % (xapian.major_version(),
                      xapian.minor_version(),
                      xapian.revision())
    v2 = xapian.version_string()
    expect(v2, v, "Unexpected version output")

    # A regexp check would be better, but seems to create a bogus "leak" of -1
    # objects in Python 3.
    expect(len(xapian.__version__.split('.')), 3, 'xapian.__version__ not X.Y.Z')
    expect((xapian.__version__.split('.'))[0], '1', 'xapian.__version__ not "1.Y.Z"')

    # Helper for the cvar check below: merely reading xapian.cvar is expected
    # to raise AttributeError, so the print should never be reached.
    def access_cvar():
        res = xapian.cvar
        print("Unhandled constants: ", res)
        return res

    # Check that SWIG isn't generating cvar (regression test for ticket#297).
    #
    # Python 3.5 generates a different exception message here to earlier
    # versions, so we need a check which matches both.
    expect_exception(AttributeError,
                     lambda msg: msg.find("has no attribute 'cvar'") != -1,
                     access_cvar)

    stem = xapian.Stem(b"english")
    expect(str(stem), "Xapian::Stem(english)", "Unexpected str(stem)")

    # Check document data is binary-safe.  Truncation at the zero byte is
    # checked for explicitly first, which gives that failure mode its own
    # message.
    doc = xapian.Document()
    doc.set_data(b"a\0b")
    if doc.get_data() == b"a":
        raise TestFail("get_data+set_data truncates at a zero byte")
    expect(doc.get_data(), b"a\0b", "get_data+set_data doesn't transparently handle a zero byte")
    # Build the first document: the term XYzzy added via add_term(), plus five
    # postings for four stemmed words ("there" is at positions 2 and 5).
    doc.set_data(b"is there anybody out there?")
    doc.add_term(b"XYzzy")
    doc.add_posting(stem(b"is"), 1)
    doc.add_posting(stem(b"there"), 2)
    doc.add_posting(stem(b"anybody"), 3)
    doc.add_posting(stem(b"out"), 4)
    doc.add_posting(stem(b"there"), 5)

    db = xapian.WritableDatabase('', xapian.DB_BACKEND_INMEMORY)
    db.add_document(doc)
    expect(db.get_doccount(), 1, "Unexpected db.get_doccount()")
    # Check Query construction from lists and tuples of terms and subqueries.
    # The str terms are encoded to bytes before being passed in.
    terms = ["smoke", "test", "terms"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [t.encode('utf-8') for t in terms]),
                 "(smoke OR test OR terms)")
    query1 = xapian.Query(xapian.Query.OP_PHRASE, (b"smoke", b"test", b"tuple"))
    query2 = xapian.Query(xapian.Query.OP_XOR, (xapian.Query(b"smoke"), query1, b"string"))
    expect_query(query1, "(smoke PHRASE 3 test PHRASE 3 tuple)")
    expect_query(query2, "(smoke XOR (smoke PHRASE 3 test PHRASE 3 tuple) XOR string)")
    subqs = ["a", "b"]
    expect_query(xapian.Query(xapian.Query.OP_OR, [s.encode('utf-8') for s in subqs]), "(a OR b)")
    expect_query(xapian.Query(xapian.Query.OP_VALUE_RANGE, 0, b'1', b'4'),
                 "VALUE_RANGE 0 1 4")

    # Check database factory functions are wrapped as expected (or not wrapped
    # in the first cases):

    expect_exception(AttributeError,
            lambda msg: msg.find("has no attribute 'open_stub'") != -1,
            lambda : xapian.open_stub(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
            lambda msg: msg.find("has no attribute 'open_stub'") != -1,
            lambda : xapian.open_stub(b"nosuchdir/nosuchdb", xapian.DB_OPEN))

    expect_exception(AttributeError,
            lambda msg: msg.find("has no attribute 'chert_open'") != -1,
            lambda : xapian.chert_open(b"nosuchdir/nosuchdb"))
    expect_exception(AttributeError,
            lambda msg: msg.find("has no attribute 'chert_open'") != -1,
            lambda : xapian.chert_open(b"nosuchdir/nosuchdb", xapian.DB_CREATE))

    expect_exception(xapian.DatabaseNotFoundError, None,
            lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_STUB))
    expect_exception(xapian.DatabaseNotFoundError, None,
            lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_OPEN|xapian.DB_BACKEND_STUB))

    expect_exception(xapian.DatabaseNotFoundError, None,
            lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_GLASS))
    expect_exception(xapian.DatabaseCreateError, None,
            lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_GLASS))

    expect_exception(xapian.DatabaseNotFoundError, None,
            lambda : xapian.Database(b"nosuchdir/nosuchdb", xapian.DB_BACKEND_CHERT))
    expect_exception(xapian.DatabaseCreateError, None,
            lambda : xapian.WritableDatabase(b"nosuchdir/nosuchdb", xapian.DB_CREATE|xapian.DB_BACKEND_CHERT))

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"/bin/false", b"")
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"/bin/false", b"")

    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open, b"127.0.0.1", 0, 1)
    expect_exception(xapian.NetworkError, None,
                     xapian.remote_open_writable, b"127.0.0.1", 0, 1)

    # Check wrapping of MatchAll and MatchNothing:

    expect_query(xapian.Query.MatchAll, "<alldocuments>")
    expect_query(xapian.Query.MatchNothing, "")

    # Feature test for Query.__iter__
    #
    # query2 mentions "smoke" twice but 4 terms are expected, i.e. each
    # distinct term is expected to be counted once.
    term_count = 0
    for term in query2:
        term_count += 1
    expect(term_count, 4, "Unexpected number of terms in query2")

    enq = xapian.Enquire(db)

    # Check Xapian::BAD_VALUENO is wrapped suitably.
    enq.set_collapse_key(xapian.BAD_VALUENO)

    enq.set_query(xapian.Query(xapian.Query.OP_OR, b"there", b"is"))
    mset = enq.get_mset(0, 10)
    expect(mset.size(), 1, "Unexpected mset.size()")
    expect(len(mset), 1, "Unexpected mset.size()")

    # Feature test for Enquire.matching_terms(docid)
    term_count = 0
    for term in enq.matching_terms(mset.get_hit(0)):
        term_count += 1
    expect(term_count, 2, "Unexpected number of matching terms")

    # Feature test for MSet.__iter__
    msize = 0
    for match in mset:
        msize += 1
    expect(msize, mset.size(), "Unexpected number of entries in mset")

    terms = b" ".join(enq.matching_terms(mset.get_hit(0)))
    expect(terms, b"is there", "Unexpected terms")

    # Feature test for ESet.__iter__
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enq.get_eset(10, rset)
    term_count = 0
    for term in eset:
        term_count += 1
    expect(term_count, 3, "Unexpected number of expand terms")

    # Feature test for Database.__iter__
    #
    # The five terms expected are XYzzy plus the stems of is/there/anybody/out.
    term_count = 0
    for term in db:
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db")

    # Feature test for Database.allterms
    term_count = 0
    for term in db.allterms():
        term_count += 1
    expect(term_count, 5, "Unexpected number of terms in db.allterms")

    # Feature test for Database.postlist
    #
    # Only one document has been added at this point, so a single entry is
    # expected.
    count = 0
    for posting in db.postlist(b"there"):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('there')")

    # Feature test for Database.postlist with empty term (alldocspostlist)
    count = 0
    for posting in db.postlist(b""):
        count += 1
    expect(count, 1, "Unexpected number of entries in db.postlist('')")

    # Feature test for Database.termlist
    count = 0
    for term in db.termlist(1):
        count += 1
    expect(count, 5, "Unexpected number of entries in db.termlist(1)")

    # Feature test for Database.positionlist
    #
    # "there" was added at positions 2 and 5 above.
    count = 0
    for term in db.positionlist(1, b"there"):
        count += 1
    expect(count, 2, "Unexpected number of entries in db.positionlist(1, 'there')")

    # Feature test for Document.termlist
    count = 0
    for term in doc.termlist():
        count += 1
    expect(count, 5, "Unexpected number of entries in doc.termlist()")

    # Feature test for TermIter.skip_to
    #
    # After skipping to b'n', no term yielded by the iterator may sort before
    # b'n'.
    term = doc.termlist()
    term.skip_to(b'n')
    while True:
        try:
            x = next(term)
        except StopIteration:
            break
        if x.term < b'n':
            raise TestFail("TermIter.skip_to didn't skip term '%s'" % x.term.decode('utf-8'))

    # Feature test for Document.values
    #
    # No values have been added to this document, so none are expected.
    count = 0
    for term in list(doc.values()):
        count += 1
    expect(count, 0, "Unexpected number of entries in doc.values")

    # Check exception handling for Xapian::DocNotFoundError
    expect_exception(xapian.DocNotFoundError, "Docid 3 not found", db.get_document, 3)

    # Check value of OP_ELITE_SET
    expect(xapian.Query.OP_ELITE_SET, 10, "Unexpected value for OP_ELITE_SET")

    # Feature test for MatchDecider
    #
    # Add a second document which has value slot 0 set to "yes"; the first
    # document has no values, so the decider below should reject it.
    doc = xapian.Document()
    doc.set_data(b"Two")
    doc.add_posting(stem(b"out"), 1)
    doc.add_posting(stem(b"outside"), 1)
    doc.add_posting(stem(b"source"), 2)
    doc.add_value(0, b"yes")
    db.add_document(doc)

    class testmatchdecider(xapian.MatchDecider):
        def __call__(self, doc):
            return doc.get_value(0) == b"yes"

    query = xapian.Query(stem(b"out"))
    enquire = xapian.Enquire(db)
    enquire.set_query(query)
    mset = enquire.get_mset(0, 10, None, testmatchdecider())
    expect(mset.size(), 1, "Unexpected number of documents returned by match decider")
    expect(mset.get_docid(0), 2, "MatchDecider mset has wrong docid in")

    # Feature test for ExpandDecider
    #
    # The decider rejects every term starting with "a", so no such term may
    # appear in the resulting ESet.
    class testexpanddecider(xapian.ExpandDecider):
        def __call__(self, term):
            return (not term.startswith(b'a'))

    enquire = xapian.Enquire(db)
    rset = xapian.RSet()
    rset.add_document(1)
    eset = enquire.get_eset(10, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, testexpanddecider())
    eset_terms = [item.term for item in eset]
    expect(len(eset_terms), eset.size(), "Unexpected number of terms returned by expand")
    if [t for t in eset_terms if t.startswith(b'a')]:
        raise TestFail("ExpandDecider was not used")

    # Check min_wt argument to get_eset() works (new in 1.2.5).
    #
    # Without min_wt the last ESet item is expected to weigh less than 1.9;
    # with min_wt=1.9 the last item must weigh at least that much.
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ)
    expect([i.weight for i in eset][-1] < 1.9, True, "test get_eset() without min_wt")
    eset = enquire.get_eset(100, rset, xapian.Enquire.USE_EXACT_TERMFREQ, 1.0, None, 1.9)
    expect([i.weight for i in eset][-1] >= 1.9, True, "test get_eset() min_wt")

    # Check QueryParser parsing error.
    qp = xapian.QueryParser()
    expect_exception(xapian.QueryParserError, "Syntax: <expression> AND <expression>", qp.parse_query, b"test AND")

    # Check QueryParser pure NOT option
    qp = xapian.QueryParser()
    expect_query(qp.parse_query(b"NOT test", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT test@1)")

    # Check QueryParser partial option
    #
    # This qp object (with its database, default operator and stemmer) is
    # reused by the byte string and stopper checks further down.
    qp = xapian.QueryParser()
    qp.set_database(db)
    qp.set_default_op(xapian.Query.OP_AND)
    qp.set_stemming_strategy(qp.STEM_SOME)
    qp.set_stemmer(xapian.Stem(b'en'))
    expect_query(qp.parse_query(b"foo o", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM o OR Zo@2))")

    expect_query(qp.parse_query(b"foo outside", qp.FLAG_PARTIAL),
                 "(Zfoo@1 AND (WILDCARD SYNONYM outside OR Zoutsid@2))")

    # Test supplying terms as byte strings, including non-ASCII bytes.  The
    # lone byte \xa3 is expected to appear escaped in the query description,
    # whereas the two-byte sequence \xc2\xa3 is expected to appear as U+00A3.
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar')),
                 '(foo OR bar)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xa3')),
                 '(foo OR bar\\xa3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, (b'foo', b'bar\xc2\xa3')),
                 '(foo OR bar\u00a3)')
    expect_query(xapian.Query(xapian.Query.OP_OR, b'foo', b'bar'),
                 '(foo OR bar)')

    expect_query(qp.parse_query(b"NOT t\xe9st", qp.FLAG_BOOLEAN + qp.FLAG_PURE_NOT),
                 "(<alldocuments> AND_NOT Zt\u00e9st@1)")

    # Non-ASCII bytes in document data and in terms are expected to come back
    # unchanged.
    doc = xapian.Document()
    doc.set_data(b"Unicode with an acc\xe9nt")
    doc.add_posting(stem(b"out\xe9r"), 1)
    expect(doc.get_data(), b"Unicode with an acc\xe9nt")
    term = next(doc.termlist()).term
    expect(term, b"out\xe9r")

    # Check simple stopper
    #
    # Before "a" is added as a stopword it is kept in the parsed query;
    # afterwards it is dropped.
    stop = xapian.SimpleStopper()
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    stop.add(b'a')
    expect(stop(b'a'), True)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Feature test for custom Stopper
    #
    # This stopper treats only the term "b" as a stopword.
    class my_b_stopper(xapian.Stopper):
        def __call__(self, term):
            return term == b"b"

        def get_description(self):
            return "my_b_stopper"

    stop = my_b_stopper()
    expect(stop.get_description(), "my_b_stopper")
    qp.set_stopper(stop)
    expect(stop(b'a'), False)
    expect_query(qp.parse_query(b"foo bar a", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2 AND Za@3)")

    expect(stop(b'b'), True)
    expect_query(qp.parse_query(b"foo bar b", qp.FLAG_BOOLEAN),
                 "(Zfoo@1 AND Zbar@2)")

    # Test TermGenerator
    #
    # Each expected tuple is (term, wdf, list of positions); "foo" occurs
    # twice in the indexed text, at positions 1 and 4.
    termgen = xapian.TermGenerator()
    doc = xapian.Document()
    termgen.set_document(doc)
    termgen.index_text(b'foo bar baz foo')
    expect([(item.term, item.wdf, [pos for pos in item.positer]) for item in doc.termlist()], [(b'bar', 1, [2]), (b'baz', 1, [3]), (b'foo', 2, [1, 4])])


    # Check DateValueRangeProcessor works
    context("checking that DateValueRangeProcessor works")
    qp = xapian.QueryParser()
    vrpdate = xapian.DateValueRangeProcessor(1, 1, 1960)
    qp.add_valuerangeprocessor(vrpdate)
    query = qp.parse_query(b'12/03/99..12/04/01')
    expect(str(query), 'Query(VALUE_RANGE 1 19991203 20011204)')

    # Regression test for bug#193, fixed in 1.0.3.
    #
    # Note the mixed argument types: a is passed as a str while b is encoded
    # to bytes - presumably deliberate, to check that both are accepted.
    context("running regression test for bug#193")
    vrp = xapian.NumberValueRangeProcessor(0, b'$', True)
    a = '$10'
    b = '20'
    slot, a, b = vrp(a, b.encode('utf-8'))
    expect(slot, 0)
    expect(xapian.sortable_unserialise(a), 10)
    expect(xapian.sortable_unserialise(b), 20)

    # Feature test for xapian.FieldProcessor
    context("running feature test for xapian.FieldProcessor")
    class testfieldprocessor(xapian.FieldProcessor):
        def __call__(self, s):
            if s == 'spam':
                raise Exception('already spam')
            return xapian.Query("spam")

    # Register the processor for a free-text prefix and for boolean prefixes,
    # exercising several different forms of the optional third argument of
    # add_boolean_prefix().
    qp.add_prefix('spam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam', testfieldprocessor())
    qp.add_boolean_prefix('boolspam2', testfieldprocessor(), False) # Old-style
    qp.add_boolean_prefix('boolspam3', testfieldprocessor(), '')
    qp.add_boolean_prefix('boolspam4', testfieldprocessor(), 'group')
    qp.add_boolean_prefix('boolspam5', testfieldprocessor(), None)
    query = qp.parse_query('spam:ignored')
    expect(str(query), 'Query(spam)')

    # FIXME: This doesn't currently work:
    # expect_exception(Exception, 'already spam', qp.parse_query, 'spam:spam')

    # Regression tests copied from PHP (probably always worked in python, but
    # let's check...)
    context("running regression tests for issues which were found in PHP")

    # PHP overload resolution involving boolean types failed.
    enq.set_sort_by_value(1, True)

    # Regression test - fixed in 0.9.10.1.
    oqparser = xapian.QueryParser()
    oquery = oqparser.parse_query(b"I like tea")

    # Regression test for bug fixed in 1.4.4:
    # https://bugs.debian.org/849722
    oqparser.add_boolean_prefix('tag', 'K', '')
    # Make sure other cases also work:
    oqparser.add_boolean_prefix('zag', 'XR', False) # Old-style
    oqparser.add_boolean_prefix('rag', 'XR', None)
    oqparser.add_boolean_prefix('nag', 'XB', '')
    oqparser.add_boolean_prefix('bag', 'XB', 'blergh')
    oqparser.add_boolean_prefix('jag', 'XB', b'blergh')

    # Regression test for bug#192 - fixed in 1.0.3.
    enq.set_cutoff(100)

    # Test setting and getting metadata
    #
    # An unset key reads back as an empty byte string.  Empty keys must be
    # rejected by both get_metadata and set_metadata; get_metadata is checked
    # again after the rejected set_metadata call.
    expect(db.get_metadata(b'Foo'), b'')
    db.set_metadata(b'Foo', b'Foo')
    expect(db.get_metadata(b'Foo'), b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.set_metadata, b'', b'Foo')
    expect_exception(xapian.InvalidArgumentError, "Empty metadata keys are invalid", db.get_metadata, b'')

    # Test OP_SCALE_WEIGHT and corresponding constructor
    expect_query(xapian.Query(xapian.Query.OP_SCALE_WEIGHT, xapian.Query(b'foo'), 5),
                 "5 * foo")
451
def test_userstem():
    # Feature test for stemmers implemented in Python: MyStemmer is wrapped in
    # xapian.Stem and used directly, via TermGenerator and via QueryParser.
    vowel_stripper = MyStemmer()
    first = xapian.Stem(vowel_stripper)
    expect(first(b'test'), b'tst')
    # A second Stem object wrapping the very same implementation object.
    second = xapian.Stem(vowel_stripper)
    expect(second(b'toastie'), b'tst')

    # Index some text using a TermGenerator with its own MyStemmer.
    target = xapian.Document()
    termgen = xapian.TermGenerator()
    termgen.set_stemmer(xapian.Stem(MyStemmer()))
    termgen.set_document(target)
    termgen.index_text(b'hello world')

    # Render the document's terms as "/term1/term2/.../" for comparison.
    pieces = [entry.term.decode('utf-8') for entry in target.termlist()]
    rendered = '/' + ''.join(piece + '/' for piece in pieces)
    expect(rendered, '/Zhll/Zwrld/hello/world/')

    # And finally parse a query with every term stemmed by a third MyStemmer.
    qp = xapian.QueryParser()
    qp.set_stemmer(xapian.Stem(MyStemmer()))
    qp.set_stemming_strategy(xapian.QueryParser.STEM_ALL)
    expect_query(qp.parse_query(b'color television'), '(clr@1 OR tlvsn@2)')
476
def test_internal_enums_not_wrapped():
    # No attribute of xapian.Query may have a name starting "LEAF_".
    exposed = list(filter(lambda name: name.startswith('LEAF_'),
                          dir(xapian.Query)))
    expect(exposed, [])
480
def test_internals_not_wrapped():
    # Check that no name ending in "_" is exposed, either at module level or
    # as a member of any class in the module.  Offending names are collected
    # (class members as "Class.member") so that a failure lists all of them.
    internals = []
    for name in dir(xapian):
        # Skip Python stuff like __file__ and __version__.
        if name.startswith('__'):
            continue
        if name.endswith('_'):
            internals.append(name)
        # Skip non-classes
        if not name[0].isupper():
            continue
        # getattr() does the attribute lookup directly, without needing to
        # eval() a constructed source string.
        cls = getattr(xapian, name)
        if not isinstance(cls, type):
            continue
        for member in dir(cls):
            if member.startswith('__'):
                continue
            if member.endswith('_'):
                internals.append(name + '.' + member)

    expect(internals, [])
496
def test_zz9_check_leaks():
    # Check every MyStemmer has been deleted by now.  Presumably named "zz9"
    # so that it sorts after the other tests - confirm against runtests().
    import gc
    # Force a collection first so objects only kept alive by reference cycles
    # get the chance to be deleted.
    gc.collect()
    survivors = len(mystemmers)
    if survivors != 0:
        raise TestFail("%d MyStemmer objects not deleted" % survivors)
502
# Run all tests (ie, callables with names starting "test_"), exiting with
# status 1 if runtests() returns a false value.
if not runtests(globals()):
    sys.exit(1)
506
507# vim:syntax=python:set expandtab:
508