1"""
2    test_search
3    ~~~~~~~~~~~
4
5    Test the search index builder.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11from collections import namedtuple
12from io import BytesIO
13
14import pytest
15from docutils import frontend, utils
16from docutils.parsers import rst
17
18from sphinx.search import IndexBuilder
19from sphinx.util import jsdump
20
# Lightweight stand-in for the environment handed to IndexBuilder in the
# tests below; it carries just a version and a mapping of domains.
DummyEnvironment = namedtuple('DummyEnvironment', 'version domains')
22
23
class DummyDomain:
    """Bare-bones domain double that serves a fixed collection of objects."""

    def __init__(self, data):
        # No object types are declared; the mapping starts out empty.
        self.object_types = {}
        self.data = data

    def get_objects(self):
        """Return the object entries given to the constructor, unchanged."""
        return self.data
31
32
# Module-wide docutils fixtures; both are assigned by setup_module().
settings = None
parser = None
34
35
def setup_module():
    """Populate the module-level docutils ``settings`` and reST ``parser``."""
    global settings, parser
    parser = rst.Parser()
    # Default settings are taken from an option parser configured for the
    # reST parser component.
    settings = frontend.OptionParser(components=(rst.Parser,)).get_default_values()
41
42
def jsload(path):
    """Read a generated search index file and decode its payload.

    The file content must have the form ``Search.setIndex(<data>)``.  The
    wrapper is verified and stripped, and ``<data>`` is decoded with
    ``jsdump.loads``.

    :param path: path object of the ``searchindex.js`` file to read
    :return: the decoded search index
    """
    prefix, suffix = 'Search.setIndex(', ')'
    content = path.read_text()
    assert content.startswith(prefix)
    assert content.endswith(suffix)

    payload = content[len(prefix):-len(suffix)]
    return jsdump.loads(payload)
49
50
def is_registered_term(index, keyword):
    """Tell whether *keyword* has an entry other than ``[]`` in the index terms.

    A missing keyword is treated like an empty list.  The comparison is made
    against ``[]`` explicitly (not by truthiness) so that a term mapped to the
    integer ``0`` still counts as registered.
    """
    hits = index['terms'].get(keyword, [])
    return hits != []
53
54
# reST source parsed in test_IndexBuilder: a titled section holding a comment
# (mentioning "boson") and a regular paragraph (mentioning "fermion").
FILE_CONTENTS = '''\
section_title
=============

.. test that comments are not indexed: boson

test that non-comments are indexed: fermion
'''
63
64
@pytest.mark.sphinx(testroot='ext-viewcode')
def test_objects_are_escaped(app, status, warning):
    """Object names stored in the search index must be HTML-escaped."""
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    objects_under_empty_key = searchindex.get('objects').get('')
    # n::Array<T,d> is escaped
    assert 'n::Array&lt;T, d&gt;' in objects_under_empty_key
70
71
@pytest.mark.sphinx(testroot='search')
def test_meta_keys_are_handled_for_language_en(app, status, warning):
    """Check which meta keywords get indexed without a search-language override."""
    app.builder.build_all()
    index = jsload(app.outdir / 'searchindex.js')

    # (term, whether it must be registered) -- checked in this order.
    expectations = [
        ('thisnoteith', False),
        ('thisonetoo', True),
        ('findthiskei', True),
        ('thistoo', True),
        ('onlygerman', False),
        ('notgerman', True),
        ('onlytoogerman', False),
    ]
    for term, registered in expectations:
        assert is_registered_term(index, term) == registered, term
83
84
@pytest.mark.sphinx(testroot='search', confoverrides={'html_search_language': 'de'})
def test_meta_keys_are_handled_for_language_de(app, status, warning):
    """Check which meta keywords get indexed when the search language is ``de``."""
    app.builder.build_all()
    index = jsload(app.outdir / 'searchindex.js')

    # (term, whether it must be registered) -- checked in this order.
    expectations = [
        ('thisnoteith', False),
        ('thisonetoo', True),
        ('findthiskei', False),
        ('thistoo', False),
        ('onlygerman', True),
        ('notgerman', False),
        ('onlytoogerman', True),
    ]
    for term, registered in expectations:
        assert is_registered_term(index, term) == registered, term
96
97
@pytest.mark.sphinx(testroot='search')
def test_stemmer_does_not_remove_short_words(app, status, warning):
    """The short word ``zfs`` must still show up in the written index file."""
    app.builder.build_all()
    index_file = app.outdir / 'searchindex.js'
    raw_index = index_file.read_text()
    assert 'zfs' in raw_index
103
104
@pytest.mark.sphinx(testroot='search')
def test_stemmer(app, status, warning):
    """Stemmed forms of words from the test root must be registered terms."""
    # Build first so the test does not rely on another test having already
    # written searchindex.js into the output directory.
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    # Printed on purpose: the full index is useful output when an assertion
    # below fails.
    print(searchindex)
    assert is_registered_term(searchindex, 'findthisstemmedkei')
    assert is_registered_term(searchindex, 'intern')
111
112
@pytest.mark.sphinx(testroot='search')
def test_term_in_heading_and_section(app, status, warning):
    """A term used as a title in one doc and as text in another hits both."""
    # Build first so the test does not rely on another test having already
    # written searchindex.js into the output directory.
    app.builder.build_all()
    searchindex = (app.outdir / 'searchindex.js').read_text()
    # if search term is in the title of one doc and in the text of another
    # both documents should be a hit in the search index as a title,
    # respectively text hit
    assert 'textinhead:2' in searchindex
    assert 'textinhead:0' in searchindex
121
122
@pytest.mark.sphinx(testroot='search')
def test_term_in_raw_directive(app, status, warning):
    """Check which terms related to raw directives end up in the index.

    ``rawword`` must be registered, while ``raw`` and ``latex_keyword`` must
    not be.
    """
    # Build first so the test does not rely on another test having already
    # written searchindex.js into the output directory.
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert not is_registered_term(searchindex, 'raw')
    assert is_registered_term(searchindex, 'rawword')
    assert not is_registered_term(searchindex, 'latex_keyword')
129
130
def test_IndexBuilder():
    """Drive an IndexBuilder through feed, freeze, dump/load and prune."""
    # Values that recur in several of the expectations below.
    body_terms = ('fermion', 'comment', 'non', 'index', 'test')
    frozen_objtypes = {('dummy', 'objtype'): 0}
    frozen_objnames = {0: ('dummy', 'objtype', 'objtype')}

    dummy_domain = DummyDomain([
        ('objname', 'objdispname', 'objtype', 'docname', '#anchor', 1),
        ('objname2', 'objdispname2', 'objtype2', 'docname2', '', -1),
    ])
    env = DummyEnvironment('1.0', {'dummy': dummy_domain})
    document = utils.new_document(b'test data', settings)
    document['file'] = 'dummy'
    parser.parse(FILE_CONTENTS, document)

    # feed
    builder = IndexBuilder(env, 'en', {}, None)
    builder.feed('docname', 'filename', 'title', document)
    builder.feed('docname2', 'filename2', 'title2', document)
    assert builder._titles == {'docname': 'title', 'docname2': 'title2'}
    assert builder._filenames == {'docname': 'filename', 'docname2': 'filename2'}
    assert builder._mapping == {term: {'docname', 'docname2'} for term in body_terms}
    assert builder._title_mapping == {'section_titl': {'docname', 'docname2'}}
    # object tables are still empty before the first freeze
    assert builder._objtypes == {}
    assert builder._objnames == {}

    # freeze
    assert builder.freeze() == {
        'docnames': ('docname', 'docname2'),
        'envversion': '1.0',
        'filenames': ['filename', 'filename2'],
        'objects': {'': {'objdispname': (0, 0, 1, '#anchor')}},
        'objnames': frozen_objnames,
        'objtypes': {0: 'dummy:objtype'},
        'terms': {term: [0, 1] for term in body_terms},
        'titles': ('title', 'title2'),
        'titleterms': {'section_titl': [0, 1]},
    }
    assert builder._objtypes == frozen_objtypes
    assert builder._objnames == frozen_objnames

    # dump / load
    buffer = BytesIO()
    builder.dump(buffer, 'pickle')
    buffer.seek(0)

    reloaded = IndexBuilder(env, 'en', {}, None)
    reloaded.load(buffer, 'pickle')

    for attr in ('_titles', '_filenames', '_mapping', '_title_mapping'):
        assert getattr(reloaded, attr) == getattr(builder, attr), attr
    assert reloaded._objtypes == {}
    assert reloaded._objnames == {}

    # freeze after load
    assert reloaded.freeze() == builder.freeze()
    assert reloaded._objtypes == builder._objtypes
    assert reloaded._objnames == builder._objnames

    # prune
    builder.prune(['docname2'])
    assert builder._titles == {'docname2': 'title2'}
    assert builder._filenames == {'docname2': 'filename2'}
    assert builder._mapping == {term: {'docname2'} for term in body_terms}
    assert builder._title_mapping == {'section_titl': {'docname2'}}
    assert builder._objtypes == frozen_objtypes
    assert builder._objnames == frozen_objnames

    # freeze after prune
    assert builder.freeze() == {
        'docnames': ('docname2',),
        'envversion': '1.0',
        'filenames': ['filename2'],
        'objects': {},
        'objnames': frozen_objnames,
        'objtypes': {0: 'dummy:objtype'},
        'terms': {term: 0 for term in body_terms},
        'titles': ('title2',),
        'titleterms': {'section_titl': 0},
    }
    assert builder._objtypes == frozen_objtypes
    assert builder._objnames == frozen_objnames
228
229
def test_IndexBuilder_lookup():
    """Both ``zh`` and ``zh_CN`` must select the ``zh`` search language."""
    env = DummyEnvironment('1.0', {})

    for language in ('zh', 'zh_CN'):
        builder = IndexBuilder(env, language, {}, None)
        assert builder.lang.lang == 'zh', language
240
241
@pytest.mark.sphinx(
    testroot='search',
    confoverrides={'html_search_language': 'zh'},
    srcdir='search_zh'
)
def test_search_index_gen_zh(app, status, warning):
    """Inspect the raw index text produced with the ``zh`` search language."""
    app.builder.build_all()
    # jsdump fails if search language is 'zh'; hence we just get the text:
    index_file = app.outdir / 'searchindex.js'
    raw_index = index_file.read_text()
    assert 'chinesetest ' not in raw_index
    for term in ('chinesetest', 'chinesetesttwo', 'cas'):
        assert term in raw_index, term
255
256
@pytest.mark.sphinx(testroot='search')
def test_nosearch(app):
    """Terms from ``nosearch.rst`` must be absent from the search index."""
    app.build()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert searchindex['docnames'] == ['index', 'nosearch', 'tocitem']

    terms = searchindex['terms']
    assert 'latex' not in terms
    assert 'zfs' in terms
    # zfs on nosearch.rst is not registered to index
    assert terms['zfs'] == 0
265