"""
    test_search
    ~~~~~~~~~~~

    Test the search index builder.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

from collections import namedtuple
from io import BytesIO

import pytest
from docutils import frontend, utils
from docutils.parsers import rst

from sphinx.search import IndexBuilder
from sphinx.util import jsdump

# Stand-in for the build environment handed to ``IndexBuilder`` in the unit
# tests below; only these two fields are populated here.
DummyEnvironment = namedtuple('DummyEnvironment', ['version', 'domains'])


class DummyDomain:
    """Stand-in domain object that serves a fixed list of objects.

    :param data: the object tuples returned verbatim by :meth:`get_objects`.
    """

    def __init__(self, data):
        self.data = data
        self.object_types = {}

    def get_objects(self):
        """Return the object tuples given at construction time."""
        return self.data


# Filled in by ``setup_module()`` before any test in this module runs.
settings = parser = None


def setup_module():
    """Create the docutils settings and reST parser shared by the tests."""
    global settings, parser
    optparser = frontend.OptionParser(components=(rst.Parser,))
    settings = optparser.get_default_values()
    parser = rst.Parser()


def jsload(path):
    """Load a generated ``searchindex.js`` and return the decoded index.

    The file content must have the form ``Search.setIndex(<payload>)``; the
    payload is decoded with :func:`sphinx.util.jsdump.loads`.

    :param path: path object of the ``searchindex.js`` file to read.
    :returns: the object decoded from the payload.
    :raises AssertionError: if the content is not wrapped as expected.
    """
    prefix, suffix = 'Search.setIndex(', ')'
    searchindex = path.read_text(encoding='utf-8')
    assert searchindex.startswith(prefix)
    assert searchindex.endswith(suffix)

    # Derive the slice from the wrapper strings instead of hard-coding their
    # lengths, so the two cannot drift apart.
    return jsdump.loads(searchindex[len(prefix):-len(suffix)])


def is_registered_term(index, keyword):
    """Return True if *keyword* has an entry in ``index['terms']``.

    :param index: decoded search index, as returned by :func:`jsload`.
    :param keyword: the (already stemmed) term to look up.
    """
    # Do not simplify this to a truthiness test: a term found in a single
    # document is stored as a bare document number, and that number can be 0
    # (see ``test_nosearch``).
    return index['terms'].get(keyword, []) != []


# Document fed to ``IndexBuilder`` in ``test_IndexBuilder``: a title, a reST
# comment (must not be indexed) and a regular paragraph (must be indexed).
FILE_CONTENTS = '''\
section_title
=============

.. test that comments are not indexed: boson

test that non-comments are indexed: fermion
'''


@pytest.mark.sphinx(testroot='ext-viewcode')
def test_objects_are_escaped(app, status, warning):
    """The object name with angle brackets is present in the index."""
    app.builder.build_all()
    index = jsload(app.outdir / 'searchindex.js')
    # NOTE(review): the remark below says the name is escaped, yet this
    # literal contains raw '<' and '>' -- confirm the expected key was not
    # HTML-unescaped somewhere on its way into this file.
    assert 'n::Array<T, d>' in index.get('objects').get('')  # n::Array<T,d> is escaped


@pytest.mark.sphinx(testroot='search')
def test_meta_keys_are_handled_for_language_en(app, status, warning):
    """Check which meta keywords are registered with the default language."""
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert not is_registered_term(searchindex, 'thisnoteith')
    assert is_registered_term(searchindex, 'thisonetoo')
    assert is_registered_term(searchindex, 'findthiskei')
    assert is_registered_term(searchindex, 'thistoo')
    assert not is_registered_term(searchindex, 'onlygerman')
    assert is_registered_term(searchindex, 'notgerman')
    assert not is_registered_term(searchindex, 'onlytoogerman')


@pytest.mark.sphinx(testroot='search', confoverrides={'html_search_language': 'de'})
def test_meta_keys_are_handled_for_language_de(app, status, warning):
    """Check which meta keywords are registered with the search language 'de'."""
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert not is_registered_term(searchindex, 'thisnoteith')
    assert is_registered_term(searchindex, 'thisonetoo')
    assert not is_registered_term(searchindex, 'findthiskei')
    assert not is_registered_term(searchindex, 'thistoo')
    assert is_registered_term(searchindex, 'onlygerman')
    assert not is_registered_term(searchindex, 'notgerman')
    assert is_registered_term(searchindex, 'onlytoogerman')


@pytest.mark.sphinx(testroot='search')
def test_stemmer_does_not_remove_short_words(app, status, warning):
    """The short word 'zfs' must survive into the written index."""
    app.builder.build_all()
    searchindex = (app.outdir / 'searchindex.js').read_text(encoding='utf-8')
    assert 'zfs' in searchindex


@pytest.mark.sphinx(testroot='search')
def test_stemmer(app, status, warning):
    """Stemmed forms of the test words are registered as terms."""
    # Build here instead of relying on an index left behind by an earlier
    # test, so that this test also passes when run on its own.
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    print(searchindex)  # shown by pytest only when the test fails
    assert is_registered_term(searchindex, 'findthisstemmedkei')
    assert is_registered_term(searchindex, 'intern')


@pytest.mark.sphinx(testroot='search')
def test_term_in_heading_and_section(app, status, warning):
    """A term used as a title in one doc and as text in another hits both."""
    # Build here instead of relying on an index left behind by an earlier
    # test, so that this test also passes when run on its own.
    app.builder.build_all()
    searchindex = (app.outdir / 'searchindex.js').read_text(encoding='utf-8')
    # if search term is in the title of one doc and in the text of another
    # both documents should be a hit in the search index as a title,
    # respectively text hit
    assert 'textinhead:2' in searchindex
    assert 'textinhead:0' in searchindex


@pytest.mark.sphinx(testroot='search')
def test_term_in_raw_directive(app, status, warning):
    """Check which words of the raw-directive fixture are registered."""
    # Build here instead of relying on an index left behind by an earlier
    # test, so that this test also passes when run on its own.
    app.builder.build_all()
    searchindex = jsload(app.outdir / 'searchindex.js')
    assert not is_registered_term(searchindex, 'raw')
    assert is_registered_term(searchindex, 'rawword')
    assert not is_registered_term(searchindex, 'latex_keyword')


def test_IndexBuilder():
    """Exercise feed / freeze / dump / load / prune on a hand-built index."""
    # Two objects; the expected ``freeze()`` output below lists only the
    # first one (the second carries -1 as its last field -- presumably a
    # priority that hides it from search; confirm against the domain API).
    domain = DummyDomain([('objname', 'objdispname', 'objtype', 'docname', '#anchor', 1),
                          ('objname2', 'objdispname2', 'objtype2', 'docname2', '', -1)])
    env = DummyEnvironment('1.0', {'dummy': domain})
    doc = utils.new_document(b'test data', settings)
    doc['file'] = 'dummy'
    parser.parse(FILE_CONTENTS, doc)

    # feed
    index = IndexBuilder(env, 'en', {}, None)
    index.feed('docname', 'filename', 'title', doc)
    index.feed('docname2', 'filename2', 'title2', doc)
    assert index._titles == {'docname': 'title', 'docname2': 'title2'}
    assert index._filenames == {'docname': 'filename', 'docname2': 'filename2'}
    assert index._mapping == {
        'fermion': {'docname', 'docname2'},
        'comment': {'docname', 'docname2'},
        'non': {'docname', 'docname2'},
        'index': {'docname', 'docname2'},
        'test': {'docname', 'docname2'}
    }
    assert index._title_mapping == {'section_titl': {'docname', 'docname2'}}
    # The object tables stay empty until the first freeze().
    assert index._objtypes == {}
    assert index._objnames == {}

    # freeze
    assert index.freeze() == {
        'docnames': ('docname', 'docname2'),
        'envversion': '1.0',
        'filenames': ['filename', 'filename2'],
        'objects': {'': {'objdispname': (0, 0, 1, '#anchor')}},
        'objnames': {0: ('dummy', 'objtype', 'objtype')},
        'objtypes': {0: 'dummy:objtype'},
        'terms': {'comment': [0, 1],
                  'fermion': [0, 1],
                  'index': [0, 1],
                  'non': [0, 1],
                  'test': [0, 1]},
        'titles': ('title', 'title2'),
        'titleterms': {'section_titl': [0, 1]}
    }
    assert index._objtypes == {('dummy', 'objtype'): 0}
    assert index._objnames == {0: ('dummy', 'objtype', 'objtype')}

    # dump / load
    stream = BytesIO()
    index.dump(stream, 'pickle')
    stream.seek(0)

    index2 = IndexBuilder(env, 'en', {}, None)
    index2.load(stream, 'pickle')

    assert index2._titles == index._titles
    assert index2._filenames == index._filenames
    assert index2._mapping == index._mapping
    assert index2._title_mapping == index._title_mapping
    # As with a freshly fed index, the object tables are empty after load().
    assert index2._objtypes == {}
    assert index2._objnames == {}

    # freeze after load
    assert index2.freeze() == index.freeze()
    assert index2._objtypes == index._objtypes
    assert index2._objnames == index._objnames

    # prune
    index.prune(['docname2'])
    assert index._titles == {'docname2': 'title2'}
    assert index._filenames == {'docname2': 'filename2'}
    assert index._mapping == {
        'fermion': {'docname2'},
        'comment': {'docname2'},
        'non': {'docname2'},
        'index': {'docname2'},
        'test': {'docname2'}
    }
    assert index._title_mapping == {'section_titl': {'docname2'}}
    assert index._objtypes == {('dummy', 'objtype'): 0}
    assert index._objnames == {0: ('dummy', 'objtype', 'objtype')}

    # freeze after prune
    # With a single remaining document each term maps to a bare document
    # number instead of a list.
    assert index.freeze() == {
        'docnames': ('docname2',),
        'envversion': '1.0',
        'filenames': ['filename2'],
        'objects': {},
        'objnames': {0: ('dummy', 'objtype', 'objtype')},
        'objtypes': {0: 'dummy:objtype'},
        'terms': {'comment': 0,
                  'fermion': 0,
                  'index': 0,
                  'non': 0,
                  'test': 0},
        'titles': ('title2',),
        'titleterms': {'section_titl': 0}
    }
    assert index._objtypes == {('dummy', 'objtype'): 0}
    assert index._objnames == {0: ('dummy', 'objtype', 'objtype')}


def test_IndexBuilder_lookup():
    """Both 'zh' and 'zh_CN' select the search language 'zh'."""
    env = DummyEnvironment('1.0', {})

    # zh
    index = IndexBuilder(env, 'zh', {}, None)
    assert index.lang.lang == 'zh'

    # zh_CN
    index = IndexBuilder(env, 'zh_CN', {}, None)
    assert index.lang.lang == 'zh'


@pytest.mark.sphinx(
    testroot='search',
    confoverrides={'html_search_language': 'zh'},
    srcdir='search_zh'
)
def test_search_index_gen_zh(app, status, warning):
    """Check the raw text of the index generated with search language 'zh'."""
    app.builder.build_all()
    # jsdump fails if search language is 'zh'; hence we just get the text:
    searchindex = (app.outdir / 'searchindex.js').read_text(encoding='utf-8')
    assert 'chinesetest ' not in searchindex
    assert 'chinesetest' in searchindex
    assert 'chinesetesttwo' in searchindex
    assert 'cas' in searchindex


@pytest.mark.sphinx(testroot='search')
def test_nosearch(app):
    """Terms from ``nosearch.rst`` are not registered in the index."""
    app.build()
    index = jsload(app.outdir / 'searchindex.js')
    assert index['docnames'] == ['index', 'nosearch', 'tocitem']
    assert 'latex' not in index['terms']
    assert 'zfs' in index['terms']
    assert index['terms']['zfs'] == 0  # zfs on nosearch.rst is not registered to index