"""
    test_build_linkcheck
    ~~~~~~~~~~~~~~~~~~~~

    Test the build process with linkcheck builder with the test root.

    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""

import http.server
import json
import re
import textwrap
import time
import wsgiref.handlers
from datetime import datetime
from queue import Queue
from typing import Dict
from unittest import mock

import pytest
import requests

from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
from sphinx.util.console import strip_colors

from .utils import CERT_FILE, http_server, https_server

ts_re = re.compile(r".*\[(?P<ts>.*)\].*")


@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults(app):
    """Build the ``linkcheck`` test root and check the text report of broken links."""
    app.build()

    assert (app.outdir / 'output.txt').exists()
    content = (app.outdir / 'output.txt').read_text()

    print(content)
    # looking for '#top' and '#does-not-exist' not found should fail
    assert "Anchor 'top' not found" in content
    assert "Anchor 'does-not-exist' not found" in content
    # looking for non-existent URL should fail
    assert " Max retries exceeded with url: /doesnotexist" in content
    # images should fail
    assert "Not Found for url: https://www.google.com/image.png" in content
    assert "Not Found for url: https://www.google.com/image2.png" in content
    # looking for local file should fail
    assert "[broken] path/to/notfound" in content
    assert len(content.splitlines()) == 6


@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app):
    """Build the ``linkcheck`` test root and check the JSON-lines report."""
    app.build()

    assert (app.outdir / 'output.json').exists()
    content = (app.outdir / 'output.json').read_text()
    print(content)

    # output.json holds one JSON document per line
    rows = [json.loads(x) for x in content.splitlines()]
    row = rows[0]
    for attr in ["filename", "lineno", "status", "code", "uri",
                 "info"]:
        assert attr in row

    assert len(content.splitlines()) == 10
    assert len(rows) == 10
    # the output order of the rows is not stable
    # due to possible variance in network latency
    rowsby = {row["uri"]: row for row in rows}
    assert rowsby["https://www.google.com#!bar"] == {
        'filename': 'links.txt',
        'lineno': 10,
        'status': 'working',
        'code': 0,
        'uri': 'https://www.google.com#!bar',
        'info': ''
    }
    # looking for non-existent URL should fail
    dnerow = rowsby['https://localhost:7777/doesnotexist']
    assert dnerow['filename'] == 'links.txt'
    assert dnerow['lineno'] == 13
    assert dnerow['status'] == 'broken'
    assert dnerow['code'] == 0
    assert dnerow['uri'] == 'https://localhost:7777/doesnotexist'
    assert rowsby['https://www.google.com/image2.png'] == {
        'filename': 'links.txt',
        'lineno': 18,
        'status': 'broken',
        'code': 0,
        'uri': 'https://www.google.com/image2.png',
        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png'
    }
    # looking for '#top' and '#does-not-exist' not found should fail
    assert "Anchor 'top' not found" == \
        rowsby["https://www.google.com/#top"]["info"]
    assert "Anchor 'does-not-exist' not found" == \
        rowsby["http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"]
    # images should fail
    assert "Not Found for url: https://www.google.com/image.png" in \
        rowsby["https://www.google.com/image.png"]["info"]


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck', freshenv=True,
    confoverrides={'linkcheck_anchors_ignore': ["^!", "^top$"],
                   'linkcheck_ignore': [
                       'https://localhost:7777/doesnotexist',
                       'http://www.sphinx-doc.org/en/1.7/intro.html#',
                       'https://www.google.com/image.png',
                       'https://www.google.com/image2.png',
                       'path/to/notfound']
                   })
def test_anchors_ignored(app):
    """With the failing links and anchors ignored, the text report must be empty."""
    app.build()

    assert (app.outdir / 'output.txt').exists()
    content = (app.outdir / 'output.txt').read_text()

    # expect all ok when excluding #top
    assert not content


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True)
def test_raises_for_invalid_status(app):
    """A 500 response to an anchored link is reported as broken with the HTTP error."""
    class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):
        def do_GET(self):
            self.send_error(500, "Internal Server Error")

    with http_server(InternalServerErrorHandler):
        app.build()
    content = (app.outdir / 'output.txt').read_text()
    assert content == (
        "index.rst:1: [broken] http://localhost:7777/#anchor: "
        "500 Server Error: Internal Server Error "
        "for url: http://localhost:7777/\n"
    )


class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
    """Answer 200 to HEAD/GET and print the request headers to stdout.

    Tests capture stdout (``capsys``) to inspect which headers were sent.
    """

    def do_HEAD(self):
        self.do_GET()

    def do_GET(self):
        self.send_response(200, "OK")
        self.end_headers()
        print(self.headers.as_string())


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_auth': [
        (r'^$', ('no', 'match')),
        (r'^http://localhost:7777/$', ('user1', 'password')),
        (r'.*local.*', ('user2', 'hunter2')),
    ]})
def test_auth_header_uses_first_match(app, capsys):
    """Of several ``linkcheck_auth`` entries, the ``user1`` credentials are sent."""
    with http_server(HeadersDumperHandler):
        app.build()
    stdout, stderr = capsys.readouterr()
    auth = requests.auth._basic_auth_str('user1', 'password')
    assert "Authorization: %s\n" % auth in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
def test_auth_header_no_match(app, capsys):
    """No ``Authorization`` header is sent when the only auth pattern is ``^$``."""
    with http_server(HeadersDumperHandler):
        app.build()
    stdout, stderr = capsys.readouterr()
    assert "Authorization" not in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://localhost:7777/": {
            "Accept": "text/html",
        },
        "*": {
            "X-Secret": "open sesami",
        }
    }})
def test_linkcheck_request_headers(app, capsys):
    """Headers configured for the exact URL are sent; the ``*`` fallback is not."""
    with http_server(HeadersDumperHandler):
        app.build()

    stdout, _stderr = capsys.readouterr()
    assert "Accept: text/html\n" in stdout
    assert "X-Secret" not in stdout
    assert "sesami" not in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://localhost:7777": {"Accept": "application/json"},
        "*": {"X-Secret": "open sesami"}
    }})
def test_linkcheck_request_headers_no_slash(app, capsys):
    """A header key without a trailing slash still applies to the checked URL."""
    with http_server(HeadersDumperHandler):
        app.build()

    stdout, _stderr = capsys.readouterr()
    assert "Accept: application/json\n" in stdout
    assert "X-Secret" not in stdout
    assert "sesami" not in stdout


@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://do.not.match.org": {"Accept": "application/json"},
        "*": {"X-Secret": "open sesami"}
    }})
def test_linkcheck_request_headers_default(app, capsys):
    """When no URL-specific key applies, only the ``*`` fallback headers are sent."""
    with http_server(HeadersDumperHandler):
        app.build()

    stdout, _stderr = capsys.readouterr()
    # The header name is "Accept" (as in the sibling tests); the previous
    # spelling "Accepts" could never appear, making this check vacuous.
    assert "Accept: application/json\n" not in stdout
    assert "X-Secret: open sesami\n" in stdout


def make_redirect_handler(*, support_head):
    """Build a request handler class that redirects once to ``/?redirected=1``.

    :param support_head: when true, HEAD requests behave like GET; otherwise
                         HEAD is answered with 405 Method Not Allowed.
    :returns: a :class:`http.server.BaseHTTPRequestHandler` subclass.
    """
    class RedirectOnceHandler(http.server.BaseHTTPRequestHandler):
        def do_HEAD(self):
            if support_head:
                self.do_GET()
            else:
                self.send_response(405, "Method Not Allowed")
                self.end_headers()

        def do_GET(self):
            if self.path == "/?redirected=1":
                # redirect target: terminate the chain
                self.send_response(204, "No content")
            else:
                self.send_response(302, "Found")
                self.send_header("Location", "http://localhost:7777/?redirected=1")
            self.end_headers()

        def log_date_time_string(self):
            """Strip date and time from logged messages for assertions."""
            return ""

    return RedirectOnceHandler


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_HEAD(app, capsys):
    """Redirects are followed using HEAD requests when the server supports HEAD."""
    with http_server(make_redirect_handler(support_head=True)):
        app.build()
    stdout, stderr = capsys.readouterr()
    content = (app.outdir / 'output.txt').read_text()
    assert content == (
        "index.rst:1: [redirected with Found] "
        "http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
    )
    # the server's request log goes to stderr; timestamps are blanked by the handler
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 302 -
        127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
        """
    )


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_GET(app, capsys):
    """After HEAD is rejected with 405, the redirect is followed using GET."""
    with http_server(make_redirect_handler(support_head=False)):
        app.build()
    stdout, stderr = capsys.readouterr()
    content = (app.outdir / 'output.txt').read_text()
    assert content == (
        "index.rst:1: [redirected with Found] "
        "http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
    )
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 405 -
        127.0.0.1 - - [] "GET / HTTP/1.1" 302 -
        127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 -
        """
    )


class OKHandler(http.server.BaseHTTPRequestHandler):
    """Answer 200 to HEAD and GET; GET additionally writes the body ``ok``."""

    def do_HEAD(self):
        self.send_response(200, "OK")
        self.end_headers()

    def do_GET(self):
        self.do_HEAD()
        self.wfile.write(b"ok\n")


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_invalid_ssl(app):
    """An https link served by a plain-HTTP server is reported as an SSL error."""
    # Link indicates SSL should be used (https) but the server does not handle it.
    with http_server(OKHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content["status"] == "broken"
    assert content["filename"] == "index.rst"
    assert content["lineno"] == 1
    assert content["uri"] == "https://localhost:7777/"
    assert "SSLError" in content["info"]


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_fails(app):
    """With default TLS settings, the test HTTPS server fails certificate verification."""
    with https_server(OKHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content["status"] == "broken"
    assert content["filename"] == "index.rst"
    assert content["lineno"] == 1
    assert content["uri"] == "https://localhost:7777/"
    assert "[SSL: CERTIFICATE_VERIFY_FAILED]" in content["info"]


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_verify_false(app):
    """With ``tls_verify = False`` the link to the test HTTPS server is working."""
    app.config.tls_verify = False
    with https_server(OKHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_cacerts(app):
    """With ``tls_cacerts`` set to ``CERT_FILE`` the link is reported as working."""
    app.config.tls_cacerts = CERT_FILE
    with https_server(OKHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_requests_env_var(monkeypatch, app):
    """With ``REQUESTS_CA_BUNDLE`` set to ``CERT_FILE`` the link is reported as working."""
    monkeypatch.setenv("REQUESTS_CA_BUNDLE", CERT_FILE)
    with https_server(OKHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_nonexistent_cert_file(app):
    """A non-existent ``tls_cacerts`` path yields a broken link with a clear message."""
    app.config.tls_cacerts = "does/not/exist"
    with https_server(OKHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content == {
        "code": 0,
        "status": "broken",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "info": "Could not find a suitable TLS CA certificate bundle, invalid path: does/not/exist",
    }


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_TooManyRedirects_on_HEAD(app):
    """HEAD redirects to itself endlessly while GET succeeds; the link is working."""
    class InfiniteRedirectOnHeadHandler(http.server.BaseHTTPRequestHandler):
        def do_HEAD(self):
            # always redirect back to the same URL
            self.send_response(302, "Found")
            self.send_header("Location", "http://localhost:7777/")
            self.end_headers()

        def do_GET(self):
            self.send_response(200, "OK")
            self.end_headers()
            self.wfile.write(b"ok\n")

    with http_server(InfiniteRedirectOnHeadHandler):
        app.build()

    with open(app.outdir / 'output.json') as fp:
        content = json.load(fp)
    assert content == {
        "code": 0,
        "status": "working",
        "filename": "index.rst",
        "lineno": 1,
        "uri": "http://localhost:7777/",
        "info": "",
    }


def make_retry_after_handler(responses):
    """Build a handler class that replays scripted HEAD responses.

    :param responses: list of ``(status, retry_after)`` pairs. Each HEAD request
                      pops the first pair (the list is mutated); a truthy
                      ``retry_after`` is sent as the ``Retry-After`` header.
    :returns: a :class:`http.server.BaseHTTPRequestHandler` subclass.
    """
    class RetryAfterHandler(http.server.BaseHTTPRequestHandler):
        def do_HEAD(self):
            status, retry_after = responses.pop(0)
            self.send_response(status)
            if retry_after:
                self.send_header('Retry-After', retry_after)
            self.end_headers()

        def log_date_time_string(self):
            """Strip date and time from logged messages for assertions."""
            return ""

    return RetryAfterHandler


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_int_delay(app, capsys, status):
    """A 429 with an integer ``Retry-After`` is retried and logged as rate limited."""
    # patch the builder's delay/poll constants so the retry happens without waiting
    with http_server(make_retry_after_handler([(429, "0"), (200, None)])), \
         mock.patch("sphinx.builders.linkcheck.DEFAULT_DELAY", 0), \
         mock.patch("sphinx.builders.linkcheck.QUEUE_POLL_SECS", 0.01):
        app.build()
    content = (app.outdir / 'output.json').read_text()
    assert json.loads(content) == {
        "filename": "index.rst",
        "lineno": 1,
        "status": "working",
        "code": 0,
        "uri": "http://localhost:7777/",
        "info": "",
    }
    rate_limit_log = "-rate limited-   http://localhost:7777/ | sleeping...\n"
    assert rate_limit_log in strip_colors(status.getvalue())
    _stdout, stderr = capsys.readouterr()
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 429 -
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 200 -
        """
    )


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_HTTP_date(app, capsys):
    """A 429 whose ``Retry-After`` is an HTTP date (the current time) is retried."""
    now = datetime.now().timetuple()
    retry_after = wsgiref.handlers.format_date_time(time.mktime(now))
    with http_server(make_retry_after_handler([(429, retry_after), (200, None)])):
        app.build()
    content = (app.outdir / 'output.json').read_text()
    assert json.loads(content) == {
        "filename": "index.rst",
        "lineno": 1,
        "status": "working",
        "code": 0,
        "uri": "http://localhost:7777/",
        "info": "",
    }
    _stdout, stderr = capsys.readouterr()
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 429 -
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 200 -
        """
    )


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_without_header(app, capsys):
    """A 429 without ``Retry-After`` is retried (default delay patched to 0)."""
    with http_server(make_retry_after_handler([(429, None), (200, None)])), \
         mock.patch("sphinx.builders.linkcheck.DEFAULT_DELAY", 0):
        app.build()
    content = (app.outdir / 'output.json').read_text()
    assert json.loads(content) == {
        "filename": "index.rst",
        "lineno": 1,
        "status": "working",
        "code": 0,
        "uri": "http://localhost:7777/",
        "info": "",
    }
    _stdout, stderr = capsys.readouterr()
    assert stderr == textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 429 -
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 200 -
        """
    )


@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_user_timeout(app, capsys):
    """With a zero rate-limit timeout, a 429 is reported as broken immediately."""
    app.config.linkcheck_rate_limit_timeout = 0.0
    with http_server(make_retry_after_handler([(429, None)])):
        app.build()
    content = (app.outdir / 'output.json').read_text()
    assert json.loads(content) == {
        "filename": "index.rst",
        "lineno": 1,
        "status": "broken",
        "code": 0,
        "uri": "http://localhost:7777/",
        "info": "429 Client Error: Too Many Requests for url: http://localhost:7777/",
    }


class FakeResponse:
    """Minimal response stand-in passed to ``limit_rate``: no headers, fixed URL."""

    headers = {}  # type: Dict[str, str]
    url = "http://localhost/"


def test_limit_rate_default_sleep(app):
    """With no prior rate limit and the clock frozen at 0, the next check is at 60.0."""
    worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(), {})
    with mock.patch('time.time', return_value=0.0):
        next_check = worker.limit_rate(FakeResponse())
    assert next_check == 60.0


def test_limit_rate_user_max_delay(app):
    """With a zero rate-limit timeout, ``limit_rate`` returns ``None``."""
    app.config.linkcheck_rate_limit_timeout = 0.0
    worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(), {})
    next_check = worker.limit_rate(FakeResponse())
    assert next_check is None


def test_limit_rate_doubles_previous_wait_time(app):
    """A previous 60.0 delay for the host is doubled to 120.0."""
    rate_limits = {"localhost": RateLimit(60.0, 0.0)}
    worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
                                              rate_limits)
    with mock.patch('time.time', return_value=0.0):
        next_check = worker.limit_rate(FakeResponse())
    assert next_check == 120.0


def test_limit_rate_clips_wait_time_to_max_time(app):
    """The doubled delay is clipped to the configured 90.0 timeout."""
    app.config.linkcheck_rate_limit_timeout = 90.0
    rate_limits = {"localhost": RateLimit(60.0, 0.0)}
    worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
                                              rate_limits)
    with mock.patch('time.time', return_value=0.0):
        next_check = worker.limit_rate(FakeResponse())
    assert next_check == 90.0


def test_limit_rate_bails_out_after_waiting_max_time(app):
    """Once the previous delay equals the timeout, ``limit_rate`` returns ``None``."""
    app.config.linkcheck_rate_limit_timeout = 90.0
    rate_limits = {"localhost": RateLimit(90.0, 0.0)}
    worker = HyperlinkAvailabilityCheckWorker(app.env, app.config, Queue(), Queue(),
                                              rate_limits)
    next_check = worker.limit_rate(FakeResponse())
    assert next_check is None