1"""
2    test_build_linkcheck
3    ~~~~~~~~~~~~~~~~~~~~
4
    Test the build process with linkcheck builder with the test root.
6
7    :copyright: Copyright 2007-2021 by the Sphinx team, see AUTHORS.
8    :license: BSD, see LICENSE for details.
9"""
10
11import http.server
12import json
13import re
14import textwrap
15import time
16import wsgiref.handlers
17from datetime import datetime
18from queue import Queue
19from typing import Dict
20from unittest import mock
21
22import pytest
23import requests
24
25from sphinx.builders.linkcheck import HyperlinkAvailabilityCheckWorker, RateLimit
26from sphinx.util.console import strip_colors
27
28from .utils import CERT_FILE, http_server, https_server
29
# Matches text that contains a ``[...]`` section and captures whatever sits
# between the brackets as the named group ``ts``.
# NOTE(review): presumably meant for the timestamp part of HTTP server log
# lines; it is not referenced in the visible part of this module -- confirm.
ts_re = re.compile(r".*\[(?P<ts>.*)\].*")
31
32
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults(app):
    """Build the ``linkcheck`` test root and verify the plain-text report.

    ``output.txt`` must exist, mention every expected failure and consist of
    exactly six lines.
    """
    app.build()

    report = app.outdir / 'output.txt'
    assert report.exists()
    content = report.read_text()
    print(content)

    expected_fragments = (
        # the '#top' and '#does-not-exist' anchors cannot be found
        "Anchor 'top' not found",
        "Anchor 'does-not-exist' not found",
        # a non-existent URL must fail
        " Max retries exceeded with url: /doesnotexist",
        # missing images must fail
        "Not Found for url: https://www.google.com/image.png",
        "Not Found for url: https://www.google.com/image2.png",
        # a missing local file must fail
        "[broken] path/to/notfound",
    )
    for fragment in expected_fragments:
        assert fragment in content

    assert len(content.splitlines()) == 6
52
53
@pytest.mark.sphinx('linkcheck', testroot='linkcheck', freshenv=True)
def test_defaults_json(app):
    """Build the ``linkcheck`` test root and verify the JSON-lines report.

    Each line of ``output.json`` is parsed as one JSON object; the test
    checks the row count, the keys of the first row and selected rows.
    """
    app.build()

    report = app.outdir / 'output.json'
    assert report.exists()
    content = report.read_text()
    print(content)

    lines = content.splitlines()
    rows = [json.loads(line) for line in lines]
    first = rows[0]
    for key in ("filename", "lineno", "status", "code", "uri", "info"):
        assert key in first

    assert len(lines) == 10
    assert len(rows) == 10

    # Row order is not stable (network latency varies), so look rows up by URI.
    by_uri = {entry["uri"]: entry for entry in rows}

    assert by_uri["https://www.google.com#!bar"] == {
        'uri': 'https://www.google.com#!bar',
        'filename': 'links.txt',
        'lineno': 10,
        'status': 'working',
        'code': 0,
        'info': '',
    }

    # the non-existent URL must be reported as broken
    missing = by_uri['https://localhost:7777/doesnotexist']
    expected_missing = (
        ('filename', 'links.txt'),
        ('lineno', 13),
        ('status', 'broken'),
        ('code', 0),
        ('uri', 'https://localhost:7777/doesnotexist'),
    )
    for key, value in expected_missing:
        assert missing[key] == value

    assert by_uri['https://www.google.com/image2.png'] == {
        'uri': 'https://www.google.com/image2.png',
        'filename': 'links.txt',
        'lineno': 18,
        'status': 'broken',
        'code': 0,
        'info': '404 Client Error: Not Found for url: https://www.google.com/image2.png',
    }

    # the '#top' and '#does-not-exist' anchors cannot be found
    top_info = by_uri["https://www.google.com/#top"]["info"]
    assert top_info == "Anchor 'top' not found"
    intro_info = by_uri["http://www.sphinx-doc.org/en/1.7/intro.html#does-not-exist"]["info"]
    assert intro_info == "Anchor 'does-not-exist' not found"

    # missing images must fail
    image_info = by_uri["https://www.google.com/image.png"]["info"]
    assert "Not Found for url: https://www.google.com/image.png" in image_info
104
105
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck', freshenv=True,
    confoverrides={
        'linkcheck_anchors_ignore': ["^!", "^top$"],
        'linkcheck_ignore': [
            'https://localhost:7777/doesnotexist',
            'http://www.sphinx-doc.org/en/1.7/intro.html#',
            'https://www.google.com/image.png',
            'https://www.google.com/image2.png',
            'path/to/notfound',
        ],
    })
def test_anchors_ignored(app):
    """With the failing anchors and links ignored, the report must be empty."""
    app.build()

    report = app.outdir / 'output.txt'
    assert report.exists()

    # nothing is left to report once '#top' and the broken links are excluded
    assert not report.read_text()
124
125
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-anchor', freshenv=True)
def test_raises_for_invalid_status(app):
    """A server answering GET with HTTP 500 makes the link show up as broken."""

    class InternalServerErrorHandler(http.server.BaseHTTPRequestHandler):
        """Reply to every GET request with a 500 error."""

        def do_GET(self):
            self.send_error(500, "Internal Server Error")

    with http_server(InternalServerErrorHandler):
        app.build()

    expected_report = (
        "index.rst:1: [broken] http://localhost:7777/#anchor: "
        "500 Server Error: Internal Server Error "
        "for url: http://localhost:7777/\n"
    )
    assert (app.outdir / 'output.txt').read_text() == expected_report
140
141
class HeadersDumperHandler(http.server.BaseHTTPRequestHandler):
    """Answer GET and HEAD with ``200 OK`` and print the request headers.

    The headers are written to stdout so that tests can inspect them through
    captured output.
    """

    def do_GET(self):
        self.send_response(200, "OK")
        self.end_headers()
        dumped = self.headers.as_string()
        print(dumped)

    def do_HEAD(self):
        # HEAD requests are served exactly like GET requests.
        self.do_GET()
150
151
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_auth': [
        (r'^$', ('no', 'match')),
        (r'^http://localhost:7777/$', ('user1', 'password')),
        (r'.*local.*', ('user2', 'hunter2')),
    ]})
def test_auth_header_uses_first_match(app, capsys):
    """The request must carry the ``user1`` credentials from ``linkcheck_auth``.

    The expected ``Authorization`` header is looked up in the headers that
    the test server prints to stdout.
    """
    with http_server(HeadersDumperHandler):
        app.build()

    dumped_headers = capsys.readouterr()[0]
    credentials = requests.auth._basic_auth_str('user1', 'password')
    expected_line = "Authorization: %s\n" % credentials
    assert expected_line in dumped_headers
165
166
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_auth': [(r'^$', ('user1', 'password'))]})
def test_auth_header_no_match(app, capsys):
    """No ``Authorization`` header may appear in the dumped request headers."""
    with http_server(HeadersDumperHandler):
        app.build()

    dumped_headers = capsys.readouterr()[0]
    assert "Authorization" not in dumped_headers
175
176
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://localhost:7777/": {
            "Accept": "text/html",
        },
        "*": {
            "X-Secret": "open sesami",
        },
    }})
def test_linkcheck_request_headers(app, capsys):
    """Headers configured for the URL are sent; the ``*`` entry is not used."""
    with http_server(HeadersDumperHandler):
        app.build()

    dumped_headers = capsys.readouterr()[0]
    assert "Accept: text/html\n" in dumped_headers
    # nothing from the "*" entry may leak into the request
    for unexpected in ("X-Secret", "sesami"):
        assert unexpected not in dumped_headers
195
196
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://localhost:7777": {"Accept": "application/json"},
        "*": {"X-Secret": "open sesami"},
    }})
def test_linkcheck_request_headers_no_slash(app, capsys):
    """A header key written without a trailing slash is still applied."""
    with http_server(HeadersDumperHandler):
        app.build()

    dumped_headers = capsys.readouterr()[0]
    assert "Accept: application/json\n" in dumped_headers
    # nothing from the "*" entry may leak into the request
    for unexpected in ("X-Secret", "sesami"):
        assert unexpected not in dumped_headers
211
212
@pytest.mark.sphinx(
    'linkcheck', testroot='linkcheck-localserver', freshenv=True,
    confoverrides={'linkcheck_request_headers': {
        "http://do.not.match.org": {"Accept": "application/json"},
        "*": {"X-Secret": "open sesami"}
    }})
def test_linkcheck_request_headers_default(app, capsys):
    """The ``*`` headers are sent when no URL-specific entry is expected to apply.

    The headers configured for ``http://do.not.match.org`` must not be sent,
    while the ``*`` entry's ``X-Secret`` header must be present in the
    request headers that the test server prints to stdout.
    """
    with http_server(HeadersDumperHandler):
        app.build()

    stdout, _stderr = capsys.readouterr()
    # The header name is "Accept"; checking for "Accepts" could never fail,
    # because no such header is ever sent.
    assert "Accept: application/json\n" not in stdout
    assert "X-Secret: open sesami\n" in stdout
226
227
def make_redirect_handler(*, support_head):
    """Create a request handler class that redirects exactly once.

    GET requests for any path other than ``/?redirected=1`` are answered with
    a 302 pointing at ``http://localhost:7777/?redirected=1``; that target is
    answered with 204.  HEAD requests behave like GET when *support_head* is
    true and are rejected with 405 otherwise.
    """

    class RedirectOnceHandler(http.server.BaseHTTPRequestHandler):
        def do_GET(self):
            if self.path != "/?redirected=1":
                self.send_response(302, "Found")
                self.send_header("Location", "http://localhost:7777/?redirected=1")
            else:
                self.send_response(204, "No content")
            self.end_headers()

        def do_HEAD(self):
            if not support_head:
                self.send_response(405, "Method Not Allowed")
                self.end_headers()
                return
            self.do_GET()

        def log_date_time_string(self):
            """Return an empty timestamp so logged lines are deterministic."""
            return ""

    return RedirectOnceHandler
250
251
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_HEAD(app, capsys):
    """When the server supports HEAD, the redirect is followed with HEAD only."""
    handler = make_redirect_handler(support_head=True)
    with http_server(handler):
        app.build()
    server_log = capsys.readouterr()[1]

    expected_report = (
        "index.rst:1: [redirected with Found] "
        "http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
    )
    assert (app.outdir / 'output.txt').read_text() == expected_report

    # the request log written to stderr shows two HEAD requests and no GET
    expected_log = textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 302 -
        127.0.0.1 - - [] "HEAD /?redirected=1 HTTP/1.1" 204 -
        """
    )
    assert server_log == expected_log
268
269
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_follows_redirects_on_GET(app, capsys):
    """When HEAD is rejected with 405, the redirect is followed using GET."""
    handler = make_redirect_handler(support_head=False)
    with http_server(handler):
        app.build()
    server_log = capsys.readouterr()[1]

    expected_report = (
        "index.rst:1: [redirected with Found] "
        "http://localhost:7777/ to http://localhost:7777/?redirected=1\n"
    )
    assert (app.outdir / 'output.txt').read_text() == expected_report

    # the request log written to stderr: one rejected HEAD, then two GETs
    expected_log = textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 405 -
        127.0.0.1 - - [] "GET / HTTP/1.1" 302 -
        127.0.0.1 - - [] "GET /?redirected=1 HTTP/1.1" 204 -
        """
    )
    assert server_log == expected_log
287
288
class OKHandler(http.server.BaseHTTPRequestHandler):
    """Answer HEAD with ``200 OK``; GET additionally sends the body ``ok``."""

    def do_GET(self):
        # Same status line and headers as HEAD, followed by a short body.
        self.do_HEAD()
        body = b"ok\n"
        self.wfile.write(body)

    def do_HEAD(self):
        self.send_response(200, "OK")
        self.end_headers()
297
298
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_invalid_ssl(app):
    """An https link served by a plain-HTTP server is reported as broken."""
    # The link asks for SSL (https) but the server started here does not speak it.
    with http_server(OKHandler):
        app.build()

    result = json.loads((app.outdir / 'output.json').read_text())
    expected_fields = (
        ("status", "broken"),
        ("filename", "index.rst"),
        ("lineno", 1),
        ("uri", "https://localhost:7777/"),
    )
    for key, value in expected_fields:
        assert result[key] == value
    assert "SSLError" in result["info"]
312
313
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_fails(app):
    """Without extra TLS configuration, certificate verification must fail."""
    with https_server(OKHandler):
        app.build()

    result = json.loads((app.outdir / 'output.json').read_text())
    expected_fields = (
        ("status", "broken"),
        ("filename", "index.rst"),
        ("lineno", 1),
        ("uri", "https://localhost:7777/"),
    )
    for key, value in expected_fields:
        assert result[key] == value
    assert "[SSL: CERTIFICATE_VERIFY_FAILED]" in result["info"]
326
327
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_verify_false(app):
    """With ``tls_verify`` set to False the https link is reported as working."""
    app.config.tls_verify = False
    with https_server(OKHandler):
        app.build()

    expected = {
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected
344
345
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_tls_cacerts(app):
    """With ``tls_cacerts`` set to ``CERT_FILE`` the https link is reported as working."""
    app.config.tls_cacerts = CERT_FILE
    with https_server(OKHandler):
        app.build()

    expected = {
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected
362
363
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_with_requests_env_var(monkeypatch, app):
    """With ``REQUESTS_CA_BUNDLE`` set to ``CERT_FILE`` the link is reported as working."""
    monkeypatch.setenv("REQUESTS_CA_BUNDLE", CERT_FILE)
    with https_server(OKHandler):
        app.build()

    expected = {
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected
380
381
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver-https', freshenv=True)
def test_connect_to_selfsigned_nonexistent_cert_file(app):
    """A ``tls_cacerts`` path that does not exist yields a broken link."""
    app.config.tls_cacerts = "does/not/exist"
    with https_server(OKHandler):
        app.build()

    expected = {
        "filename": "index.rst",
        "lineno": 1,
        "uri": "https://localhost:7777/",
        "status": "broken",
        "code": 0,
        "info": "Could not find a suitable TLS CA certificate bundle, invalid path: does/not/exist",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected
398
399
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_TooManyRedirects_on_HEAD(app):
    """A link whose HEAD redirects to itself but whose GET succeeds is working."""

    class InfiniteRedirectOnHeadHandler(http.server.BaseHTTPRequestHandler):
        """Redirect HEAD to the same URL; answer GET with ``200 OK``."""

        def do_GET(self):
            self.send_response(200, "OK")
            self.end_headers()
            self.wfile.write(b"ok\n")

        def do_HEAD(self):
            self.send_response(302, "Found")
            self.send_header("Location", "http://localhost:7777/")
            self.end_headers()

    with http_server(InfiniteRedirectOnHeadHandler):
        app.build()

    expected = {
        "filename": "index.rst",
        "lineno": 1,
        "uri": "http://localhost:7777/",
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected
426
427
def make_retry_after_handler(responses):
    """Create a handler class that replays scripted HEAD responses.

    *responses* is a list of ``(status, retry_after)`` pairs.  Every HEAD
    request removes the first pair from that list, replies with its status
    and, when ``retry_after`` is truthy, adds it as a ``Retry-After`` header.
    """

    class RetryAfterHandler(http.server.BaseHTTPRequestHandler):
        def log_date_time_string(self):
            """Return an empty timestamp so logged lines are deterministic."""
            return ""

        def do_HEAD(self):
            # Consume the next scripted response; this mutates the caller's list.
            code, delay = responses.pop(0)
            self.send_response(code)
            if delay:
                self.send_header('Retry-After', delay)
            self.end_headers()

    return RetryAfterHandler
442
443
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_int_delay(app, capsys, status):
    """A 429 carrying ``Retry-After: 0`` is retried and then reported as working."""
    handler = make_retry_after_handler([(429, "0"), (200, None)])
    with http_server(handler):
        with mock.patch("sphinx.builders.linkcheck.DEFAULT_DELAY", 0):
            with mock.patch("sphinx.builders.linkcheck.QUEUE_POLL_SECS", 0.01):
                app.build()

    expected = {
        "uri": "http://localhost:7777/",
        "filename": "index.rst",
        "lineno": 1,
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected

    # the status output must mention that the link was rate limited
    rate_limit_log = "-rate limited-   http://localhost:7777/ | sleeping...\n"
    assert rate_limit_log in strip_colors(status.getvalue())

    # the server log on stderr shows the 429 followed by the successful retry
    server_log = capsys.readouterr()[1]
    expected_log = textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 429 -
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 200 -
        """
    )
    assert server_log == expected_log
468
469
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_HTTP_date(app, capsys):
    """A 429 whose ``Retry-After`` is an HTTP date is retried and then works."""
    # Use the current time, formatted as an HTTP date, as the Retry-After value.
    timestamp = time.mktime(datetime.now().timetuple())
    retry_after = wsgiref.handlers.format_date_time(timestamp)
    handler = make_retry_after_handler([(429, retry_after), (200, None)])
    with http_server(handler):
        app.build()

    expected = {
        "uri": "http://localhost:7777/",
        "filename": "index.rst",
        "lineno": 1,
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected

    # the server log on stderr shows the 429 followed by the successful retry
    server_log = capsys.readouterr()[1]
    expected_log = textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 429 -
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 200 -
        """
    )
    assert server_log == expected_log
492
493
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_retry_after_without_header(app, capsys):
    """A 429 without a ``Retry-After`` header is retried and then works."""
    handler = make_retry_after_handler([(429, None), (200, None)])
    with http_server(handler):
        with mock.patch("sphinx.builders.linkcheck.DEFAULT_DELAY", 0):
            app.build()

    expected = {
        "uri": "http://localhost:7777/",
        "filename": "index.rst",
        "lineno": 1,
        "status": "working",
        "code": 0,
        "info": "",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected

    # the server log on stderr shows the 429 followed by the successful retry
    server_log = capsys.readouterr()[1]
    expected_log = textwrap.dedent(
        """\
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 429 -
        127.0.0.1 - - [] "HEAD / HTTP/1.1" 200 -
        """
    )
    assert server_log == expected_log
515
516
@pytest.mark.sphinx('linkcheck', testroot='linkcheck-localserver', freshenv=True)
def test_too_many_requests_user_timeout(app, capsys):
    """With a rate-limit timeout of zero, a 429 response is reported as broken."""
    app.config.linkcheck_rate_limit_timeout = 0.0
    handler = make_retry_after_handler([(429, None)])
    with http_server(handler):
        app.build()

    expected = {
        "uri": "http://localhost:7777/",
        "filename": "index.rst",
        "lineno": 1,
        "status": "broken",
        "code": 0,
        "info": "429 Client Error: Too Many Requests for url: http://localhost:7777/",
    }
    assert json.loads((app.outdir / 'output.json').read_text()) == expected
531
532
class FakeResponse:
    """Minimal response stand-in exposing only ``url`` and ``headers``."""

    url = "http://localhost/"
    # Class-level (shared) mapping; empty, so no Retry-After header is present.
    headers: Dict[str, str] = {}
536
537
def test_limit_rate_default_sleep(app):
    """With no stored rate limits, the next check is due at 60.0.

    ``time.time`` is patched to return 0.0 while ``limit_rate`` runs.
    """
    first_queue, second_queue = Queue(), Queue()
    checker = HyperlinkAvailabilityCheckWorker(app.env, app.config,
                                               first_queue, second_queue, {})
    response = FakeResponse()
    with mock.patch('time.time', return_value=0.0):
        result = checker.limit_rate(response)
    assert result == 60.0
543
544
def test_limit_rate_user_max_delay(app):
    """With ``linkcheck_rate_limit_timeout`` set to 0.0, ``limit_rate`` returns None."""
    app.config.linkcheck_rate_limit_timeout = 0.0
    first_queue, second_queue = Queue(), Queue()
    checker = HyperlinkAvailabilityCheckWorker(app.env, app.config,
                                               first_queue, second_queue, {})
    result = checker.limit_rate(FakeResponse())
    assert result is None
550
551
def test_limit_rate_doubles_previous_wait_time(app):
    """A stored ``RateLimit(60.0, 0.0)`` for localhost yields a next check at 120.0.

    ``time.time`` is patched to return 0.0 while ``limit_rate`` runs.
    """
    known_limits = {"localhost": RateLimit(60.0, 0.0)}
    first_queue, second_queue = Queue(), Queue()
    checker = HyperlinkAvailabilityCheckWorker(app.env, app.config,
                                               first_queue, second_queue, known_limits)
    response = FakeResponse()
    with mock.patch('time.time', return_value=0.0):
        result = checker.limit_rate(response)
    assert result == 120.0
559
560
def test_limit_rate_clips_wait_time_to_max_time(app):
    """With a 90.0 timeout and a stored ``RateLimit(60.0, 0.0)``, the result is 90.0.

    ``time.time`` is patched to return 0.0 while ``limit_rate`` runs.
    """
    app.config.linkcheck_rate_limit_timeout = 90.0
    known_limits = {"localhost": RateLimit(60.0, 0.0)}
    first_queue, second_queue = Queue(), Queue()
    checker = HyperlinkAvailabilityCheckWorker(app.env, app.config,
                                               first_queue, second_queue, known_limits)
    response = FakeResponse()
    with mock.patch('time.time', return_value=0.0):
        result = checker.limit_rate(response)
    assert result == 90.0
569
570
def test_limit_rate_bails_out_after_waiting_max_time(app):
    """With a 90.0 timeout and a stored ``RateLimit(90.0, 0.0)``, the result is None."""
    app.config.linkcheck_rate_limit_timeout = 90.0
    known_limits = {"localhost": RateLimit(90.0, 0.0)}
    first_queue, second_queue = Queue(), Queue()
    checker = HyperlinkAvailabilityCheckWorker(app.env, app.config,
                                               first_queue, second_queue, known_limits)
    result = checker.limit_rate(FakeResponse())
    assert result is None
578