1# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:
2
3# Copyright 2016-2021 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
4#
5# This file is part of qutebrowser.
6#
7# qutebrowser is free software: you can redistribute it and/or modify
8# it under the terms of the GNU General Public License as published by
9# the Free Software Foundation, either version 3 of the License, or
10# (at your option) any later version.
11#
12# qutebrowser is distributed in the hope that it will be useful,
13# but WITHOUT ANY WARRANTY; without even the implied warranty of
14# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15# GNU General Public License for more details.
16#
17# You should have received a copy of the GNU General Public License
18# along with qutebrowser.  If not, see <https://www.gnu.org/licenses/>.
19
20"""Backend-independent qute://* code.
21
22Module attributes:
23    pyeval_output: The output of the last :pyeval command.
24    _HANDLERS: The handlers registered via decorators.
25"""
26
27import html
28import json
29import os
30import time
31import textwrap
32import urllib
33import collections
34import secrets
35from typing import TypeVar, Callable, Dict, List, Optional, Union, Sequence, Tuple
36
37from PyQt5.QtCore import QUrlQuery, QUrl
38
39import qutebrowser
40from qutebrowser.browser import pdfjs, downloads, history
41from qutebrowser.config import config, configdata, configexc
42from qutebrowser.utils import (version, utils, jinja, log, message, docutils,
43                               resources, objreg, standarddir)
44from qutebrowser.misc import guiprocess, quitter
45from qutebrowser.qt import sip
46
47
# Registry of qute://* handlers, keyed by the host part of the URL (the 'foo'
# in qute://foo). Filled in by the add_handler decorator.
_HANDLERS = {}

# Token which requests to qute://settings/set have to carry. A new one is
# generated by qute_settings whenever the settings page itself is rendered.
csrf_token = None

# Text which is shown on qute://pyeval.
pyeval_output = ":pyeval was never called"
53
54
class Error(Exception):

    """Base class for the errors raised while serving a qute:// page."""
58
59
class NotFoundError(Error):

    """Error for a qute:// URL which could not be found."""
63
64
class SchemeOSError(Error):

    """Error signalling that an OSError occurred inside a handler."""
68
69
class UrlInvalidError(Error):

    """Error for a qute:// URL which is invalid."""
73
74
class RequestDeniedError(Error):

    """Error for a request which is not permitted."""
78
79
class Redirect(Exception):

    """Signal that the request should be redirected to another URL.

    The exception message is the display string of the target URL.

    Attributes:
        url: The QUrl which should be loaded instead.
    """

    def __init__(self, url: QUrl):
        display_str = url.toDisplayString()
        super().__init__(display_str)
        self.url = url
91
92
# Return value of a handler: a (mimetype, data) tuple. data may be a str
# instead of bytes; data_for_url encodes it as utf-8 when the mimetype is
# text/html.
_HandlerRet = Tuple[str, Union[str, bytes]]
# Signature of a handler: takes the requested QUrl and returns a _HandlerRet.
_HandlerCallable = Callable[[QUrl], _HandlerRet]
# Type variable used by add_handler.__call__, which returns the function it
# was given.
_Handler = TypeVar('_Handler', bound=_HandlerCallable)
97
98
class add_handler:  # noqa: N801,N806 pylint: disable=invalid-name

    """Class-based decorator which registers a qute://* URL handler.

    Attributes:
        _name: The host the handler is registered for ('foo' in qute://foo).
        _function: The decorated handler, None until the decorator was
                   applied.
    """

    def __init__(self, name: str) -> None:
        self._function: Optional[_HandlerCallable] = None
        self._name = name

    def __call__(self, function: _Handler) -> _Handler:
        """Register the given function and return it unchanged."""
        self._function = function
        _HANDLERS[self._name] = self.wrapper
        return function

    def wrapper(self, url: QUrl) -> _HandlerRet:
        """Pass the given URL on to the registered function."""
        handler = self._function
        assert handler is not None
        return handler(url)
120
121
def data_for_url(url: QUrl) -> Tuple[str, bytes]:
    """Run the handler responsible for the given URL and return its data.

    A Redirect is raised if the URL is not in its normalized form, or if
    its path or host is missing and a host could be determined for the
    corrected URL. A NotFoundError is raised if no handler is registered for
    the host, and an OSError coming from the handler is turned into a
    SchemeOSError.

    Args:
        url: The QUrl which was requested.

    Return:
        A (mimetype, data) tuple, where data is bytes.
    """
    normalized = url.adjusted(
        QUrl.NormalizePathSegments |  # type: ignore[arg-type]
        QUrl.StripTrailingSlash)
    if normalized != url:
        raise Redirect(normalized)

    path = url.path()
    host = url.host()
    query = url.query()
    # A url like "qute:foo" is split as "scheme:path", not "scheme:host".
    log.misc.debug("url: {}, path: {}, host {}".format(
        url.toDisplayString(), path, host))

    if not (path and host):
        fixed_url = QUrl()
        fixed_url.setScheme('qute')
        # host given, path absent: e.g. qute://help (no trailing slash)
        # host absent: e.g. qute:help, where "help" ended up as the path
        fixed_url.setHost(host if host else path)
        fixed_url.setPath('/')
        if query:
            fixed_url.setQuery(query)
        if fixed_url.host():  # path was a valid host
            raise Redirect(fixed_url)

    try:
        handler = _HANDLERS[host]
    except KeyError:
        raise NotFoundError("No handler found for {}".format(
            url.toDisplayString()))

    try:
        mimetype, data = handler(url)
    except OSError as e:
        raise SchemeOSError(e)

    assert mimetype is not None, url
    # Handlers are allowed to return HTML as text, which is encoded here.
    is_html_text = isinstance(data, str) and mimetype == 'text/html'
    if is_html_text:
        data = data.encode('utf-8', errors='xmlcharrefreplace')
    assert isinstance(data, bytes)

    return mimetype, data
177
178
@add_handler('bookmarks')
def qute_bookmarks(_url: QUrl) -> _HandlerRet:
    """Handler for qute://bookmarks.

    Renders a page listing all bookmarks and quickmarks.
    """
    bookmarks = list(objreg.get('bookmark-manager').marks.items())
    bookmarks.sort(key=lambda mark: mark[1])  # Sort by title

    quickmarks = list(objreg.get('quickmark-manager').marks.items())
    quickmarks.sort(key=lambda mark: mark[0])  # Sort by name

    page = jinja.render('bookmarks.html',
                        title='Bookmarks',
                        bookmarks=bookmarks,
                        quickmarks=quickmarks)
    return 'text/html', page
192
193
@add_handler('tabs')
def qute_tabs(_url: QUrl) -> _HandlerRet:
    """Handler for qute://tabs.

    Renders a page with the title and URL of the open tabs, grouped by
    window. Tabs showing qute://tabs itself are left out.
    """
    own_urls = (QUrl("qute://tabs/"), QUrl("qute://tabs"))
    by_window: Dict[str, List[Tuple[str, str]]] = collections.defaultdict(
        list)

    for win_id, window in objreg.window_registry.items():
        if sip.isdeleted(window):
            continue
        browser = objreg.get('tabbed-browser', scope='window', window=win_id)
        for tab in browser.widgets():
            tab_url = tab.url()
            if tab_url in own_urls:
                continue
            urlstr = tab_url.toDisplayString()
            by_window[str(win_id)].append((tab.title(), urlstr))

    page = jinja.render('tabs.html', title='Tabs', tab_list_by_window=by_window)
    return 'text/html', page
213
214
def history_data(
        start_time: float,
        offset: Optional[int] = None
) -> Sequence[Dict[str, Union[str, int]]]:
    """Return history data.

    If an offset is given, the entries before start_time are queried with
    that offset and limit=1000. Otherwise, the entries of the 24 hours
    leading up to start_time are returned.

    Arguments:
        start_time: select history starting from this timestamp.
        offset: number of items to skip, or None to select by time range.

    Return:
        A list of dicts with the keys "url", "title" and "time". The title
        is HTML-escaped; if an entry's title is empty, its escaped URL is
        used as the title instead.
    """
    # history atimes are stored as ints, ensure start_time is not a float
    start_time = int(start_time)
    if offset is not None:
        entries = history.web_history.entries_before(start_time, limit=1000,
                                                     offset=offset)
    else:
        # end is 24hrs earlier than start
        end_time = start_time - 24*60*60
        entries = history.web_history.entries_between(end_time, start_time)

    return [{"url": e.url,
             "title": html.escape(e.title) or html.escape(e.url),
             "time": e.atime} for e in entries]
238
239
@add_handler('history')
def qute_history(url: QUrl) -> _HandlerRet:
    """Handler for qute://history.

    qute://history/data serves the history entries as JSON, taking the
    optional query parameters "offset" and "start_time" (which defaults to
    the current time). Any other path renders the history page.
    """
    if url.path() != '/data':
        page = jinja.render(
            'history.html',
            title='History',
            gap_interval=config.val.history_gap_interval
        )
        return 'text/html', page

    query = QUrlQuery(url)

    raw_offset = query.queryItemValue("offset")
    try:
        offset = int(raw_offset) if raw_offset else None
    except ValueError:
        raise UrlInvalidError("Query parameter offset is invalid")

    # Use start_time in query or current time.
    raw_start_time = query.queryItemValue("start_time")
    try:
        start_time = float(raw_start_time) if raw_start_time else time.time()
    except ValueError:
        raise UrlInvalidError("Query parameter start_time is invalid")

    entries = history_data(start_time, offset)
    return 'text/html', json.dumps(entries)
264
265
@add_handler('javascript')
def qute_javascript(url: QUrl) -> _HandlerRet:
    """Handler for qute://javascript.

    Return the content of the file given by the path of the URL, which is
    looked up below the "javascript" resource folder.
    """
    url_path = url.path()
    if not url_path:
        raise UrlInvalidError("No file specified")

    # The URL path always uses '/', the resource path uses the OS separator.
    resource_path = "javascript" + url_path.replace('/', os.sep)
    return 'text/html', resources.read_file(resource_path)
278
279
@add_handler('pyeval')
def qute_pyeval(_url: QUrl) -> _HandlerRet:
    """Handler for qute://pyeval. Show the current pyeval_output."""
    return 'text/html', jinja.render('pre.html',
                                     title='pyeval',
                                     content=pyeval_output)
285
286
@add_handler('process')
def qute_process(url: QUrl) -> _HandlerRet:
    """Handler for qute://process.

    The path of the URL (without its leading character) is taken as the PID
    of the process to show information about.
    """
    pid_str = url.path()[1:]
    try:
        pid = int(pid_str)
    except ValueError:
        raise UrlInvalidError(f"Invalid PID {pid_str}")

    try:
        proc = guiprocess.all_processes[pid]
    except KeyError:
        raise NotFoundError(f"No process {pid}")

    # The PID is known, but there is no data stored for it anymore.
    if proc is None:
        raise NotFoundError(f"Data for process {pid} got cleaned up.")

    page = jinja.render('process.html', title=f'Process {pid}', proc=proc)
    return 'text/html', page
306
307
@add_handler('version')
@add_handler('verizon')
def qute_version(_url: QUrl) -> _HandlerRet:
    """Handler for qute://version (also registered as qute://verizon)."""
    version_info = version.version_info()
    page = jinja.render('version.html',
                        title='Version info',
                        version=version_info,
                        copyright=qutebrowser.__copyright__)
    return 'text/html', page
316
317
@add_handler('log')
def qute_log(url: QUrl) -> _HandlerRet:
    """Handler for qute://log.

    The following query parameters are understood:

    - level: The minimum log level to print, defaulting to vdebug.
      For example, qute://log?level=warning prints warnings and errors.
      Level can be one of: vdebug, debug, info, warning, error, critical.

    - plain: If given (and not 'false'), plaintext is shown.

    - logfilter: A filter string like the --logfilter commandline argument
      accepts.
    """
    query = QUrlQuery(url)

    plain = False
    if query.hasQueryItem('plain'):
        plain = query.queryItemValue('plain').lower() != 'false'

    if log.ram_handler is not None:
        level = query.queryItemValue('level') or 'vdebug'
        filter_str = query.queryItemValue('logfilter')

        logfilter = None
        if filter_str:
            try:
                logfilter = log.LogFilter.parse(filter_str, only_debug=False)
            except log.InvalidLogFilterError as e:
                raise UrlInvalidError(e)

        content = log.ram_handler.dump_log(html=not plain,
                                           level=level, logfilter=logfilter)
    elif plain:
        content = "Log output was disabled."
    else:
        content = None

    if plain:
        template = 'pre.html'
    else:
        template = 'log.html'
    return 'text/html', jinja.render(template, title='log', content=content)
358
359
@add_handler('gpl')
def qute_gpl(_url: QUrl) -> _HandlerRet:
    """Handler for qute://gpl. Serve the html/license.html resource."""
    license_html = resources.read_file('html/license.html')
    return 'text/html', license_html
364
365
def _asciidoc_fallback_path(html_path: str) -> Optional[str]:
    """Read the plaintext asciidoc belonging to a HTML documentation file.

    Args:
        html_path: The resource path of the HTML file.

    Return:
        The contents of the file found by replacing '.html' with '.asciidoc'
        in the given path, or None if reading it failed with an OSError.
    """
    asciidoc_path = html_path.replace('.html', '.asciidoc')
    try:
        contents = resources.read_file(asciidoc_path)
    except OSError:
        return None
    return contents
373
374
@add_handler('help')
def qute_help(url: QUrl) -> _HandlerRet:
    """Handler for qute://help.

    Serves the files below html/doc/, with index.html being used for an
    empty path. Files not ending in .html are served as binary data with a
    guessed mimetype. If a HTML file can't be read, the asciidoc source is
    shown as plaintext instead when it is available.
    """
    requested = url.path()
    urlpath = 'index.html' if requested in ('', '/') else requested.lstrip('/')

    if not docutils.docs_up_to_date(urlpath):
        message.error("Your documentation is outdated! Please re-run "
                      "scripts/asciidoc2html.py.")

    path = 'html/doc/{}'.format(urlpath)

    if not urlpath.endswith('.html'):
        try:
            bdata = resources.read_file_binary(path)
        except OSError as e:
            raise SchemeOSError(e)
        return utils.guess_mimetype(urlpath), bdata

    try:
        return 'text/html', resources.read_file(path)
    except OSError:
        asciidoc = _asciidoc_fallback_path(path)

        # No fallback available either, so let the original error through.
        if asciidoc is None:
            raise

        preamble = textwrap.dedent("""
            There was an error loading the documentation!

            This most likely means the documentation was not generated
            properly. If you are running qutebrowser from the git repository,
            please (re)run scripts/asciidoc2html.py and reload this page.

            If you're running a released version this is a bug, please use
            :report to report it.

            Falling back to the plaintext version.

            ---------------------------------------------------------------


        """)
        plaintext = preamble + asciidoc
        return 'text/plain', plaintext.encode('utf-8')
423
424
def _qute_settings_set(url: QUrl) -> _HandlerRet:
    """Handler for qute://settings/set.

    Sets the setting given by the "option" query parameter to the string in
    the "value" query parameter and saves it to the YAML config. The
    response body is b'ok', or b'error: ' followed by the error message.
    """
    query = QUrlQuery(url)
    option = query.queryItemValue('option', QUrl.FullyDecoded)
    value = query.queryItemValue('value', QUrl.FullyDecoded)

    # https://github.com/qutebrowser/qutebrowser/issues/727
    if (option, value) == ('content.javascript.enabled', 'false'):
        msg = ("Refusing to disable javascript via qute://settings "
               "as it needs javascript support.")
        message.error(msg)
        return 'text/html', b'error: ' + msg.encode('utf-8')

    try:
        config.instance.set_str(option, value, save_yaml=True)
    except configexc.Error as e:
        error_text = str(e)
        message.error(error_text)
        return 'text/html', b'error: ' + error_text.encode('utf-8')

    return 'text/html', b'ok'
444
445
@add_handler('settings')
def qute_settings(url: QUrl) -> _HandlerRet:
    """Handler for qute://settings. View/change qute configuration.

    Rendering the settings page generates a new CSRF token. A request to
    qute://settings/set is only processed if the password part of its URL
    equals the current token, and is ignored during shutdown.
    """
    global csrf_token

    if url.path() != '/set':
        # Requests to qute://settings/set should only be allowed from
        # qute://settings. As an additional security precaution, we generate
        # a CSRF token to use here.
        csrf_token = secrets.token_urlsafe()

        page = jinja.render('settings.html', title='settings',
                            configdata=configdata,
                            confget=config.instance.get_str,
                            csrf_token=csrf_token)
        return 'text/html', page

    if url.password() != csrf_token:
        message.error("Invalid CSRF token for qute://settings!")
        raise RequestDeniedError("Invalid CSRF token!")

    if quitter.instance.is_shutting_down:
        log.config.debug("Ignoring /set request during shutdown")
        return 'text/html', b'error: ignored'

    return _qute_settings_set(url)
470
471
@add_handler('bindings')
def qute_bindings(_url: QUrl) -> _HandlerRet:
    """Handler for qute://bindings. View keybindings.

    The modes are taken from the default and the custom bindings, with
    'normal' being listed first and the other modes following sorted.
    """
    default_bindings = config.val.bindings.default

    other_modes = set(default_bindings).union(config.val.bindings.commands)
    other_modes.remove('normal')

    bindings = {
        mode: config.key_instance.get_bindings_for(mode)
        for mode in ['normal'] + sorted(other_modes)
    }

    page = jinja.render('bindings.html', title='Bindings', bindings=bindings)
    return 'text/html', page
488
489
@add_handler('back')
def qute_back(url: QUrl) -> _HandlerRet:
    """Handler for qute://back.

    Simple page to free ram / lazy load a site, goes back on focusing the tab.
    The unquoted fragment of the URL is shown in the page title.
    """
    suspended = urllib.parse.unquote(url.fragment())
    page = jinja.render('back.html', title='Suspended: ' + suspended)
    return 'text/html', page
500
501
@add_handler('configdiff')
def qute_configdiff(_url: QUrl) -> _HandlerRet:
    """Handler for qute://configdiff. Serve the dumped user config."""
    userconfig = config.instance.dump_userconfig()
    return 'text/plain', userconfig.encode('utf-8')
507
508
@add_handler('pastebin-version')
def qute_pastebin_version(_url: QUrl) -> _HandlerRet:
    """Handler for qute://pastebin-version.

    Triggers pastebinning the version string and serves a short
    confirmation as plaintext.
    """
    version.pastebin_version()
    confirmation = b'Paste called.'
    return 'text/plain', confirmation
514
515
def _pdf_path(filename: str) -> str:
    """Get the path of the given file inside the temporary download dir."""
    tmpdir = downloads.temp_download_manager.get_tmpdir()
    return os.path.join(tmpdir.name, filename)
520
521
@add_handler('pdfjs')
def qute_pdfjs(url: QUrl) -> _HandlerRet:
    """Handler for qute://pdfjs.

    Three kinds of requests are handled:

    - /file serves the temporary file named by the "filename" query
      parameter.
    - /web/viewer.html returns the pdf.js viewer for that file, or redirects
      to the URL in the "source" query parameter if the file does not exist.
    - Any other path is looked up as a pdf.js resource.
    """
    url_path = url.path()

    if url_path == '/file':
        filename = QUrlQuery(url).queryItemValue('filename')
        if not filename:
            raise UrlInvalidError("Missing filename")
        if any(sep in filename for sep in ('/', os.sep)):
            raise RequestDeniedError("Path separator in filename.")

        with open(_pdf_path(filename), 'rb') as f:
            pdf_data = f.read()

        return utils.guess_mimetype(filename, fallback=True), pdf_data

    if url_path == '/web/viewer.html':
        query = QUrlQuery(url)
        filename = query.queryItemValue("filename")
        if not filename:
            raise UrlInvalidError("Missing filename")

        if os.path.isfile(_pdf_path(filename)):
            return 'text/html', pdfjs.generate_pdfjs_page(filename, url)

        source = query.queryItemValue('source')
        if not source:  # This may happen with old URLs stored in history
            raise UrlInvalidError("Missing source")
        raise Redirect(QUrl(source))

    try:
        resource = pdfjs.get_pdfjs_res(url_path)
    except pdfjs.PDFJSNotFound as e:
        # Logging as the error might get lost otherwise since we're not showing
        # the error page if a single asset is missing. This way we don't lose
        # information, as the failed pdfjs requests are still in the log.
        log.misc.warning(
            "pdfjs resource requested but not found: {}".format(e.path))
        raise NotFoundError("Can't find pdfjs resource '{}'".format(e.path))

    return utils.guess_mimetype(url.fileName(), fallback=True), resource
571
572
@add_handler('warning')
def qute_warning(url: QUrl) -> _HandlerRet:
    """Handler for qute://warning.

    Serves the warning pages at /webkit and /sessions; any other path is
    answered with a NotFoundError.
    """
    path = url.path()

    if path == '/webkit':
        page = jinja.render('warning-webkit.html',
                            title='QtWebKit backend warning')
        return 'text/html', page

    if path == '/sessions':
        page = jinja.render('warning-sessions.html',
                            title='Qt 5.15 sessions warning',
                            datadir=standarddir.data(),
                            sep=os.sep)
        return 'text/html', page

    raise NotFoundError("Invalid warning page {}".format(path))
588
589
@add_handler('resource')
def qute_resource(url: QUrl) -> _HandlerRet:
    """Handler for qute://resource.

    Serves the resource file named by the URL path as binary data, with the
    mimetype being guessed from that path.
    """
    resource_path = url.path().lstrip('/')
    mimetype = utils.guess_mimetype(resource_path, fallback=True)
    try:
        return mimetype, resources.read_file_binary(resource_path)
    except FileNotFoundError as e:
        raise NotFoundError(str(e))
600