# vim: ft=python fileencoding=utf-8 sts=4 sw=4 et:

# Copyright 2016-2021 Florian Bruhin (The Compiler) <mail@qutebrowser.org>
#
# This file is part of qutebrowser.
#
# qutebrowser is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# qutebrowser is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with qutebrowser. If not, see <https://www.gnu.org/licenses/>.

"""Backend-independent qute://* code.

Module attributes:
    pyeval_output: The output of the last :pyeval command.
    csrf_token: The token handed out by the last rendering of
                qute://settings, required by qute://settings/set.
    _HANDLERS: The handlers registered via decorators.
"""

import html
import json
import os
import time
import textwrap
# Import the submodule explicitly: a bare "import urllib" does not guarantee
# that urllib.parse (used by qute_back) is loaded.
import urllib.parse
import collections
import secrets
from typing import TypeVar, Callable, Dict, List, Optional, Union, Sequence, Tuple

from PyQt5.QtCore import QUrlQuery, QUrl

import qutebrowser
from qutebrowser.browser import pdfjs, downloads, history
from qutebrowser.config import config, configdata, configexc
from qutebrowser.utils import (version, utils, jinja, log, message, docutils,
                               resources, objreg, standarddir)
from qutebrowser.misc import guiprocess, quitter
from qutebrowser.qt import sip


pyeval_output = ":pyeval was never called"
csrf_token: Optional[str] = None


_HANDLERS = {}


class Error(Exception):

    """Exception for generic errors on a qute:// page."""


class NotFoundError(Error):

    """Raised when the given URL was not found."""


class SchemeOSError(Error):

    """Raised when there was an OSError inside a handler."""


class UrlInvalidError(Error):

    """Raised when an invalid URL was opened."""


class RequestDeniedError(Error):

    """Raised when the request is forbidden."""


class Redirect(Exception):

    """Exception to signal a redirect should happen.

    Attributes:
        url: The URL to redirect to, as a QUrl.
    """

    def __init__(self, url: QUrl):
        super().__init__(url.toDisplayString())
        self.url = url


# Return value: (mimetype, data) (encoded as utf-8 if a str is returned)
_HandlerRet = Tuple[str, Union[str, bytes]]
_HandlerCallable = Callable[[QUrl], _HandlerRet]
_Handler = TypeVar('_Handler', bound=_HandlerCallable)


class add_handler:  # noqa: N801,N806 pylint: disable=invalid-name

    """Decorator to register a qute://* URL handler.

    Attributes:
        _name: The 'foo' part of qute://foo
        _function: The decorated handler, set once the decorator is applied.
    """

    def __init__(self, name: str) -> None:
        self._name = name
        self._function: Optional[_HandlerCallable] = None

    def __call__(self, function: _Handler) -> _Handler:
        """Register the function under our name and return it unchanged.

        Returning the undecorated function allows stacking several
        @add_handler decorators on the same handler.
        """
        self._function = function
        _HANDLERS[self._name] = self.wrapper
        return function

    def wrapper(self, url: QUrl) -> _HandlerRet:
        """Call the underlying function."""
        assert self._function is not None
        return self._function(url)


def data_for_url(url: QUrl) -> Tuple[str, bytes]:
    """Get the data to show for the given URL.

    Args:
        url: The QUrl to show.

    Return:
        A (mimetype, data) tuple.

    Raises:
        Redirect: If the URL is not normalized, or has no host or no path.
        NotFoundError: If no handler is registered for the URL's host.
        SchemeOSError: If the handler raised an OSError.
    """
    norm_url = url.adjusted(
        QUrl.NormalizePathSegments |  # type: ignore[arg-type]
        QUrl.StripTrailingSlash)
    if norm_url != url:
        raise Redirect(norm_url)

    path = url.path()
    host = url.host()
    query = url.query()
    # A url like "qute:foo" is split as "scheme:path", not "scheme:host".
    log.misc.debug("url: {}, path: {}, host {}".format(
        url.toDisplayString(), path, host))
    if not path or not host:
        new_url = QUrl()
        new_url.setScheme('qute')
        # When path is absent, e.g. qute://help (with no trailing slash)
        if host:
            new_url.setHost(host)
        # When host is absent, e.g. qute:help
        else:
            new_url.setHost(path)

        new_url.setPath('/')
        if query:
            new_url.setQuery(query)
        if new_url.host():  # path was a valid host
            raise Redirect(new_url)

    try:
        handler = _HANDLERS[host]
    except KeyError as e:
        raise NotFoundError("No handler found for {}".format(
            url.toDisplayString())) from e

    try:
        mimetype, data = handler(url)
    except OSError as e:
        raise SchemeOSError(e) from e

    assert mimetype is not None, url
    if mimetype == 'text/html' and isinstance(data, str):
        # We let handlers return HTML as text
        data = data.encode('utf-8', errors='xmlcharrefreplace')
    assert isinstance(data, bytes)

    return mimetype, data


@add_handler('bookmarks')
def qute_bookmarks(_url: QUrl) -> _HandlerRet:
    """Handler for qute://bookmarks. Display all quickmarks / bookmarks."""
    bookmarks = sorted(objreg.get('bookmark-manager').marks.items(),
                       key=lambda x: x[1])  # Sort by title
    quickmarks = sorted(objreg.get('quickmark-manager').marks.items(),
                        key=lambda x: x[0])  # Sort by name

    src = jinja.render('bookmarks.html',
                       title='Bookmarks',
                       bookmarks=bookmarks,
                       quickmarks=quickmarks)
    return 'text/html', src


@add_handler('tabs')
def qute_tabs(_url: QUrl) -> _HandlerRet:
    """Handler for qute://tabs. Display information about all open tabs."""
    # Tabs showing this very page are left out of the listing.
    own_urls = (QUrl("qute://tabs/"), QUrl("qute://tabs"))

    tabs: Dict[str, List[Tuple[str, str]]] = collections.defaultdict(list)
    for win_id, window in objreg.window_registry.items():
        if sip.isdeleted(window):
            continue
        tabbed_browser = objreg.get('tabbed-browser',
                                    scope='window',
                                    window=win_id)
        for tab in tabbed_browser.widgets():
            tab_url = tab.url()
            if tab_url not in own_urls:
                urlstr = tab_url.toDisplayString()
                tabs[str(win_id)].append((tab.title(), urlstr))

    src = jinja.render('tabs.html',
                       title='Tabs',
                       tab_list_by_window=tabs)
    return 'text/html', src


def history_data(
        start_time: float,
        offset: Optional[int] = None
) -> Sequence[Dict[str, Union[str, int]]]:
    """Return history data.

    Arguments:
        start_time: select history starting from this timestamp.
        offset: number of items to skip. If None, the entries of the 24
                hours before start_time are returned instead.

    Return:
        A list of dicts with the keys "url", "title" (HTML-escaped, falling
        back to the escaped URL if the title is empty) and "time".
    """
    # history atimes are stored as ints, ensure start_time is not a float
    start_time = int(start_time)
    if offset is not None:
        entries = history.web_history.entries_before(start_time, limit=1000,
                                                     offset=offset)
    else:
        # end is 24hrs earlier than start
        end_time = start_time - 24*60*60
        entries = history.web_history.entries_between(end_time, start_time)

    return [{"url": e.url,
             "title": html.escape(e.title) or html.escape(e.url),
             "time": e.atime} for e in entries]


@add_handler('history')
def qute_history(url: QUrl) -> _HandlerRet:
    """Handler for qute://history. Display and serve history.

    qute://history/data returns the history entries as JSON and accepts the
    optional query parameters "offset" (int) and "start_time" (float,
    defaulting to the current time). Any other path renders the history page.

    Raises:
        UrlInvalidError: If a query parameter could not be parsed.
    """
    if url.path() == '/data':
        query = QUrlQuery(url)

        q_offset = query.queryItemValue("offset")
        try:
            offset = int(q_offset) if q_offset else None
        except ValueError as e:
            raise UrlInvalidError("Query parameter offset is invalid") from e

        # Use start_time in query or current time.
        q_start_time = query.queryItemValue("start_time")
        try:
            start_time = float(q_start_time) if q_start_time else time.time()
        except ValueError as e:
            raise UrlInvalidError(
                "Query parameter start_time is invalid") from e

        # The mimetype stays 'text/html' even for JSON: data_for_url only
        # encodes str data to bytes for that mimetype.
        return 'text/html', json.dumps(history_data(start_time, offset))
    else:
        return 'text/html', jinja.render(
            'history.html',
            title='History',
            gap_interval=config.val.history_gap_interval
        )


@add_handler('javascript')
def qute_javascript(url: QUrl) -> _HandlerRet:
    """Handler for qute://javascript.

    Return content of the file given by the URL path, looked up below the
    "javascript" resource directory.

    Raises:
        UrlInvalidError: If the URL has no path.
    """
    path = url.path()
    if path:
        # The URL path always uses '/', the resource path uses os.sep.
        path = "javascript" + os.sep.join(path.split('/'))
        return 'text/html', resources.read_file(path)
    else:
        raise UrlInvalidError("No file specified")


@add_handler('pyeval')
def qute_pyeval(_url: QUrl) -> _HandlerRet:
    """Handler for qute://pyeval."""
    src = jinja.render('pre.html', title='pyeval', content=pyeval_output)
    return 'text/html', src


@add_handler('process')
def qute_process(url: QUrl) -> _HandlerRet:
    """Handler for qute://process.

    The URL path (without the leading slash) is the PID to show.

    Raises:
        UrlInvalidError: If the path is not an integer.
        NotFoundError: If the PID is unknown or its data is gone.
    """
    path = url.path()[1:]
    try:
        pid = int(path)
    except ValueError as e:
        raise UrlInvalidError(f"Invalid PID {path}") from e

    try:
        proc = guiprocess.all_processes[pid]
    except KeyError as e:
        raise NotFoundError(f"No process {pid}") from e

    if proc is None:
        raise NotFoundError(f"Data for process {pid} got cleaned up.")

    src = jinja.render('process.html', title=f'Process {pid}', proc=proc)
    return 'text/html', src


@add_handler('version')
@add_handler('verizon')
def qute_version(_url: QUrl) -> _HandlerRet:
    """Handler for qute://version."""
    src = jinja.render('version.html', title='Version info',
                       version=version.version_info(),
                       copyright=qutebrowser.__copyright__)
    return 'text/html', src


@add_handler('log')
def qute_log(url: QUrl) -> _HandlerRet:
    """Handler for qute://log.

    There are three query parameters:

    - level: The minimum log level to print.
    For example, qute://log?level=warning prints warnings and errors.
    Level can be one of: vdebug, debug, info, warning, error, critical.

    - plain: If given (and not 'false'), plaintext is shown.

    - logfilter: A filter string like the --logfilter commandline argument
      accepts.

    Raises:
        UrlInvalidError: If the logfilter could not be parsed.
    """
    query = QUrlQuery(url)
    plain = (query.hasQueryItem('plain') and
             query.queryItemValue('plain').lower() != 'false')

    if log.ram_handler is None:
        content = "Log output was disabled." if plain else None
    else:
        level = query.queryItemValue('level')
        if not level:
            level = 'vdebug'

        filter_str = query.queryItemValue('logfilter')

        try:
            logfilter = (log.LogFilter.parse(filter_str, only_debug=False)
                         if filter_str else None)
        except log.InvalidLogFilterError as e:
            raise UrlInvalidError(e) from e

        content = log.ram_handler.dump_log(html=not plain,
                                           level=level, logfilter=logfilter)

    template = 'pre.html' if plain else 'log.html'
    src = jinja.render(template, title='log', content=content)
    return 'text/html', src


@add_handler('gpl')
def qute_gpl(_url: QUrl) -> _HandlerRet:
    """Handler for qute://gpl. Return HTML content as string."""
    return 'text/html', resources.read_file('html/license.html')


def _asciidoc_fallback_path(html_path: str) -> Optional[str]:
    """Fall back to plaintext asciidoc if the HTML is unavailable.

    Args:
        html_path: The resource path of the HTML file which failed to load.

    Return:
        The contents of the corresponding .asciidoc resource, or None if it
        could not be read either.
    """
    path = html_path.replace('.html', '.asciidoc')
    try:
        return resources.read_file(path)
    except OSError:
        return None


@add_handler('help')
def qute_help(url: QUrl) -> _HandlerRet:
    """Handler for qute://help.

    Non-HTML files are served as binary data with a guessed mimetype. If an
    HTML file can't be read, its asciidoc source is shown as plaintext
    instead, if available.

    Raises:
        SchemeOSError: If a non-HTML file could not be read.
        OSError: If neither the HTML file nor its asciidoc source could be
                 read.
    """
    urlpath = url.path()
    if not urlpath or urlpath == '/':
        urlpath = 'index.html'
    else:
        urlpath = urlpath.lstrip('/')
    if not docutils.docs_up_to_date(urlpath):
        message.error("Your documentation is outdated! Please re-run "
                      "scripts/asciidoc2html.py.")

    path = 'html/doc/{}'.format(urlpath)
    if not urlpath.endswith('.html'):
        try:
            bdata = resources.read_file_binary(path)
        except OSError as e:
            raise SchemeOSError(e) from e
        mimetype = utils.guess_mimetype(urlpath)
        return mimetype, bdata

    try:
        data = resources.read_file(path)
    except OSError:
        asciidoc = _asciidoc_fallback_path(path)

        if asciidoc is None:
            # No fallback available, let the original OSError propagate.
            raise

        preamble = textwrap.dedent("""
            There was an error loading the documentation!

            This most likely means the documentation was not generated
            properly. If you are running qutebrowser from the git repository,
            please (re)run scripts/asciidoc2html.py and reload this page.

            If you're running a released version this is a bug, please use
            :report to report it.

            Falling back to the plaintext version.

            ---------------------------------------------------------------


        """)
        return 'text/plain', (preamble + asciidoc).encode('utf-8')
    else:
        return 'text/html', data


def _qute_settings_set(url: QUrl) -> _HandlerRet:
    """Handler for qute://settings/set.

    Sets the option given by the "option" and "value" query parameters and
    returns b'ok' or b'error: ...' as page content.
    """
    query = QUrlQuery(url)
    option = query.queryItemValue('option', QUrl.FullyDecoded)
    value = query.queryItemValue('value', QUrl.FullyDecoded)

    # https://github.com/qutebrowser/qutebrowser/issues/727
    if option == 'content.javascript.enabled' and value == 'false':
        msg = ("Refusing to disable javascript via qute://settings "
               "as it needs javascript support.")
        message.error(msg)
        return 'text/html', b'error: ' + msg.encode('utf-8')

    try:
        config.instance.set_str(option, value, save_yaml=True)
        return 'text/html', b'ok'
    except configexc.Error as e:
        message.error(str(e))
        return 'text/html', b'error: ' + str(e).encode('utf-8')


@add_handler('settings')
def qute_settings(url: QUrl) -> _HandlerRet:
    """Handler for qute://settings. View/change qute configuration.

    Raises:
        RequestDeniedError: If qute://settings/set was requested without the
                            current CSRF token as URL password.
    """
    global csrf_token

    if url.path() == '/set':
        if url.password() != csrf_token:
            message.error("Invalid CSRF token for qute://settings!")
            raise RequestDeniedError("Invalid CSRF token!")
        if quitter.instance.is_shutting_down:
            log.config.debug("Ignoring /set request during shutdown")
            return 'text/html', b'error: ignored'
        return _qute_settings_set(url)

    # Requests to qute://settings/set should only be allowed from
    # qute://settings. As an additional security precaution, we generate a CSRF
    # token to use here.
    csrf_token = secrets.token_urlsafe()

    src = jinja.render('settings.html', title='settings',
                       configdata=configdata,
                       confget=config.instance.get_str,
                       csrf_token=csrf_token)
    return 'text/html', src


@add_handler('bindings')
def qute_bindings(_url: QUrl) -> _HandlerRet:
    """Handler for qute://bindings. View keybindings."""
    bindings = {}
    defaults = config.val.bindings.default

    config_modes = set(defaults.keys()).union(config.val.bindings.commands)
    config_modes.remove('normal')

    # Show normal mode first, followed by all other modes alphabetically.
    modes = ['normal'] + sorted(config_modes)
    for mode in modes:
        bindings[mode] = config.key_instance.get_bindings_for(mode)

    src = jinja.render('bindings.html', title='Bindings',
                       bindings=bindings)
    return 'text/html', src


@add_handler('back')
def qute_back(url: QUrl) -> _HandlerRet:
    """Handler for qute://back.

    Simple page to free ram / lazy load a site, goes back on focusing the tab.
    """
    src = jinja.render(
        'back.html',
        title='Suspended: ' + urllib.parse.unquote(url.fragment()))
    return 'text/html', src


@add_handler('configdiff')
def qute_configdiff(_url: QUrl) -> _HandlerRet:
    """Handler for qute://configdiff."""
    data = config.instance.dump_userconfig().encode('utf-8')
    return 'text/plain', data


@add_handler('pastebin-version')
def qute_pastebin_version(_url: QUrl) -> _HandlerRet:
    """Handler that pastebins the version string."""
    version.pastebin_version()
    return 'text/plain', b'Paste called.'


def _pdf_path(filename: str) -> str:
    """Get the path of a temporary PDF file."""
    return os.path.join(downloads.temp_download_manager.get_tmpdir().name,
                        filename)


@add_handler('pdfjs')
def qute_pdfjs(url: QUrl) -> _HandlerRet:
    """Handler for qute://pdfjs.

    Return the pdf.js viewer or redirect to original URL if the file does not
    exist.

    Raises:
        UrlInvalidError: If the filename (or, when needed, the source) query
                         parameter is missing.
        RequestDeniedError: If the filename for /file contains a path
                            separator.
        Redirect: If the PDF for the viewer does not exist anymore.
        NotFoundError: If a pdf.js resource was not found.
    """
    if url.path() == '/file':
        filename = QUrlQuery(url).queryItemValue('filename')
        if not filename:
            raise UrlInvalidError("Missing filename")
        # Only allow plain filenames inside the temporary download directory.
        if '/' in filename or os.sep in filename:
            raise RequestDeniedError("Path separator in filename.")

        path = _pdf_path(filename)
        with open(path, 'rb') as f:
            data = f.read()

        mimetype = utils.guess_mimetype(filename, fallback=True)
        return mimetype, data

    if url.path() == '/web/viewer.html':
        query = QUrlQuery(url)
        filename = query.queryItemValue("filename")
        if not filename:
            raise UrlInvalidError("Missing filename")

        path = _pdf_path(filename)
        if not os.path.isfile(path):
            source = query.queryItemValue('source')
            if not source:  # This may happen with old URLs stored in history
                raise UrlInvalidError("Missing source")
            raise Redirect(QUrl(source))

        data = pdfjs.generate_pdfjs_page(filename, url)
        return 'text/html', data

    try:
        data = pdfjs.get_pdfjs_res(url.path())
    except pdfjs.PDFJSNotFound as e:
        # Logging as the error might get lost otherwise since we're not showing
        # the error page if a single asset is missing. This way we don't lose
        # information, as the failed pdfjs requests are still in the log.
        log.misc.warning(
            "pdfjs resource requested but not found: {}".format(e.path))
        raise NotFoundError(
            "Can't find pdfjs resource '{}'".format(e.path)) from e
    else:
        mimetype = utils.guess_mimetype(url.fileName(), fallback=True)
        return mimetype, data


@add_handler('warning')
def qute_warning(url: QUrl) -> _HandlerRet:
    """Handler for qute://warning.

    Raises:
        NotFoundError: If the path is not a known warning page.
    """
    path = url.path()
    if path == '/webkit':
        src = jinja.render('warning-webkit.html',
                           title='QtWebKit backend warning')
    elif path == '/sessions':
        src = jinja.render('warning-sessions.html',
                           title='Qt 5.15 sessions warning',
                           datadir=standarddir.data(),
                           sep=os.sep)
    else:
        raise NotFoundError("Invalid warning page {}".format(path))
    return 'text/html', src


@add_handler('resource')
def qute_resource(url: QUrl) -> _HandlerRet:
    """Handler for qute://resource.

    Raises:
        NotFoundError: If the resource file does not exist.
    """
    path = url.path().lstrip('/')
    mimetype = utils.guess_mimetype(path, fallback=True)
    try:
        data = resources.read_file_binary(path)
    except FileNotFoundError as e:
        raise NotFoundError(str(e)) from e
    return mimetype, data