
# Copyright 2008-2014 Jaap Karssenberg <jaap.karssenberg@gmail.com>

'''This module contains a web interface for zim. This is an alternative
to the GUI application.

It can be run either as a stand-alone web server or embedded in another
server as a cgi-bin script or using one of the python web frameworks
using the "WSGI" API.

The main classes here are L{WWWInterface} which implements the interface
(and is callable as a "WSGI" application) and the function
L{make_server()} which creates the standalone server.
'''

# TODO setting for doc_root_url when running in CGI mode
# TODO support "etag" and "if-none-match" headers at least for icons
# TODO: redirect server logging to logging module + set default level to -V in server process


import sys
import socket
import logging
from gi.repository import GObject

from functools import partial

from wsgiref.headers import Headers
import urllib.request
import urllib.parse
import urllib.error

from zim.errors import Error
from zim.notebook import Notebook, Path, Page, encode_filename, PageNotFoundError
from zim.fs import File, Dir, FileNotFoundError
from zim.config import data_file
from zim.parsing import url_encode

from zim.export.linker import ExportLinker, StubLayout
from zim.export.template import ExportTemplateContext
from zim.export.exporters import createIndexPage

from zim.formats import get_format

logger = logging.getLogger('zim.www')


class WWWError(Error):
    '''Error with http error code'''

    #: mapping of error number to string - extend when needed
    statusstring = {
        '403': 'Forbidden',
        '404': 'Not Found',
        '405': 'Method Not Allowed',
        '500': 'Internal Server Error',
    }

    def __init__(self, msg, status='500', headers=None):
        '''Constructor
        @param msg: specific error message - will be appended after
        the standard error string, can be empty or C{None} when the
        standard error string is sufficient
        @param status: error code, e.g. '500' for "Internal Server Error"
        or '404' for "Not Found" - must be a key of C{statusstring}
        @param headers: additional http headers for the error response,
        list of 2-tuples with header name and value
        '''
        self.status = '%s %s' % (status, self.statusstring[status])
        self.headers = headers
        self.msg = self.status
        if msg:
            self.msg += ' - ' + msg


class WebPageNotFoundError(WWWError):
    '''Error when a page is not found (404)'''

    description = '''\
You tried to open a page that does not exist.
'''

    def __init__(self, page):
        '''Constructor
        @param page: either the requested url path as string or an
        object with a C{name} attribute
        '''
        if not isinstance(page, str):
            page = page.name
        WWWError.__init__(self, 'No such page: %s' % page, status='404')


class WebPathNotValidError(WWWError):
    '''Error when the url points to an invalid page path (403)'''

    description = '''\
The requested path is not valid
'''

    def __init__(self):
        WWWError.__init__(self, 'Invalid path', status='403')


class WWWInterface(object):
    '''Class to handle the WWW interface for zim notebooks.

    Objects of this class are callable, so they can be used as application
    objects within a WSGI compatible framework. See PEP 333 for details
    (U{http://www.python.org/dev/peps/pep-0333/}).

    For basic handlers to run this interface see the "wsgiref" package
    in the standard library for python.
    '''

    def __init__(self, notebook, template='Default', auth_creds=None):
        '''Constructor
        @param notebook: a L{Notebook} object
        @param template: html template for zim pages, either a template
        name or a template object; C{None} selects the 'Default' template
        @param auth_creds: credentials for HTTP-authentication, indexed
        as C{(username, password)}; when empty no authentication is done
        '''
        assert isinstance(notebook, Notebook)
        self.notebook = notebook
        self.auth_creds = auth_creds

        self.output = None

        if template is None:
            template = 'Default'

        if isinstance(template, str):
            from zim.templates import get_template
            self.template = get_template('html', template)
            if not self.template:
                raise AssertionError('Could not find html template: %s' % template)
        else:
            self.template = template

        self.linker_factory = partial(WWWLinker, self.notebook, self.template.resources_dir)
        self.dumper_factory = get_format('html').Dumper  # XXX

        #~ self.notebook.indexer.check_and_update()

    def _authenticate(self, environ, start_response):
        '''Check the HTTP basic authentication of a request against
        C{self.auth_creds}.

        On success 'REMOTE_USER' is set in C{environ} and the
        'HTTP_AUTHORIZATION' key is removed from it.

        @param environ: the WSGI environment dictionary
        @param start_response: the WSGI C{start_response} function
        @returns: C{None} when the request is authenticated, otherwise
        the body of a "401 Unauthorized" response as a list of bytes
        (in that case C{start_response} has already been called)
        '''
        import base64

        def bad_auth():
            body = 'Please authenticate'.encode('UTF-8')
            realm = 'zimAuth'
            logger.info('Requesting Basic HTTP-Authentication')
            headers = [
                ('Content-Type', 'text/plain'),
                ('Content-Length', str(len(body))),
                ('WWW-Authenticate', 'Basic realm="%s"' % realm),
            ]
            start_response('401 Unauthorized', headers)
            return [body]

        auth = environ.get('HTTP_AUTHORIZATION')
        if not auth:
            return bad_auth()

        try:
            scheme, data = auth.split(None, 1)
            credentials = base64.b64decode(data).decode('UTF-8')
        except ValueError:
            # Covers a header without payload, invalid base64
            # (binascii.Error) and invalid UTF-8 (UnicodeDecodeError),
            # which are all subclasses of ValueError
            return bad_auth()

        if scheme.lower() != 'basic':
            return bad_auth()

        # Only split on the first ":" - the password may contain colons
        username, sep, password = credentials.partition(':')
        if not sep \
        or username != self.auth_creds[0] \
        or password != self.auth_creds[1]:
            return bad_auth()

        environ['REMOTE_USER'] = username
        del environ['HTTP_AUTHORIZATION']
        return None

    def __call__(self, environ, start_response):
        '''Main function for handling a single request. Follows the
        WSGI API.

        @param environ: dictionary with environment variables for the
        request and some special variables. See the PEP for expected
        variables.

        @param start_response: a function that can be called to set the
        http response and headers. For example::

            start_response('200 OK', [('Content-Type', 'text/plain')])

        @returns: the response body as a list of bytes objects, this
        list is empty for "HEAD" requests
        '''
        if self.auth_creds:
            response = self._authenticate(environ, start_response)
            if response is not None:
                return response

        headerlist = []
        headers = Headers(headerlist)
        path = environ.get('PATH_INFO', '/')
        try:
            methods = ('GET', 'HEAD')
            if environ['REQUEST_METHOD'] not in methods:
                raise WWWError(None, status='405', headers=[('Allow', ', '.join(methods))])

            # The WSGI standard mandates iso-8859-1, but we want UTF-8. See:
            # - https://www.python.org/dev/peps/pep-3333/#unicode-issues
            # - https://code.djangoproject.com/ticket/19468
            try:
                path = path.encode('iso-8859-1').decode('UTF-8')
            except UnicodeError:
                raise WebPathNotValidError()

            # Decode url escapes *before* validating the path, otherwise
            # an encoded ".." or "\" would slip through the checks below
            path = urllib.parse.unquote(path)

            # cleanup path
            path = path.replace('\\', '/')  # make it windows safe
            isdir = path.endswith('/')
            parts = [p for p in path.split('/') if p and p != '.']
            if any(p.startswith('.') for p in parts):
                # exclude .. and all hidden files from possible paths
                raise WebPathNotValidError()
            path = '/' + '/'.join(parts)
            if isdir and path != '/':
                path += '/'

            if path == '/favicon.ico':
                path = '/+resources/favicon.ico'

            if path == '/':
                headers.add_header('Content-Type', 'text/html', charset='utf-8')
                content = self.render_index()
            elif path.startswith('/+docs/'):
                docroot = self.notebook.document_root
                if not docroot:
                    raise WebPageNotFoundError(path)
                resource = docroot.file(path[7:])  # strip "/+docs/"
                content = [resource.raw()]
                # Will raise FileNotFound when file does not exist
                headers['Content-Type'] = resource.get_mimetype()
            elif path.startswith('/+file/'):
                resource = self.notebook.folder.file(path[7:])  # strip "/+file/"
                # TODO: need abstraction for getting file from top level dir ?
                content = [resource.read_binary()]
                # Will raise FileNotFound when file does not exist
                headers['Content-Type'] = resource.mimetype()
            elif path.startswith('/+resources/'):
                name = path[12:]  # strip "/+resources/"
                if self.template.resources_dir:
                    resource = self.template.resources_dir.file(name)
                    if not resource.exists():
                        resource = data_file('pixmaps/%s' % name)
                else:
                    resource = data_file('pixmaps/%s' % name)

                if resource:
                    content = [resource.raw()]
                    # Will raise FileNotFound when file does not exist
                    headers['Content-Type'] = resource.get_mimetype()
                else:
                    raise WebPageNotFoundError(path)
            else:
                # Must be a page or a namespace (html file or directory path)
                headers.add_header('Content-Type', 'text/html', charset='utf-8')
                if path.endswith('.html'):
                    pagename = path[:-5].replace('/', ':')
                elif path.endswith('/'):
                    pagename = path[:-1].replace('/', ':')
                else:
                    raise WebPageNotFoundError(path)

                path = self.notebook.pages.lookup_from_user_input(pagename)
                try:
                    page = self.notebook.get_page(path)
                    if page.hascontent:
                        content = self.render_page(page)
                    elif page.haschildren:
                        content = self.render_index(page)
                    else:
                        raise WebPageNotFoundError(path)
                except PageNotFoundError:
                    raise WebPageNotFoundError(path)
        except Exception as error:
            # Discard headers collected so far, the error response is
            # always plain text
            headerlist = []
            headers = Headers(headerlist)
            headers.add_header('Content-Type', 'text/plain', charset='utf-8')
            if isinstance(error, (WWWError, FileNotFoundError)):
                logger.error(error.msg)
                if isinstance(error, FileNotFoundError):
                    error = WebPageNotFoundError(path)
                    # show url path instead of file path
                if error.headers:
                    for key, value in error.headers:
                        headers.add_header(key, value)
                start_response(error.status, headerlist)
                content = str(error).splitlines(True)
                # TODO also handle template errors as special here
            else:
                # Unexpected error - maybe a bug, do not expose output on bugs
                # to the outside world
                logger.exception('Unexpected error:')
                start_response('500 Internal Server Error', headerlist)
                content = ['Internal Server Error']

            if environ['REQUEST_METHOD'] == 'HEAD':
                return []
            else:
                return [c.encode('UTF-8') for c in content]
        else:
            start_response('200 OK', headerlist)
            if environ['REQUEST_METHOD'] == 'HEAD':
                return []
            elif content and isinstance(content[0], str):
                # Rendered pages are text, files are served as raw content
                return [c.encode('UTF-8') for c in content]
            else:
                return content

    def render_index(self, namespace=None):
        '''Render an index page
        @param namespace: the namespace L{Path}, when C{None} the index
        is created for the top level path C{Path(':')}
        @returns: html as a list of lines
        '''
        path = namespace or Path(':')
        page = createIndexPage(self.notebook, path, namespace)
        return self.render_page(page)

    def render_page(self, page):
        '''Render a single page from the notebook
        @param page: a L{Page} object
        @returns: html as a list of lines
        '''
        lines = []

        context = ExportTemplateContext(
            self.notebook,
            self.linker_factory,
            self.dumper_factory,
            title=page.get_title(),
            content=[page],
            home=self.notebook.get_home_page(),
            up=page.parent if page.parent and not page.parent.isroot else None,
            prevpage=self.notebook.pages.get_previous(page) if not page.isroot else None,
            nextpage=self.notebook.pages.get_next(page) if not page.isroot else None,
            links={'index': '/'},
            index_generator=self.notebook.pages.walk,
            index_page=page,
        )
        # The template appends its output to "lines"
        self.template.process(lines, context)
        return lines


class WWWLinker(ExportLinker):
    '''Implements a linker that returns the correct
    links for the way the server handles URLs.
    '''

    def __init__(self, notebook, resources_dir=None, source=None):
        '''Constructor
        @param notebook: the notebook being served
        @param resources_dir: resources folder of the template, if any
        @param source: passed on to L{ExportLinker}
        '''
        layout = StubLayout(notebook, resources_dir)
        ExportLinker.__init__(self, notebook, layout, source=source)

    def icon(self, name):
        '''Returns the url for the icon C{name} below "/+resources/"'''
        return url_encode('/+resources/%s.png' % name)

    def resource(self, path):
        '''Returns the url for resource C{path} below "/+resources/"'''
        return url_encode('/+resources/%s' % path)

    def resolve_source_file(self, link):
        return None  # not used by HTML anyway

    def page_object(self, path):
        '''Turn a L{Path} object in a relative link or URI'''
        return url_encode('/' + encode_filename(path.name) + '.html')
        # TODO use script location as root for cgi-bin

    def file_object(self, file):
        '''Turn a L{File} object in a relative link or URI'''
        if file.ischild(self.notebook.folder):
            # attachment
            relpath = file.relpath(self.notebook.folder)
            return url_encode('/+file/' + relpath)
        elif self.notebook.document_root \
        and file.ischild(self.notebook.document_root):
            # document root
            relpath = file.relpath(self.notebook.document_root)
            return url_encode('/+docs/' + relpath)
            # TODO use script location as root for cgi-bin
            # TODO allow alternative document root for cgi-bin
        else:
            # external file -> file://
            return file.uri


def main(notebook, port=8080, public=True, **opts):
    '''Create a http server for C{notebook} and serve requests until
    the process is interrupted.
    @param notebook: the notebook to serve
    @param port: the http port to serve on
    @param public: allow connections to the server from other
    computers - if C{False} can only connect from localhost
    @param opts: options for L{make_server()}
    '''
    httpd = make_server(notebook, port, public, **opts)
    logger.info("Serving HTTP on %s port %i...", httpd.server_name, httpd.server_port)
    httpd.serve_forever()


def make_server(notebook, port=8080, public=True, auth_creds=None, **opts):
    '''Create a simple http server
    @param notebook: the notebook location
    @param port: the http port to serve on
    @param public: allow connections to the server from other
    computers - if C{False} can only connect from localhost
    @param auth_creds: credentials for HTTP-authentication
    @param opts: options for L{WWWInterface.__init__()}
    @returns: a C{WSGIServer} object
    '''
    import wsgiref.simple_server
    app = WWWInterface(notebook, auth_creds=auth_creds, **opts)  # FIXME make opts explicit
    # An empty host name binds to all interfaces
    host = '' if public else 'localhost'
    return wsgiref.simple_server.make_server(host, port, app)