1
2# Copyright 2008-2014 Jaap Karssenberg <jaap.karssenberg@gmail.com>
3
4'''This module contains a web interface for zim. This is an alternative
5to the GUI application.
6
7It can be run either as a stand-alone web server or embedded in another
server as a cgi-bin script or using one of the python web frameworks
9using the "WSGI" API.
10
11The main classes here are L{WWWInterface} which implements the interface
12(and is callable as a "WSGI" application) and L{Server} which implements
13the standalone server.
14'''
15
16# TODO setting for doc_root_url when running in CGI mode
# TODO support "etag" and "if-none-match" headers at least for icons
18# TODO: redirect server logging to logging module + set default level to -V in server process
19
20
21import sys
22import socket
23import logging
24from gi.repository import GObject
25
26from functools import partial
27
28from wsgiref.headers import Headers
29import urllib.request
30import urllib.parse
31import urllib.error
32
33from zim.errors import Error
34from zim.notebook import Notebook, Path, Page, encode_filename, PageNotFoundError
35from zim.fs import File, Dir, FileNotFoundError
36from zim.config import data_file
37from zim.parsing import url_encode
38
39from zim.export.linker import ExportLinker, StubLayout
40from zim.export.template import ExportTemplateContext
41from zim.export.exporters import createIndexPage
42
43from zim.formats import get_format
44
45logger = logging.getLogger('zim.www')
46
47
class WWWError(Error):
	'''Exception that carries the http status line for the response'''

	#: lookup table from status code to reason phrase - extend when needed
	statusstring = {
		'403': 'Forbidden',
		'404': 'Not Found',
		'405': 'Method Not Allowed',
		'500': 'Internal Server Error',
	}

	def __init__(self, msg, status='500', headers=None):
		'''Constructor
		@param msg: specific error message - when not empty it is
		appended after the standard status string
		@param status: error code as a string, must be one of the keys
		of C{statusstring}, e.g. '500' for "Internal Server Error" or
		'404' for "Not Found"
		@param headers: additional http headers for the error response,
		list of 2-tuples with header name and value
		'''
		reason = self.statusstring[status]
		self.status = '%s %s' % (status, reason)
		self.headers = headers
		# The status line doubles as the start of the error message
		self.msg = (self.status + ' - ' + msg) if msg else self.status
74
75
class WebPageNotFoundError(WWWError):
	'''Error for a request to a page that does not exist (404)'''

	description = 'You tried to open a page that does not exist.\n'

	def __init__(self, page):
		'''Constructor
		@param page: either the page name as a string or an object
		with a C{name} attribute
		'''
		name = page if isinstance(page, str) else page.name
		super().__init__('No such page: %s' % name, status='404')
87
88
class WebPathNotValidError(WWWError):
	'''Error for an url that does not map to a valid page path (403)'''

	description = 'The requested path is not valid\n'

	def __init__(self):
		'''Constructor, takes no arguments: message and status are fixed'''
		super().__init__('Invalid path', status='403')
98
99
class WWWInterface(object):
	'''Class to handle the WWW interface for zim notebooks.

	Objects of this class are callable, so they can be used as application
	objects within a WSGI compatible framework. See PEP 333 for details
	(U{http://www.python.org/dev/peps/pep-0333/}).

	For basic handlers to run this interface see the "wsgiref" package
	in the standard library for python.
	'''

	def __init__(self, notebook, template='Default', auth_creds=None):
		'''Constructor
		@param notebook: a L{Notebook} object
		@param template: html template for zim pages, either a template
		name or a template object, C{None} selects the 'Default' template
		@param auth_creds: credentials for HTTP-authentication, a
		sequence with the user name and the password, or C{None} to
		serve without authentication
		'''
		assert isinstance(notebook, Notebook)
		self.notebook = notebook
		self.auth_creds = auth_creds

		self.output = None

		if template is None:
			template = 'Default'

		if isinstance(template, str):
			from zim.templates import get_template
			self.template = get_template('html', template)
			if not self.template:
				raise AssertionError('Could not find html template: %s' % template)
		else:
			self.template = template

		self.linker_factory = partial(WWWLinker, self.notebook, self.template.resources_dir)
		self.dumper_factory = get_format('html').Dumper # XXX

		#~ self.notebook.indexer.check_and_update()

	def __call__(self, environ, start_response):
		'''Main function for handling a single request. Follows the
		WSGI API.

		@param environ: dictionary with environment variables for the
		request and some special variables. See the PEP for expected
		variables.

		@param start_response: a function that can be called to set the
		http response and headers. For example::

			start_response('200 OK', [('Content-Type', 'text/plain')])

		@returns: the response body as a list of byte strings
		'''
		if self.auth_creds:
			username = self._authenticate(environ)
			if username is None:
				return self._request_auth(start_response)
			environ['REMOTE_USER'] = username
			del environ['HTTP_AUTHORIZATION']

		headerlist = []
		headers = Headers(headerlist)
		path = environ.get('PATH_INFO', '/')
		method = environ.get('REQUEST_METHOD')
		try:
			methods = ('GET', 'HEAD')
			if method not in methods:
				# The status must be passed by keyword, the first
				# argument is the free-form message
				raise WWWError(
					'Supported methods: %s' % ', '.join(methods),
					status='405',
					headers=[('Allow', ', '.join(methods))]
				)

			path = self._clean_path(path)

			if path == '/':
				headers.add_header('Content-Type', 'text/html', charset='utf-8')
				content = self.render_index()
			elif path.startswith('/+docs/'):
				docroot = self.notebook.document_root
				if not docroot:
					raise WebPageNotFoundError(path)
				docfile = docroot.file(path[7:])
				content = [docfile.raw()]
					# Will raise FileNotFound when file does not exist
				headers['Content-Type'] = docfile.get_mimetype()
			elif path.startswith('/+file/'):
				attachment = self.notebook.folder.file(path[7:])
					# TODO: need abstraction for getting file from top level dir ?
				content = [attachment.read_binary()]
					# Will raise FileNotFound when file does not exist
				headers['Content-Type'] = attachment.mimetype()
			elif path.startswith('/+resources/'):
				# Template resources take precedence, the pixmaps from the
				# data folder are the fallback
				name = path[12:]
				resources_dir = self.template.resources_dir
				resource = resources_dir.file(name) if resources_dir else None
				if resource is None or not resource.exists():
					resource = data_file('pixmaps/%s' % name)

				if not resource:
					raise WebPageNotFoundError(path)
				content = [resource.raw()]
					# Will raise FileNotFound when file does not exist
				headers['Content-Type'] = resource.get_mimetype()
			else:
				# Must be a page or a namespace (html file or directory path)
				headers.add_header('Content-Type', 'text/html', charset='utf-8')
				if path.endswith('.html'):
					pagename = path[:-5].replace('/', ':')
				elif path.endswith('/'):
					pagename = path[:-1].replace('/', ':')
				else:
					raise WebPageNotFoundError(path)

				# From here on "path" is the object returned by the notebook
				# lookup, this is also what the 404 errors below report
				path = self.notebook.pages.lookup_from_user_input(pagename)
				try:
					page = self.notebook.get_page(path)
					if page.hascontent:
						content = self.render_page(page)
					elif page.haschildren:
						content = self.render_index(page)
					else:
						raise WebPageNotFoundError(path)
				except PageNotFoundError:
					raise WebPageNotFoundError(path)
		except Exception as error:
			# Start with fresh headers, the ones set above belong to the
			# response that failed
			headerlist = []
			headers = Headers(headerlist)
			headers.add_header('Content-Type', 'text/plain', charset='utf-8')
			if isinstance(error, (WWWError, FileNotFoundError)):
				logger.error(error.msg)
				if isinstance(error, FileNotFoundError):
					error = WebPageNotFoundError(path)
					# show url path instead of file path
				if error.headers:
					for key, value in error.headers:
						headers.add_header(key, value)
				start_response(error.status, headerlist)
				content = str(error).splitlines(True)
			# TODO also handle template errors as special here
			else:
				# Unexpected error - maybe a bug, do not expose output on bugs
				# to the outside world
				logger.exception('Unexpected error:')
				start_response('500 Internal Server Error', headerlist)
				content = ['Internal Server Error']

			if method == 'HEAD':
				return []
			else:
				return [c.encode('UTF-8') for c in content]
		else:
			start_response('200 OK', headerlist)
			if method == 'HEAD':
				return []
			elif content and isinstance(content[0], str):
				# Rendered pages are text, files are served as read
				return [c.encode('UTF-8') for c in content]
			else:
				return content

	def _authenticate(self, environ):
		'''Check the HTTP "Basic" credentials of a request
		@param environ: the WSGI environment of the request
		@returns: the user name when the "Authorization" header holds
		credentials that match C{auth_creds}, C{None} when the header
		is missing, malformed or does not match
		'''
		import base64
		import hmac

		auth = environ.get('HTTP_AUTHORIZATION')
		if not auth:
			return None

		try:
			scheme, data = auth.split(None, 1)
		except ValueError:
			return None # scheme without credentials
		if scheme.lower() != 'basic':
			return None

		try:
			decoded = base64.b64decode(data).decode('UTF-8')
		except ValueError:
			# Both "binascii.Error" and "UnicodeDecodeError" are
			# subclasses of "ValueError"
			return None

		# Split on the first colon only: a password may contain colons
		username, sep, password = decoded.partition(':')
		if not sep:
			return None

		expected_user, expected_password = self.auth_creds[0], self.auth_creds[1]
		# Compare as bytes - "compare_digest()" only takes ascii for
		# strings - and check both parts regardless of the first result
		user_ok = hmac.compare_digest(
			username.encode('UTF-8'), expected_user.encode('UTF-8'))
		password_ok = hmac.compare_digest(
			password.encode('UTF-8'), expected_password.encode('UTF-8'))
		if user_ok and password_ok:
			return username
		return None

	def _request_auth(self, start_response):
		'''Send the "401 Unauthorized" response that asks for credentials
		@param start_response: the WSGI C{start_response} function
		@returns: the response body as a list of byte strings
		'''
		body = 'Please authenticate'.encode('UTF-8')
		realm = 'zimAuth'
		logger.info('Requesting Basic HTTP-Authentication')
		headers = [
			('Content-Type', 'text/plain'),
			('Content-Length', str(len(body))),
			('WWW-Authenticate', 'Basic realm="%s"' % realm),
		]
		start_response('401 Unauthorized', headers)
		return [body]

	@staticmethod
	def _clean_path(path):
		'''Decode, normalize and validate the path of a request
		@param path: the C{PATH_INFO} value as found in the environment
		@returns: the cleaned up path, it always starts with "/" and
		keeps the trailing "/" of a directory style path
		@raises WebPathNotValidError: when the path can not be decoded
		or when one of the path elements starts with a "."
		'''
		try:
			path = path.encode('iso-8859-1').decode('UTF-8')
				# The WSGI standard mandates iso-8859-1, but we want UTF-8. See:
				# - https://www.python.org/dev/peps/pep-3333/#unicode-issues
				# - https://code.djangoproject.com/ticket/19468
		except UnicodeError:
			raise WebPathNotValidError() from None

		# Unquote before validating, otherwise an url-encoded ".." gets
		# past the check below and only turns into ".." afterwards
		path = urllib.parse.unquote(path)

		path = path.replace('\\', '/') # make it windows safe
		isdir = path.endswith('/')
		parts = [p for p in path.split('/') if p and p != '.']
		if any(p.startswith('.') for p in parts):
			# exclude .. and all hidden files from possible paths
			raise WebPathNotValidError()

		path = '/' + '/'.join(parts)
		if isdir and path != '/':
			path += '/'

		if path == '/favicon.ico':
			path = '/+resources/favicon.ico'
		return path

	def render_index(self, namespace=None):
		'''Render an index page
		@param namespace: the namespace L{Path}, when C{None} the index
		is created for C{Path(':')}
		@returns: html as a list of lines
		'''
		path = namespace or Path(':')
		page = createIndexPage(self.notebook, path, namespace)
		return self.render_page(page)

	def render_page(self, page):
		'''Render a single page from the notebook
		@param page: a L{Page} object
		@returns: html as a list of lines
		'''
		lines = []

		context = ExportTemplateContext(
			self.notebook,
			self.linker_factory,
			self.dumper_factory,
			title=page.get_title(),
			content=[page],
			home=self.notebook.get_home_page(),
			up=page.parent if page.parent and not page.parent.isroot else None,
			prevpage=self.notebook.pages.get_previous(page) if not page.isroot else None,
			nextpage=self.notebook.pages.get_next(page) if not page.isroot else None,
			links={'index': '/'},
			index_generator=self.notebook.pages.walk,
			index_page=page,
		)
		# The template appends its output to "lines"
		self.template.process(lines, context)
		return lines
329
330
class WWWLinker(ExportLinker):
	'''Linker that produces links matching the URL scheme
	of the web server in this module.
	'''

	def __init__(self, notebook, resources_dir=None, source=None):
		'''Constructor
		@param notebook: the notebook, passed on to the L{StubLayout}
		and to the base class
		@param resources_dir: passed on to the L{StubLayout}
		@param source: passed on to the base class as "source"
		'''
		ExportLinker.__init__(
			self,
			notebook,
			StubLayout(notebook, resources_dir),
			source=source,
		)

	def icon(self, name):
		'''Returns the encoded "/+resources/" link for icon C{name}'''
		return url_encode('/+resources/%s.png' % name)

	def resource(self, path):
		'''Returns the encoded "/+resources/" link for C{path}'''
		return url_encode('/+resources/%s' % path)

	def resolve_source_file(self, link):
		'''Always returns C{None}'''
		return None # not used by HTML anyway

	def page_object(self, path):
		'''Turn a L{Path} object in a relative link or URI'''
		# TODO use script location as root for cgi-bin
		filename = encode_filename(path.name)
		return url_encode('/' + filename + '.html')

	def file_object(self, file):
		'''Turn a L{File} object in a relative link or URI'''
		folder = self.notebook.folder
		if file.ischild(folder):
			# attachment
			return url_encode('/+file/' + file.relpath(folder))

		docroot = self.notebook.document_root
		if docroot and file.ischild(docroot):
			# document root
			# TODO use script location as root for cgi-bin
			# TODO allow alternative document root for cgi-bin
			return url_encode('/+docs/' + file.relpath(docroot))

		# external file -> file://
		return file.uri
370
371
def main(notebook, port=8080, public=True, **opts):
	'''Create a server with L{make_server()} and serve requests
	until the process is stopped
	@param notebook: passed on to L{make_server()}
	@param port: the http port to serve on
	@param public: passed on to L{make_server()}
	@param opts: further options for L{make_server()}
	'''
	server = make_server(notebook, port, public, **opts)
	logger.info(
		"Serving HTTP on %s port %i...",
		server.server_name, server.server_port
	)
	server.serve_forever()
376
377
def make_server(notebook, port=8080, public=True, auth_creds=None, **opts):
	'''Create a simple http server that serves a L{WWWInterface}
	@param notebook: a L{Notebook} object
	@param port: the http port to serve on
	@param public: when C{True} the server accepts connections from
	other computers, when C{False} only from localhost
	@param auth_creds: credentials for HTTP-authentication
	@param opts: options for L{WWWInterface.__init__()}
	@returns: a C{WSGIServer} object
	'''
	from wsgiref import simple_server
	application = WWWInterface(notebook, auth_creds=auth_creds, **opts) # FIXME make opts explicit
	# The empty host name is used for the public server
	host = '' if public else 'localhost'
	return simple_server.make_server(host, port, application)
395