1
2# Copyright 2009-2018 Jaap Karssenberg <jaap.karssenberg@gmail.com>
3
4
5from datetime import datetime
6
7import sqlite3
8import logging
9
10logger = logging.getLogger('zim.notebook.index')
11
12from zim.utils import natural_sort_key
13from zim.notebook.page import Path, HRef, \
14	HREF_REL_ABSOLUTE, HREF_REL_FLOATING, HREF_REL_RELATIVE
15from zim.tokenparser import TokenBuilder
16
17from zim.formats import ParseTreeBuilder
18
19from .base import *
20
21
22from zim.notebook.layout import \
23FILE_TYPE_PAGE_SOURCE, \
24FILE_TYPE_ATTACHMENT
25
26ROOT_PATH = Path(':')
27ROOT_ID = 1 # Constant for the ID of the root namespace in "pages"
28			# (Primary key starts count at 1 and first entry will be root)
29
30PAGE_EXISTS_UNCERTAIN = 0 # e.g. folder with unknown children - not shown to outside world
31PAGE_EXISTS_AS_LINK = 1 # placeholder for link target
32PAGE_EXISTS_HAS_CONTENT = 2 # either has content or children have content
33
34
35def emptyParseTree():
36	b = ParseTreeBuilder()
37	b.start('zim-tree')
38	b.end('zim-tree')
39	return b.get_parsetree()
40
41
42class PagesIndexer(IndexerBase):
43	'''Indexer for the "pages" table.
44
45	@signal: C{page-row-inserted (row)}: new row inserted
46	@signal: C{page-row-changed (row, oldrow)}: row changed
47	@signal: C{page-row-delete (row)}: row to be deleted
48	@signal: C{page-row-deleted (row)}: row that has been deleted
49
50	@signal: C{page-changed (row, content)}: page contents changed
51	'''
52
53	__signals__ = {
54		'page-row-inserted': (None, None, (object,)),
55		'page-row-changed': (None, None, (object, object)),
56		'page-row-delete': (None, None, (object,)),
57		'page-row-deleted': (None, None, (object,)),
58		'page-changed': (None, None, (object, object))
59	}
60
61	def __init__(self, db, layout, filesindexer):
62		IndexerBase.__init__(self, db)
63		self.layout = layout
64		self.connectto_all(filesindexer, (
65			'file-row-inserted', 'file-row-changed', 'file-row-deleted'
66		))
67
68		self.db.executescript('''
69			CREATE TABLE IF NOT EXISTS pages(
70				id INTEGER PRIMARY KEY,
71				parent INTEGER REFERENCES pages(id),
72				n_children INTEGER DEFAULT 0,
73
74				name TEXT UNIQUE NOT NULL,
75				lowerbasename TEXT NOT NULL,
76				sortkey TEXT NOT NULL,
77				mtime TIMESTAMP,
78
79				source_file INTEGER REFERENCES files(id),
80				is_link_placeholder BOOLEAN DEFAULT 0
81
82				CONSTRAINT no_self_ref CHECK (parent <> id)
83			);
84			CREATE UNIQUE INDEX IF NOT EXISTS pages_name ON pages(name);
85			CREATE INDEX IF NOT EXISTS pages_sortkey ON pages(sortkey);
86			CREATE INDEX IF NOT EXISTS pages_parent ON pages(parent);
87		''')
88		row = self.db.execute('SELECT * FROM pages WHERE id == 1').fetchone()
89		if row is None:
90			c = self.db.execute(
91				'INSERT INTO pages(parent, name, lowerbasename, sortkey, source_file) '
92				'VALUES (?, ?, ?, ?, ?)',
93				(0, '', '', '', 1)
94			)
95			assert c.lastrowid == 1 # ensure we start empty
96
97	def _select(self, pagename):
98		return self.db.execute(
99			'SELECT * FROM pages WHERE name=?', (pagename.name,)
100		).fetchone()
101
102	# We should not read file contents on db-file-inserted because
103	# there can be many in one iterarion when the FileIndexer indexes
104	# a folder. Therefore we only send page-changed in response to
105	# db-file-updated and trust we get this signal for each file
106	# that is inserted in a separate iteration.
107
108	def on_file_row_inserted(self, o, filerow):
109		pagename, file_type = self.layout.map_filepath(filerow['path'])
110		if file_type != FILE_TYPE_PAGE_SOURCE:
111			return # nothing to do
112
113		row = self._select(pagename)
114		if row is None:
115			self.insert_page(pagename, filerow['id'])
116		elif row['source_file'] is None:
117			self.db.execute(
118				'UPDATE pages SET source_file=?, mtime=?, is_link_placeholder=? WHERE name=?',
119				(filerow['id'], None, False, pagename.name)
120			)
121			self.update_parent(pagename.parent)
122			newrow = self._select(pagename)
123			self.emit('page-row-changed', newrow, row)
124		else:
125			# TODO: Flag conflict
126			raise NotImplementedError
127
128	def on_file_row_changed(self, o, filerow):
129		pagename, file_type = self.layout.map_filepath(filerow['path'])
130		if file_type != FILE_TYPE_PAGE_SOURCE:
131			return # nothing to do
132
133		row = self._select(pagename)
134		if row is None:
135			self.insert_page(pagename, filerow['id']) # May have been changed file type
136			row = self._select(pagename)
137
138		if row['source_file'] == filerow['id']:
139			file = self.layout.root.file(filerow['path'])
140			format = self.layout.get_format(file)
141			mtime = file.mtime()
142			tree = format.Parser().parse(file.read())
143			self.update_page(pagename, mtime, tree)
144		else:
145			pass # some conflict file changed
146
147	def on_file_row_deleted(self, o, filerow):
148		pagename, file_type = self.layout.map_filepath(filerow['path'])
149		if file_type != FILE_TYPE_PAGE_SOURCE:
150			return # nothing to do
151
152		row = self._select(pagename)
153		if row is None:
154			return # not a source file after all (e.g. .txt attachment)
155
156		if row['source_file'] == filerow['id']:
157			if row['n_children'] > 0:
158				self.db.execute(
159					'UPDATE pages SET source_file=?, mtime=? WHERE name=?',
160					(None, None, pagename.name)
161				)
162				self.update_parent(pagename, oldrow=row)
163					# checks if any children have sources - else will be removed
164				try:
165					row = self._select(pagename)
166					self.emit('page-changed', row, emptyParseTree())
167				except IndexNotFoundError:
168					pass
169			else:
170				self.remove_page(pagename)
171		else:
172			pass # some conflict removed - or not a source file in the first place (.txt attachment)
173
174	def insert_page(self, pagename, file_id):
175		return self._insert_page(pagename, False, file_id)
176
177	def insert_link_placeholder(self, pagename):
178		return self._insert_page(pagename, True)
179
180	def delete_link_placeholder(self, pagename):
181		row = self._select(pagename)
182		assert row is not None
183
184		if not row['is_link_placeholder']:
185			raise AssertionError('Not a placeholder')
186		else:
187			self.remove_page(pagename)
188
189	def _insert_page(self, pagename, is_link_placeholder, file_id=None):
190		assert not (is_link_placeholder and file_id)
191
192		# insert parents
193		parent_row = self._select(pagename.parent)
194		if parent_row is None:
195			self._insert_page(pagename.parent, is_link_placeholder) # recurs
196			parent_row = self._select(pagename.parent)
197			assert parent_row is not None
198
199		# insert new page
200		lowerbasename = pagename.basename.lower()
201		sortkey = natural_sort_key(pagename.basename)
202		try:
203			self.db.execute(
204				'INSERT INTO pages(name, lowerbasename, sortkey, parent, is_link_placeholder, source_file)'
205				'VALUES (?, ?, ?, ?, ?, ?)',
206				(pagename.name, lowerbasename, sortkey, parent_row['id'], is_link_placeholder, file_id)
207			)
208		except sqlite3.IntegrityError:
209			# This can occur in rare edge cases when resolve_page failed to
210			# see a page existed already - typically due to locale changes
211			# affecting sortkey
212			logger.exception('Error while inserting page - re-index needed?')
213			self.db.execute(
214				'UPDATE pages SET sortkey=? WHERE name=?',
215				(sortkey, pagename.name)
216			)
217			row = self._select(pagename)
218		else:
219			row = self._select(pagename)
220			self._update_parent_nchildren(pagename.parent)
221			self.emit('page-row-inserted', row)
222
223		# update parent(s)
224		self.update_parent(pagename.parent)
225
226		return row['id']
227
228	def update_parent(self, parentname, allow_cleanup=lambda r: True, oldrow=None):
229		row = self._select(parentname)
230		assert row is not None
231
232		# get new status
233		n_children, all_child_are_placeholder = self.db.execute(
234			'SELECT count(*), min(is_link_placeholder) FROM pages WHERE parent=?',
235				# "min()" works as "any(not is_link_placeholder)"
236				# because False is "0" in sqlite
237			(row['id'],)
238		).fetchone()
239		if all_child_are_placeholder is None:
240			all_child_are_placeholder = True
241
242		if n_children == 0 and row['source_file'] is None and allow_cleanup(row):
243			# cleanup if no longer needed
244			self.db.execute(
245				'UPDATE pages SET n_children=? WHERE id=?',
246				(n_children, row['id'])
247			)
248			self.remove_page(parentname, allow_cleanup) # indirect recurs
249		else:
250			# update table
251			is_placeholder = row['source_file'] is None and all_child_are_placeholder
252			self.db.execute(
253				'UPDATE pages SET n_children=?, is_link_placeholder=? WHERE id=?',
254				(n_children, is_placeholder, row['id'])
255			)
256			if bool(row['is_link_placeholder']) is not is_placeholder:
257				self.update_parent(parentname.parent) # recurs
258
259			# notify others
260			if not parentname.isroot:
261				newrow = self._select(parentname)
262				self.emit('page-row-changed', newrow, oldrow or row)
263
264	def update_page(self, pagename, mtime, content):
265		self.db.execute(
266			'UPDATE pages SET mtime=? WHERE name=?',
267			(mtime, pagename.name),
268		)
269
270		row = self._select(pagename)
271		self.emit('page-changed', row, content)
272		self.emit('page-row-changed', row, row)
273
274	def remove_page(self, pagename, allow_cleanup=lambda r: True):
275		# allow_cleanup is used by LinksIndexer when cleaning up placeholders
276
277		row = self._select(pagename)
278		assert row['id'] != 1, 'BUG: can\'t delete notebook root'
279		if row['n_children'] > 0:
280			raise AssertionError('Page has child pages')
281
282		self.emit('page-row-delete', row)
283		self.db.execute('DELETE FROM pages WHERE name=?', (pagename.name,))
284		self._update_parent_nchildren(pagename.parent)
285		self.emit('page-row-deleted', row)
286		self.update_parent(pagename.parent, allow_cleanup)
287
288	def _update_parent_nchildren(self, parentname):
289		# parent n_children needs to be up-to-date when we emit the "deleted"
290		# signal, else Gtk.TreeView sees an inconsistency
291		# We still call update_parent() after the fact to do the rest of the
292		# house keeping
293		row = self._select(parentname)
294		assert row is not None
295
296		n_children, = self.db.execute(
297			'SELECT count(*) FROM pages WHERE parent=?',
298			(row['id'],)
299		).fetchone()
300		self.db.execute(
301			'UPDATE pages SET n_children=? WHERE id=?',
302			(n_children, row['id'])
303		)
304
305
306class PageIndexRecord(Path):
307	'''Object representing a page L{Path} in the index, with data
308	for the corresponding row in the C{pages} table.
309	'''
310
311	__slots__ = ('_row')
312
313	def __init__(self, row):
314		'''Constructor
315		@param row: a C{sqlite3.Row} object for this page in the
316		"pages" table, specifies most other attributes for this object
317		The property C{hasdata} is C{True} when the row is set.
318		'''
319		Path.__init__(self, row['name'])
320		self._row = row
321
322	@property
323	def id(self): return self._row['id']
324
325	@property
326	def hascontent(self): return self._row['source_file'] is not None
327
328	@property
329	def haschildren(self): return self._row['n_children'] > 0
330
331	@property
332	def n_children(self): return self._row['n_children']
333
334	@property
335	def mtime(self): return self._row['mtime']
336
337	def exists(self):
338		return not self._row['is_link_placeholder']
339
340
341class PagesViewInternal(object):
342	'''This class defines private methods used by L{PagesView},
343	L{LinksView}, L{TagsView} and others.
344	'''
345
346	def __init__(self, db):
347		self.db = db
348
349	def get_pagename(self, page_id):
350		row = self.db.execute(
351			'SELECT * FROM pages WHERE id=?', (page_id,)
352		).fetchone()
353		if row is None:
354			raise IndexConsistencyError('No page for page_id "%r"' % page_id)
355		return PageIndexRecord(row)
356
357	def get_page_id(self, pagename):
358		row = self.db.execute(
359			'SELECT id FROM pages WHERE name=?', (pagename.name,)
360		).fetchone()
361		if row is None:
362			raise IndexNotFoundError('Page not found in index: %s' % pagename.name)
363		return row['id']
364
365	def resolve_link(self, source, href, ignore_link_placeholders=True, source_id=None):
366		parent, parent_id, names = self._resolve_link(source, href, ignore_link_placeholders, source_id)
367		return self.resolve_pagename(parent, names)
368
369	def _resolve_link(self, source, href, ignore_link_placeholders=True, source_id=None):
370		if href.rel == HREF_REL_ABSOLUTE or source.isroot:
371			return (ROOT_PATH, ROOT_ID, href.parts())
372
373		start, start_id, relnames = source, source_id, []
374		while start_id is None:
375			# Do not assume source exists, find start point that does
376			try:
377				start_id = self.get_page_id(start)
378			except IndexNotFoundError:
379				relnames.insert(0, start.basename)
380				start = start.parent
381			else:
382				break
383
384		if href.rel == HREF_REL_RELATIVE:
385			return (start, start_id, relnames + href.parts())
386		else:
387			# HREF_REL_FLOATING
388			# Search upward namespaces for existing pages,
389			# By default ignore link placeholders to avoid circular
390			# dependencies between links and placeholders
391			assert href.rel == HREF_REL_FLOATING
392
393			# link to anchor on current page
394			if not href.parts():
395				return (start, start_id, relnames + href.parts())
396
397			anchor_key = natural_sort_key(href.parts()[0])
398
399			if relnames:
400				# Check if we are anchored in non-existing part
401				keys = list(map(natural_sort_key, relnames))
402				if anchor_key in keys:
403					i = [c for c, k in enumerate(keys) if k == anchor_key][-1]
404					return (start, start_id, relnames[:i] + href.parts())
405
406			if ignore_link_placeholders:
407				c = self.db.execute(
408					'SELECT name, id FROM pages '
409					'WHERE sortkey=? and is_link_placeholder=0 '
410					'ORDER BY name DESC',
411					(anchor_key,)
412				) # sort longest first
413			else:
414				c = self.db.execute(
415					'SELECT name, id FROM pages '
416					'WHERE sortkey=? '
417					'ORDER BY name DESC',
418					(anchor_key,)
419				) # sort longest first
420
421			maxdepth = source.name.count(':')
422			depth = -1 # level where items were found
423			found = [] # candidates that match the link - these can only differ in case of the basename
424			for name, pid in c:
425				mydepth = name.count(':')
426				if mydepth > maxdepth:
427					continue
428				elif mydepth < depth:
429					break
430
431				if mydepth > 0: # check whether we have a common parent
432					parentname = name.rsplit(':', 1)[0]
433					if start.name.startswith(parentname):
434						depth = mydepth
435						found.append((name, pid))
436				else: # resolve from root namespace
437					found.append((name, pid))
438
439			if found: # try to match case first, else just use first match
440				parts = href.parts()
441				anchor = parts.pop(0)
442				for name, pid in found:
443					if name.endswith(anchor):
444						return (Path(name), pid, parts)
445				else:
446					name, pid = found[0]
447					return (Path(name), pid, parts)
448
449			else:
450				# Return "brother" of source
451				if relnames:
452					return (start, start_id, relnames[:-1] + href.parts())
453				else:
454					return (start.parent, None, href.parts())
455
456	def resolve_pagename(self, parent, names, parent_id=None):
457		'''Resolve a pagename in the right case'''
458		# We do not ignore placeholders here. This can lead to a dependencies
459		# in how links are resolved based on order of indexing. However, this
460		# is not really a problem. Ignoring them means you could see duplicates
461		# if the tree for multiple links with slightly different spelling.
462		# Also we would need another call to return the page_id if a resolved
463		# page happens to exist.
464		assert isinstance(parent, Path)
465		pagename = parent
466		page_id = parent_id or self.get_page_id(parent)
467		for i, basename in enumerate(names):
468			sortkey = natural_sort_key(basename)
469			candidates = self.db.execute(
470				'SELECT id, name FROM pages '
471				'WHERE parent=? and sortkey=? ORDER BY name',
472				(page_id, sortkey)
473			).fetchall()
474
475			exact = pagename.child(basename).name
476			for row in candidates:
477				if row['name'] == exact:
478					pagename = Path(row['name'])
479					page_id = row['id']
480					break
481			else:
482				if candidates: # case insensitive match(es)
483					row = candidates[0]
484					pagename = Path(row['name'])
485					page_id = row['id']
486				else: # no match
487					return None, pagename.child(':'.join(names[i:]))
488		else:
489			return page_id, pagename
490
491	def walk(self, parent_id):
492		# Need to do this recursive to preserve sorting
493		#              else we could just do "name LIKE parent%"
494		for row in self.db.execute(
495			'SELECT * FROM pages WHERE parent=? '
496			'ORDER BY sortkey, name',
497			(parent_id,)
498		):
499			yield PageIndexRecord(row)
500			if row['n_children'] > 0:
501				for child in self.walk(row['id']): # recurs
502					yield child
503
504	def walk_bottomup(self, parent_id):
505		for row in self.db.execute(
506			'SELECT * FROM pages WHERE parent=? '
507			'ORDER BY sortkey, name',
508			(parent_id,)
509		):
510			if row['n_children'] > 0:
511				for child in self.walk_bottomup(row['id']): # recurs
512					yield child
513			yield PageIndexRecord(row)
514
515
516class PagesView(IndexView):
517	'''Index view that exposes the "pages" table in the index'''
518
519	def __init__(self, db):
520		IndexView.__init__(self, db)
521		self._pages = PagesViewInternal(db)
522
523	def lookup_by_pagename(self, pagename):
524		r = self.db.execute(
525			'SELECT * FROM pages WHERE name=?', (pagename.name,)
526		).fetchone()
527		if r is None:
528			raise IndexNotFoundError
529		else:
530			return PageIndexRecord(r)
531
532	def list_pages(self, path=None):
533		'''Generator for child pages of C{path}
534		@param path: a L{Path} object
535		@returns: yields L{Path} objects for children of C{path}
536		@raises IndexNotFoundError: if C{path} is not found in the index
537		'''
538		if path is None:
539			page_id = ROOT_ID
540		else:
541			page_id = self._pages.get_page_id(path) # can raise
542		return self._list_pages(page_id)
543
544	def _list_pages(self, page_id):
545		for row in self.db.execute(
546			'SELECT * FROM pages WHERE parent=? ORDER BY sortkey, name',
547			(page_id,)
548		):
549			yield PageIndexRecord(row)
550
551	def n_list_pages(self, path=None):
552		page_id = self._pages.get_page_id(path or ROOT_PATH)
553		c, = self.db.execute(
554			'SELECT COUNT(*) FROM pages WHERE parent=?', (page_id,)
555		).fetchone()
556		return c
557
558	def match_pages(self, path, text, limit=10):
559		'''Generator for child pages of C{path} that match C{text} in their name
560		@param path: a L{Path} object
561		@param text: a string
562		@param limit: max number of results
563		@returns: yields L{Path} objects for children of C{path}
564		@raises IndexNotFoundError: if C{path} is not found in the index
565		'''
566		if path is None:
567			page_id = ROOT_ID
568		else:
569			page_id = self._pages.get_page_id(path) # can raise
570		return self._match_pages(page_id, text, limit)
571
572	def _match_pages(self, page_id, text, limit):
573		# The LIKE keyword does not handle unicode case-insensitivity
574		# therefore we need python lower() to do the job
575		for row in self.db.execute(
576			'SELECT * FROM pages WHERE parent=? and lowerbasename LIKE ? ORDER BY sortkey, name LIMIT ?',
577			(page_id, "%%%s%%" % text.lower(), limit)
578		):
579			yield PageIndexRecord(row)
580
581	def match_all_pages(self, text, limit=10):
582		'''Like C{match_pages()} except not limited a specific namespace'''
583		for row in self.db.execute(
584			'SELECT * FROM pages WHERE lowerbasename LIKE ? ORDER BY length(name), sortkey, name LIMIT ?',
585			("%%%s%%" % text.lower(), limit)
586		):
587			yield PageIndexRecord(row)
588
589	def walk(self, path=None):
590		'''Generator function to yield all pages in the index, depth
591		first
592
593		@param path: a L{Path} object for the starting point, can be
594		used to only iterate a sub-tree. When this is C{None} the
595		whole notebook is iterated over
596		@returns: an iterator that yields L{Path} objects
597		@raises IndexNotFoundError: if C{path} does not exist in the index
598		'''
599		# Need to do this recursive to preserve sorting
600		#              else we could just do "name LIKE parent%"
601		page_id = self._pages.get_page_id(path) if path else ROOT_ID # can raise
602		return self._pages.walk(page_id)
603
604	def walk_bottomup(self, path=None):
605		page_id = self._pages.get_page_id(path) if path else ROOT_ID # can raise
606		return self._pages.walk_bottomup(page_id)
607
608	def n_all_pages(self):
609		'''Returns to total number of pages in the index'''
610		c, = self.db.execute('SELECT COUNT(*) FROM pages').fetchone()
611		return c - 1 # don't count ROOT
612
613	def get_has_previous_has_next(self, path):
614		if path.isroot:
615			raise ValueError('Can\'t use root')
616
617		r = self.db.execute(
618			'SELECT * FROM pages WHERE parent=? '
619			'ORDER BY sortkey ASC, name ASC LIMIT 1',
620			(ROOT_ID,)
621		).fetchone()
622		is_first = (r['name'] == path.name) if r else True
623
624		r = self.db.execute(
625			'SELECT * FROM pages WHERE parent=? '
626			'ORDER BY sortkey DESC, name DESC LIMIT 1',
627			(ROOT_ID,)
628		).fetchone()
629		is_last = (r['name'] == path.name) if r else True
630
631		return not is_first, not is_last
632
633	def get_previous(self, path):
634		'''Get the previous path in the index, in the same order that
635		L{walk()} will yield them
636		@param path: a L{Path} object
637		@returns: a L{Path} object or C{None} if {path} is the first page in
638		the index
639		'''
640		# Find last (grand)child of previous item with same parent
641		# If no previous item, yield parent
642		if path.isroot:
643			raise ValueError('Can\'t use root')
644
645		r = self.db.execute(
646			'SELECT parent FROM pages WHERE name=?', (path.name,)
647		).fetchone()
648		if r is None:
649			raise IndexNotFoundError('No such page: %s' % path)
650		else:
651			parent_id = r[0]
652
653		sortkey = natural_sort_key(path.basename)
654		r = self.db.execute('''
655			SELECT * FROM pages WHERE parent=? and (
656				sortkey<? or (sortkey=? and name<?)
657			) ORDER BY sortkey DESC, name DESC LIMIT 1''',
658			(parent_id, sortkey, sortkey, path.name)
659		).fetchone()
660		if not r:
661			parent = self._pages.get_pagename(parent_id)
662			return None if parent.isroot else parent
663		else:
664			while r['n_children'] > 0:
665				r = self.db.execute(
666					'SELECT * FROM pages WHERE parent=? '
667					'ORDER BY sortkey DESC, name DESC LIMIT 1',
668					(r['id'],)
669				).fetchone()
670				if r is None:
671					raise IndexConsistencyError('Missing children')
672			else:
673				return PageIndexRecord(r)
674
675	def get_next(self, path):
676		'''Get the next path in the index, in the same order that
677		L{walk()} will yield them
678		@param path: a L{Path} object
679		@returns: a L{Path} object or C{None} if C{path} is the last page in
680		the index
681		'''
682		# If item has children, yield first child
683		# Else find next item with same parent
684		# If no next item, find next item for parent
685		if path.isroot:
686			raise ValueError('Can\'t use root')
687
688		r = self.db.execute(
689			'SELECT * FROM pages WHERE name=?', (path.name,)
690		).fetchone()
691		if r is None:
692			raise IndexNotFoundError('No such page: %s' % path)
693
694		if r['n_children'] > 0:
695			r = self.db.execute(
696				'SELECT name FROM pages WHERE parent=? '
697				'ORDER BY sortkey, name LIMIT 1',
698				(r['id'],)
699			).fetchone()
700			if r is None:
701				raise IndexConsistencyError('Missing children')
702			else:
703				return PageIndexRecord(r)
704		else:
705			while True:
706				n = self.db.execute('''
707					SELECT * FROM pages WHERE parent=? and (
708						sortkey>? or (sortkey=? and name>?)
709					) ORDER BY sortkey, name LIMIT 1''',
710					(r['parent'], r['sortkey'], r['sortkey'], r['name'])
711				).fetchone()
712				if n is not None:
713					return PageIndexRecord(n)
714				elif r['parent'] == ROOT_ID:
715					return None
716				else:
717					r = self.db.execute(
718						'SELECT * FROM pages WHERE id=?', (r['parent'],)
719					).fetchone()
720					if r is None:
721						raise IndexConsistencyError('Missing parent')
722
723	def lookup_from_user_input(self, name, reference=None):
724		'''Lookup a pagename based on user input
725		@param name: the user input as string
726		@param reference: a L{Path} in case relative links are supported as
727		customer input
728		@returns: a L{Path} object for C{name}
729		@raises ValueError: when C{name} would reduce to empty string
730		after removing all invalid characters, or if C{name} is a
731		relative link while no C{reference} page is given.
732		@raises IndexNotFoundError: when C{reference} is not indexed
733		'''
734		# This method re-uses most of resolve_link() but is defined
735		# separate because it has a distinct different purpose.
736		# Only accidental that we treat user input as links ... ;)
737		href = HRef.new_from_wiki_link(name)
738		if reference is None and href.rel == HREF_REL_RELATIVE:
739			raise ValueError('Got relative page name without parent: %s' % name)
740		else:
741			source = reference or ROOT_PATH
742			id, pagename = self._pages.resolve_link(
743								source, href, ignore_link_placeholders=False)
744			return pagename
745
746	def resolve_link(self, source, href):
747		'''Find the end point of a link
748		Depending on the link type (absolute, relative, or floating),
749		this method first determines the starting point of the link
750		path. Then it goes downward doing a case insensitive match
751		against the index.
752		@param source: a L{Path} for the starting point of the link
753		@param href: a L{HRef} object for the link
754		@returns: a L{Path} object for the target of the link.
755		'''
756		assert isinstance(source, Path)
757		assert isinstance(href, HRef)
758		id, pagename = self._pages.resolve_link(source, href)
759		return pagename
760
761	def create_link(self, source, target):
762		'''Determine best way to represent a link between two pages
763		@param source: a L{Path} object
764		@param target: a L{Path} object
765		@returns: a L{HRef} object
766		'''
767		if target == source: # weird edge case ..
768			return HRef(HREF_REL_FLOATING, target.basename)
769		elif target.ischild(source):
770			return HRef(HREF_REL_RELATIVE, target.relname(source))
771		else:
772			href = self._find_floating_link(source, target)
773			return href or HRef(HREF_REL_ABSOLUTE, target.name)
774
775	def _find_floating_link(self, source, target):
776		# Relative links only resolve for pages that have a common parent
777		# with the source page. So we start finding the common parents and
778		# if that does not resolve (e.g. because same name also occurs on a
779		# lower level) try one level up to "anchor" the link.
780		# It is absolute must to use resolve_link() here - this ensures the
781		# outcome is always consistent between these functions.
782		parentnames = []
783		for n1, n2 in zip(source.parts, target.parts):
784			if n1 == n2:
785				parentnames.append(n1)
786			else:
787				break
788
789		def try_link(names):
790			assert names
791			href = HRef(HREF_REL_FLOATING, ':'.join(names))
792			pid, pagename = self._pages.resolve_link(source, href)
793			return href if pagename == target else None
794
795		relnames = target.parts[len(parentnames):]
796		if not relnames: # Target is direct parent
797			relnames.insert(0, parentnames.pop())
798		href = try_link(relnames)
799		if href is not None:
800			return href
801		else:
802			while parentnames:
803				relnames.insert(0, parentnames.pop())
804				href = try_link(relnames)
805				if href:
806					return href
807			else:
808				return None # no floating link possible
809
810	def list_recent_changes(self, limit=None, offset=None):
811		assert not (offset and not limit), "Can't use offset without limit"
812		if limit:
813			selection = ' LIMIT %i OFFSET %i' % (limit, offset or 0)
814		else:
815			selection = ''
816
817		for row in self.db.execute(
818			'SELECT * FROM pages WHERE id<>1 ORDER BY mtime DESC' + selection,
819		):
820			yield PageIndexRecord(row)
821
822
823try:
824	from gi.repository import Gtk
825except ImportError:
826	Gtk = None
827
828
829IS_PAGE = 1 #: Hint for MyTreeIter
830
831class PagesTreeModelMixin(TreeModelMixinBase):
832
833	# Optimize lookup for finding records in the same level
834	# - always cache parent, to retrieve other children more quickly
835	# - cache a range of 20 records at once
836
837	# Signals use "find_all" instead of "find" to allow for subclasses that
838	# have multiple entries, like models for tags
839
840	def __init__(self, index, root=None, reverse=False):
841		TreeModelMixinBase.__init__(self, index)
842		self._REVERSE = reverse
843		if root is None:
844			self._MY_ROOT_NAME = ''
845			self._MY_ROOT_NAME_C = ''
846			self._MY_ROOT_ID = ROOT_ID
847		else:
848			self._MY_ROOT_NAME = root.name
849			self._MY_ROOT_NAME_C = root.name + ':'
850			self._set_root_id()
851
852		self._deleted_paths = None
853
854	def _set_root_id(self):
855		myrow = self.db.execute(
856			'SELECT * FROM pages WHERE name=?', (self._MY_ROOT_NAME,)
857		).fetchone()
858		self._MY_ROOT_ID = myrow['id'] if myrow else None
859
860	def connect_to_updateiter(self, index, update_iter):
861		self.connectto_all(update_iter.pages,
862			('page-row-inserted', 'page-row-changed', 'page-row-delete', 'page-row-deleted')
863		)
864
865	def on_page_row_inserted(self, o, row):
866		self.flush_cache()
867		if row['name'] == self._MY_ROOT_NAME:
868			self._set_root_id()
869		else:
870			for treepath in self._find_all_pages(row['name']):
871				treeiter = self.get_iter(treepath) # not mytreeiter !
872				self.emit('row-inserted', treepath, treeiter)
873				if treepath[-1] == 0 and len(treepath) > 1:
874					self._check_parent_has_child_toggled(treepath, 1)
875
876	def _check_parent_has_child_toggled(self, treepath, count):
877		parent = self.get_mytreeiter(treepath[:-1])
878		if parent.n_children == count:
879			treeiter = self.get_iter(parent.treepath) # not mytreeiter !
880			self.emit('row-has-child-toggled', parent.treepath, treeiter)
881
882	def on_page_row_changed(self, o, row, oldrow):
883		# no clear cache here - just update row
884		for treepath in self._find_all_pages(row['name']):
885			treeiter = self.get_iter(treepath) # not mytreeiter !
886			self.cache[tuple(treepath)].row = row # ensure uptodate info
887			self.emit('row-changed', treepath, treeiter)
888
889	def on_page_row_delete(self, o, row):
890		self._deleted_paths = list(self._find_all_pages(row['name']))
891
892	def on_page_row_deleted(self, o, row):
893		# Technically "_deleted_paths" should always be a single path
894		# here, else two things changed at once, and Gtk.TreeView cannot
895		# always deal with that.
896
897		self.flush_cache()
898		if row['name'] == self._MY_ROOT_NAME:
899			self._MY_ROOT_ID = None
900		else:
901			for treepath in self._deleted_paths:
902				self.emit('row-deleted', treepath)
903				if treepath[-1] == 0 and len(treepath) > 1:
904					self._check_parent_has_child_toggled(treepath, 0)
905
906		self._deleted_paths = None
907
908	def n_children_top(self):
909		if self._MY_ROOT_ID is None:
910			return 0
911		else:
912			return self.db.execute(
913				'SELECT COUNT(*) FROM pages WHERE parent=?', (self._MY_ROOT_ID,)
914			).fetchone()[0]
915
916	def get_mytreeiter(self, treepath):
917		if self._MY_ROOT_ID is None:
918			return None
919
920		treepath = tuple(treepath) # used to cache
921		if treepath in self.cache:
922			return self.cache[treepath]
923
924		# Find parent
925		parentpath = treepath[:-1]
926		if not parentpath:
927			parent_id = self._MY_ROOT_ID
928		else:
929			parent_iter = self.cache.get(parentpath, None) \
930							or self.get_mytreeiter(parentpath) # recurs
931			if parent_iter:
932				parent_id = parent_iter.row['id']
933			else:
934				return None
935
936		# Now cache a slice at the target level
937		offset = treepath[-1]
938		if self._REVERSE:
939			rows = self.db.execute('''
940				SELECT * FROM pages WHERE parent=?
941				ORDER BY sortkey DESC, name DESC LIMIT 20 OFFSET ?
942				''',
943				(parent_id, offset)
944			)
945		else:
946			rows = self.db.execute('''
947				SELECT * FROM pages WHERE parent=?
948				ORDER BY sortkey ASC, name ASC LIMIT 20 OFFSET ?
949				''',
950				(parent_id, offset)
951			)
952		for i, row in enumerate(rows):
953			mytreepath = tuple(parentpath) + (offset + i,)
954			if mytreepath not in self.cache:
955				self.cache[mytreepath] = MyTreeIter(
956					Gtk.TreePath(mytreepath),
957					row,
958					row['n_children'],
959					IS_PAGE
960				)
961			else:
962				break # avoid overwriting cache because of ref count
963
964		return self.cache.get(treepath, None)
965
966	def find(self, path):
967		'''Returns the C{Gtk.TreePath} for a notebook page L{Path}
968		If the L{Path} appears multiple times returns the first occurrence
969		@raises IndexNotFoundError: if path not found
970		'''
971		if path.isroot:
972			raise ValueError
973		treepaths = sorted(self._find_all_pages(path.name))
974		try:
975			return treepaths[0]
976		except IndexError:
977			raise IndexNotFoundError(path)
978
979	def find_all(self, path):
980		'''Returns a list of C{Gtk.TreePath} for a notebook page L{Path}
981		Returns all occurrences in the treeview
982		@raises IndexNotFoundError: if path not found
983		'''
984		if path.isroot:
985			raise ValueError
986		treepaths = self._find_all_pages(path.name)
987		if not treepaths:
988			raise IndexNotFoundError(path)
989		else:
990			return treepaths
991
992	def _find_all_pages(self, name, update_cache=True):
993		if self._MY_ROOT_ID is None or \
994			not name.startswith(self._MY_ROOT_NAME_C):
995				return []
996
997		parent_id = self._MY_ROOT_ID
998		names = name[len(self._MY_ROOT_NAME_C):].split(':')
999		treepath = []
1000		for i, basename in enumerate(names):
1001			# Get treepath
1002			name = self._MY_ROOT_NAME_C + ':'.join(names[:i + 1])
1003			myrow = self.db.execute(
1004				'SELECT * FROM pages WHERE name=?', (name,)
1005			).fetchone()
1006			if myrow is None:
1007				raise IndexNotFoundError
1008
1009			sortkey = myrow['sortkey']
1010			if self._REVERSE:
1011				row = self.db.execute('''
1012					SELECT COUNT(*) FROM pages
1013					WHERE parent=? and (
1014						sortkey>? or (sortkey=? and name>?)
1015					)''',
1016					(parent_id, sortkey, sortkey, name)
1017				).fetchone()
1018			else:
1019				row = self.db.execute('''
1020					SELECT COUNT(*) FROM pages
1021					WHERE parent=? and (
1022						sortkey<? or (sortkey=? and name<?)
1023					)''',
1024					(parent_id, sortkey, sortkey, name)
1025				).fetchone()
1026			treepath.append(row[0])
1027			parent_id = myrow['id']
1028
1029			if update_cache:
1030				# Update cache (avoid overwriting because of ref count)
1031				mytreepath = tuple(treepath)
1032				if mytreepath not in self.cache:
1033					myiter = MyTreeIter(
1034						Gtk.TreePath(mytreepath),
1035						myrow,
1036						myrow['n_children'],
1037						IS_PAGE
1038					)
1039					self.cache[mytreepath] = myiter
1040
1041		return [Gtk.TreePath(treepath)]
1042
1043
1044########################################################################
1045
1046class TestPagesDBTable(object):
1047	# Mixin for test cases, defined here to have all SQL in one place
1048
1049	def assertPagesDBConsistent(self, db):
1050		for row in db.execute('SELECT * FROM pages'):
1051			count, = db.execute(
1052				'SELECT count(*) FROM pages WHERE parent=?',
1053				(row['id'],)
1054			).fetchone()
1055			self.assertEqual(row['n_children'], count,
1056				'Count for "%s" is %i while n_children=%i' % (row['name'], row['n_children'], count)
1057			)
1058
1059			if row['source_file'] is not None:
1060				self.assertFalse(row['is_link_placeholder'],
1061					'Placeholder status for %s is wrong (has source itself)' % row['name']
1062				)
1063			elif not row['is_link_placeholder']:
1064				# Check downwards - at least one child that is not a placeholder either
1065				child = db.execute(
1066					'SELECT * FROM pages WHERE parent=? and is_link_placeholder=?',
1067					(row['id'], False),
1068				).fetchone()
1069				self.assertIsNotNone(child,
1070					'Missing child with source for %s' % row['name'])
1071
1072			if row['id'] > 1:
1073				parent = db.execute(
1074					'SELECT * FROM pages WHERE id=?',
1075					(row['id'],)
1076				).fetchone()
1077				self.assertIsNotNone(parent,
1078					'Missing parent for %s' % row['name'])
1079
1080				if not row['is_link_placeholder']:
1081					# Check upwards - parent(s) must not be placeholder either
1082					self.assertFalse(parent['is_link_placeholder'],
1083						'Placeholder status for parent of %s is inconsistent' % row['name']
1084					)
1085
1086	def assertPagesDBEquals(self, db, pages):
1087		rows = db.execute('SELECT * FROM pages WHERE id>1').fetchall()
1088		in_db = set(r['name'] for r in rows)
1089		self.assertEqual(in_db, set(pages))
1090
1091	def assertPagesDBContains(self, db, pages):
1092		rows = db.execute('SELECT * FROM pages WHERE id>1').fetchall()
1093		in_db = set(r['name'] for r in rows)
1094		self.assertTrue(set(pages).issubset(in_db))
1095