1
2# Copyright 2008-2015 Jaap Karssenberg <jaap.karssenberg@gmail.com>
3
4
5import re
6import logging
7import itertools
8
9logger = logging.getLogger('zim.notebook')
10
11
12from zim.parsing import link_type
13from zim.errors import Error
14
15import zim.formats
16import zim.fs
17import zim.newfs
18
19from zim.signals import SignalEmitter, SIGNAL_NORMAL
20
21import zim.datetimetz as datetime
22
23
# Matches any run of two or more ':' separators
_pagename_reduce_colon_re = re.compile('::+')

# Matches a run of non-alphanumeric characters (including '_') at the
# start of the name or directly after a ':' separator. It also matches
# any single invalid character anywhere in the name.
# The first alternative is a raw string because "\W" is not a valid
# string escape sequence.
# The UNICODE flag is used to make the alphanumeric check international.
_pagename_invalid_char_re = re.compile(
	'(' +
		r'^[_\W]+|(?<=:)[_\W]+' +
	'|' +
		'[' + re.escape(''.join(
			("?", "#", "/", "\\", "*", '"', "<", ">", "|", "%", "\t", "\n", "\r")
		)) + ']' +
	')',
re.UNICODE)
37
38
def shortest_unique_names(paths):
	'''Get the shortest name that uniquely identifies each path

	Paths with a unique basename are represented by that basename. When
	several paths share a basename, parent names are prefixed until the
	name differs from the other paths with the same basename.

	@param paths: list of L{Path} objects
	@returns: list of strings, in the same order as C{paths}
	'''
	# Group the paths by their basename
	groups = {}
	for path in paths:
		groups.setdefault(path.basename, []).append(path)

	shortest = []
	for path in paths:
		siblings = groups[path.basename]
		if len(siblings) == 1:
			shortest.append(path.basename)
			continue

		# Move the path of interest to the front of its group
		siblings.remove(path)
		siblings.insert(0, path)

		# Walk all names in the group in parallel, starting at the basename
		columns = itertools.zip_longest(
			*[reversed(p.name.split(':')) for p in siblings]
		)
		collected = []
		for column in columns:
			mine, others = column[0], column[1:]
			if mine is None:
				break # our name is exhausted
			collected.append(mine)
			if mine not in others:
				break # this part makes the name unique

		shortest.append(':'.join(reversed(collected)))

	return shortest
73
74
class Path(object):
	r'''Class representing a page name in the notebook

	This is the parent class for the Page class. It contains the name
	of the page and is used instead of the actual page object by methods
	that only need to know the name of the page. Path objects have no
	internal state and are essentially normalized page names. It also
	has a number of methods to compare page names and determining what
	the parent pages are etc.

	@ivar name: the full name of the path
	@ivar parts: all the parts of the name (split on ":")
	@ivar basename: the basename of the path (last part of the name)
	@ivar namespace: the name for the parent page or empty string
	@ivar isroot: C{True} when this Path represents the top level namespace
	@ivar parent: the L{Path} object for the parent page


	Valid characters in page names
	==============================

	A number of characters are not valid in page names as used in Zim
	notebooks.

	Reserved characters are:
	  - The ':' is reserved as separator
	  - The '?' is reserved to encode url style options
	  - The '#' is reserved as anchor separator
	  - The '/' and '\' are reserved to distinguish file links & urls
	  - First character of each part MUST be alphanumeric
		(including utf8 letters / numbers)

	For file system filenames we can not use:
	'\', '/', ':', '*', '?', '"', '<', '>', '|'
	(checked both win32 & posix)

	Do not allow '\n' and '\t' for obvious reasons

	Allowing '%' will cause problems with sql wildcards sooner
	or later - also for url decoding ambiguity it is better to
	keep this one reserved.

	All other characters are allowed in page names

	Note that Zim version < 0.42 used different rules that are not
	fully compatible, this is important when upgrading old notebooks.
	See L{Notebook.cleanup_pathname_zim028()}
	'''

	__slots__ = ('name',)

	@staticmethod
	def assertValidPageName(name):
		'''Raises an C{AssertionError} if C{name} does not represent
		a valid page name.
		This is a strict check, most names that fail this test can still
		be cleaned up by the L{makeValidPageName()}.
		@param name: a string
		@raises AssertionError: if the name is not valid
		'''
		# Raise explicitly instead of using an "assert" statement, assert
		# statements are skipped when python runs with the "-O" option
		if not isinstance(name, str):
			raise AssertionError('Not a valid page name: %r' % (name,))
		if not name.strip(':') \
		or _pagename_reduce_colon_re.search(name) \
		or _pagename_invalid_char_re.search(name):
			raise AssertionError('Not a valid page name: %s' % name)

	@staticmethod
	def makeValidPageName(name):
		'''Remove any invalid characters from the string and return
		a valid page name. The only string that can not be turned into
		something valid is a string that fails L{assertValidPageName()}
		after cleanup, e.g. because it reduces to an empty string.
		@param name: a string
		@returns: a string
		@raises ValueError: when the cleaned up name is still not valid
		'''
		newname = _pagename_reduce_colon_re.sub(':', name.strip(':'))
		newname = _pagename_invalid_char_re.sub('', newname)
		newname = newname.replace('_', ' ')
		try:
			Path.assertValidPageName(newname)
		except AssertionError:
			raise ValueError('Not a valid page name: %s (was: %s)' % (newname, name))
		return newname

	def __init__(self, name):
		'''Constructor.

		@param name: the absolute page name in the right case as a
		string or as a tuple strings

		The name ":" is used as a special case to construct a path for
		the toplevel namespace in a notebook.

		@note: This constructor does not do any checks for the sanity of
		the path name. Never construct a path directly from user input,
		but use either L{index.lookup_from_user_input()} or first check the
		name with L{makeValidPageName()}
		'''
		if isinstance(name, (list, tuple)):
			self.name = ':'.join(name)
		else:
			# Leading and trailing separators carry no meaning,
			# ":" results in the empty name used for the root
			self.name = name.strip(':')

		try:
			self.name = str(self.name)
		except UnicodeDecodeError:
			raise ValueError('BUG: invalid input, page names should be in ascii, or given as unicode')

	@classmethod
	def new_from_zim_config(klass, string):
		'''Returns a new object based on the string representation for
		that path.
		@param string: the page name as a string
		@raises ValueError: see L{makeValidPageName()}
		'''
		return klass(klass.makeValidPageName(string))

	def serialize_zim_config(self):
		'''Returns the name for serializing this path'''
		return self.name

	def __repr__(self):
		return '<%s: %s>' % (self.__class__.__name__, self.name)

	def __str__(self):
		return self.name

	def __hash__(self):
		# Consistent with __eq__: paths with the same name hash the same
		return self.name.__hash__()

	def __eq__(self, other):
		'''Paths are equal when their names are the same'''
		if isinstance(other, Path):
			return self.name == other.name
		else: # e.g. path == None
			return False

	def __ne__(self, other):
		'''Paths are not equal when their names are not the same'''
		return not self.__eq__(other)

	def __add__(self, name):
		'''C{path + name} is an alias for C{path.child(name)}'''
		return self.child(name)

	@property
	def parts(self):
		'''Get all the parts of the name (split on ":")'''
		return self.name.split(':')

	@property
	def basename(self):
		'''Get the basename of the path (last part of the name)'''
		# rfind() gives -1 when there is no separator, so i becomes 0
		i = self.name.rfind(':') + 1
		return self.name[i:]

	@property
	def namespace(self):
		'''Gives the name for the parent page.
		Returns an empty string for the top level namespace.
		'''
		i = self.name.rfind(':')
		if i > 0:
			return self.name[:i]
		else:
			return ''

	@property
	def isroot(self):
		'''C{True} when this Path represents the top level namespace'''
		return self.name == ''

	def relname(self, path): # TODO make this use HRef !
		'''Get a part of this path relative to a parent path

		@param path: a parent L{Path}
		@returns: the part of the path that is relative to C{path}
		@raises ValueError: if C{path} is not a parent of this path
		'''
		if path.name == '': # root path
			return self.name
		elif self.name.startswith(path.name + ':'):
			i = len(path.name) + 1
			return self.name[i:].strip(':')
		else:
			raise ValueError('"%s" is not below "%s"' % (self, path))

	@property
	def parent(self):
		'''Get the path for the parent page, this is C{None} for the
		root path
		'''
		namespace = self.namespace
		if namespace:
			return Path(namespace)
		elif self.isroot:
			return None
		else:
			return Path(':')

	def parents(self):
		'''Generator function for parent Paths including root

		Yields the direct parent first and the root path last.
		'''
		names = self.parts[:-1] # drop our own basename
		while names:
			yield Path(':'.join(names))
			names.pop()
		yield Path(':')

	def child(self, basename):
		'''Get a child Path

		@param basename: the relative name for the child
		@returns: a new L{Path} object
		'''
		return Path(self.name + ':' + basename)

	def ischild(self, parent):
		'''Check whether this path is a child of a given path
		@param parent: a L{Path} object
		@returns: True when this path is a (grand-)child of C{parent}
		'''
		return parent.isroot or self.name.startswith(parent.name + ':')

	def match_namespace(self, namespace):
		'''Check whether this path is in a specific section of the notebook
		@param namespace: a L{Path} object
		@returns: True when this path is equal to C{namespace} or is a (grand-)child of C{namespace}
		'''
		return namespace.isroot or self.name == namespace.name or self.name.startswith(namespace.name + ':')

	def commonparent(self, other):
		'''Find a common parent for two Paths

		@param other: another L{Path} object

		@returns: a L{Path} object for the first common parent, this is
		the root path when the paths have nothing in common
		'''
		shared = []
		for mine, theirs in zip(self.parts, other.parts):
			if mine != theirs:
				break
			shared.append(mine)
		# An empty list results in the empty name, which is the root path
		return Path(':'.join(shared))
327
328
# Values for the "rel" attribute of HRef objects, they are assigned in
# HRef.new_from_wiki_link() based on the first character of the link text
HREF_REL_ABSOLUTE = 0 # link text starts with ':'
HREF_REL_FLOATING = 1 # link text starts with neither ':' nor '+'
HREF_REL_RELATIVE = 2 # link text starts with '+'
332
class HRef(object):
	'''Represents a link to a page as the combination of a link type,
	a page name and an optional anchor.

	@ivar rel: one of C{HREF_REL_ABSOLUTE}, C{HREF_REL_FLOATING} or
	C{HREF_REL_RELATIVE}
	@ivar names: the page name part of the link as a string, can be empty
	@ivar anchor: the part of the link after the "#" or C{None}
	'''

	__slots__ = ('rel', 'names', 'anchor')

	@classmethod
	def new_from_wiki_link(klass, href):
		'''Alternative constructor for a link as written in zim's wiki
		syntax.
		@param href: a string for the link
		@returns: a L{HRef} object
		@raises ValueError: when the string could not be parsed
		(see L{Path.makeValidPageName()})

		@note: This method assumes the logic of our wiki links, for other
		formats a separate constructor may be needed
		'''
		# The link type follows from the first character of the link
		rel = (
			HREF_REL_ABSOLUTE if href.startswith(':')
			else HREF_REL_RELATIVE if href.startswith('+')
			else HREF_REL_FLOATING
		)

		# Everything after the first "#" is the anchor
		pagename, separator, fragment = href.partition('#')
		anchor = fragment if separator else None

		if pagename:
			names = Path.makeValidPageName(pagename.lstrip('+'))
		else:
			names = ""

		return klass(rel, names, anchor)

	def __init__(self, rel, names, anchor=None):
		'''Constructor
		@param rel: the link type, one of the C{HREF_REL_*} constants
		@param names: the page name part of the link as a string
		@param anchor: the anchor as a string or C{None}
		'''
		self.anchor = anchor
		self.names = names
		self.rel = rel

	def __str__(self):
		labels = {
			HREF_REL_ABSOLUTE: 'abs',
			HREF_REL_FLOATING: 'float',
			HREF_REL_RELATIVE: 'rel',
		}
		fields = (self.__class__.__name__, labels[self.rel], self.names, self.anchor)
		return '<%s: %s %s %s>' % fields

	def parts(self):
		'''Returns the parts of the page name (split on ":") as a list,
		the list is empty when there is no page name
		'''
		if not self.names:
			return []
		return self.names.split(':')

	def to_wiki_link(self):
		'''Returns href as text for wiki link'''
		if self.rel == HREF_REL_ABSOLUTE:
			target = ":" + self.names.strip(':')
		elif self.rel == HREF_REL_RELATIVE:
			target = "+" + self.names
		else:
			target = self.names

		if not self.anchor:
			return target
		return target + "#" + self.anchor
387
388
class SourceFile(zim.fs.File):
	'''File object that reports itself as not writable and refuses
	the C{write()} and C{writelines()} methods
	'''

	def iswritable(self):
		'''Returns C{False} unconditionally'''
		return False

	def _refuse_write(self):
		# Shared failure path for all write methods
		raise AssertionError('Not writeable')

	def write(self, *a):
		'''Not supported
		@raises AssertionError: always
		'''
		self._refuse_write()

	def writelines(self, *a):
		'''Not supported
		@raises AssertionError: always
		'''
		self._refuse_write()
399
400
class PageReadOnlyError(Error):
	'''Error raised by L{Page.set_parsetree()} when the page is read-only,
	it is constructed with the page object as argument.

	NOTE(review): the "%s" in the message is presumably filled in by the
	L{Error} base class with the constructor argument - confirm against
	C{zim.errors}
	'''

	_msg = _('Can not modify page: %s') # T: error message for read-only pages
403
404
class Page(Path, SignalEmitter):
	'''Class to represent a single page in the notebook.

	Page objects inherit from L{Path} but have internal state reflecting
	content in the notebook. We try to keep Page objects unique
	by hashing them in L{Notebook.get_page()}, Path object on the other
	hand are cheap and can have multiple instances for the same logical path.
	We ask for a path object instead of a name in the constructor to
	encourage the use of Path objects over passing around page names as
	string.

	You can use a Page object instead of a Path anywhere in the APIs where
	a path is needed as argument etc.

	@ivar name: full page name (inherited from L{Path})
	@ivar hascontent: C{True} if the page has content
	@ivar haschildren: C{True} if the page has sub-pages
	@ivar modified: C{True} if the page was modified since the last
	store. Will be reset by L{Notebook.store_page()}
	@ivar readonly: C{True} when the page is read-only or belongs to a readonly notebook

	@signal: C{storage-changed (changed-on-disk)}: signal emitted on page
	change. The argument "changed-on-disk" is C{True} when an external
	edit was detected. For internal edits it is C{False}.
	@signal: C{modified-changed ()}: emitted when the page is edited
	'''

	__signals__ = {
		'storage-changed': (SIGNAL_NORMAL, None, (bool,)),
		'modified-changed': (SIGNAL_NORMAL, None, ()),
	}

	def __init__(self, path, haschildren, file, folder, format):
		'''Constructor

		@param path: a L{Path} object with the name for this page
		@param haschildren: C{True} if the page has sub-pages
		@param file: the file object for the page source
		@param folder: the folder object for the page attachments
		@param format: either a format module or a string
		that is understood by L{zim.formats.get_format()}
		'''
		assert isinstance(path, Path)
		self.name = path.name
		self.haschildren = haschildren
			# Note: this attribute is updated by the owning notebook
			# when a child page is stored
		self._modified = False
		self._change_counter = 0
		self._parsetree = None
		self._textbuffer = None
		self._meta = None # headers of the page source, C{None} when not loaded

		self._readonly = None # determined on first use, see "readonly"
		self._last_etag = None # etag of the source as last read or written
		if isinstance(format, str):
			self.format = zim.formats.get_format(format)
		else:
			self.format = format
		self.source = SourceFile(file.path) # XXX
		self.source_file = file
		self.attachments_folder = folder

	@property
	def readonly(self):
		'''C{True} when the source file is not writable, the value is
		determined on first access and cached afterwards
		'''
		if self._readonly is None:
			self._readonly = not self.source_file.iswritable()
		return self._readonly

	@property
	def mtime(self):
		'''The C{mtime()} of the source file or C{None} if it does not exist'''
		return self.source_file.mtime() if self.source_file.exists() else None

	@property
	def ctime(self):
		'''The C{ctime()} of the source file or C{None} if it does not exist'''
		return self.source_file.ctime() if self.source_file.exists() else None

	@property
	def hascontent(self):
		'''Returns whether this page has content'''
		if self._textbuffer:
			return self._textbuffer.hascontent
		elif self._parsetree:
			return self._parsetree.hascontent
		else:
			return self.source_file.exists()

	@property
	def modified(self):
		'''C{False} when the page is not modified, else a change counter
		that evaluates to C{True}, see L{set_modified()}
		'''
		return self._modified

	def set_modified(self, modified):
		'''Set the modified state and emit C{modified-changed}
		@param modified: C{True} to flag the page as modified, C{False}
		to reset the flag
		'''
		if modified:
			# HACK: by setting page.modified to a number rather than a
			# bool we can use this number to check against race conditions
			# in notebook.store_page_async post handler
			# The max() keeps the counter non-zero when it wraps around
			self._change_counter = max(1, (self._change_counter + 1) % 1000)
			self._modified = self._change_counter
			assert bool(self._modified) is True, 'BUG in counter'
		else:
			self._modified = False
		self.emit('modified-changed')

	def on_buffer_modified_changed(self, buffer):
		'''Handler for the C{modified-changed} signal of the textbuffer'''
		# one-way traffic, set page modified after modifying the buffer
		# but do not set page.modified False again when buffer goes
		# back to un-modified. Reason is that we use the buffer modified
		# state to track if we already requested the parse tree (see
		# get_parsetree()) while page modified is used to track need
		# for saving and is reset after save was done
		if buffer.get_modified():
			if self.readonly:
				logger.warning('Buffer edited while page read-only - potential bug')
			self.set_modified(True)

	def _store(self):
		'''Store the current parsetree in the source file'''
		tree = self.get_parsetree()
		self._store_tree(tree)

	def _store_tree(self, tree):
		'''Write C{tree} to the source file, or remove the source file
		when C{tree} is C{None} or has no content. Emits the
		C{storage-changed} signal with C{False} as argument afterwards.
		@param tree: a L{zim.formats.ParseTree} object or C{None}
		'''
		if tree and tree.hascontent:
			if self._meta is None and self.source_file.exists():
				# Headers were not loaded yet, try getting them from file.
				# The file is parsed into a separate variable, re-using
				# "tree" here would result in writing back the content
				# that is already on disk instead of the new content.
				try:
					text = self.source_file.read()
				except zim.newfs.FileNotFoundError:
					pass # file vanished since the check, handled as new page below
				else:
					# NOTE(review): get_parsetree() passes file_input=True
					# when parsing file content while this call does not,
					# confirm the headers are picked up without it
					parser = self.format.Parser()
					old_tree = parser.parse(text)
					self._meta = old_tree.meta

			if self._meta is not None:
				tree.meta.update(self._meta) # Preserve headers
			else: # no existing source file to take headers from
				now = datetime.now()
				tree.meta['Creation-Date'] = now.isoformat()

			lines = self.format.Dumper().dump(tree, file_output=True)
			self._last_etag = self.source_file.writelines_with_etag(lines, self._last_etag)
			self._meta = tree.meta
		else:
			self.source_file.remove()
			self._last_etag = None
			self._meta = None
		self.emit('storage-changed', False)

	def check_source_changed(self):
		'''Checks for changes in the source file and load it if needed

		If the page has a C{textbuffer} and it contains unsaved changes, this
		method will not overwrite them and you'll get an error on next attempt
		to save. To force overwrite see L{reload_textbuffer()}

		@returns: C{True} when a change was detected, C{False} otherwise
		'''
		if (
			self._last_etag
			and not (self.source_file.exists() and self.source_file.verify_etag(self._last_etag))
		) or (
			not self._last_etag
			and self.source_file.exists()
		):
			logger.info('Page changed on disk: %s', self.name)
			self._last_etag = None
			self._meta = None
			if self._textbuffer and not self._textbuffer.get_modified():
				self.reload_textbuffer()
			else:
				self._parsetree = None

			self.emit('storage-changed', True)
			return True
		else:
			return False

	def exists(self):
		'''C{True} when the page has either content or children'''
		return self.haschildren or self.hascontent

	def isequal(self, other):
		'''Check equality of pages
		This method is intended to deal with case-insensitive storage
		backends (e.g. case insensitive file system) where the method
		is supposed to check equality of the resource.
		Note that this may be the case even when the page objects differ
		and can have a different name (so L{__eq__} will not show
		them to be equal). However default falls back to L{__eq__}.
		@param other: another page object
		@returns: C{True} if both page objects point to the same resource
		@implementation: can be implemented by subclasses
		'''
		if self is other or self == other:
			return True
		elif self.source_file.exists():
			return self.source_file.isequal(other.source_file)
		else:
			return False

	def get_parsetree(self):
		'''Returns the contents of the page

		@returns: a L{zim.formats.ParseTree} object or C{None}
		'''
		if self._textbuffer:
			# The buffer modified state tracks whether the tree was
			# requested since the last edit, see on_buffer_modified_changed()
			if self._textbuffer.get_modified() or self._parsetree is None:
				self._parsetree = self._textbuffer.get_parsetree()
				self._textbuffer.set_modified(False)
			return self._parsetree
		elif self._parsetree:
			return self._parsetree
		else:
			try:
				text, self._last_etag = self.source_file.read_with_etag()
			except zim.newfs.FileNotFoundError:
				return None
			else:
				parser = self.format.Parser()
				self._parsetree = parser.parse(text, file_input=True)
				self._meta = self._parsetree.meta
				assert self._meta is not None
				return self._parsetree

	def set_parsetree(self, tree):
		'''Set the parsetree with content for this page

		@param tree: a L{zim.formats.ParseTree} object with content
		or C{None} to remove all content from the page
		@raises PageReadOnlyError: when the page is read-only

		@note: after setting new content in the Page object it still
		needs to be stored in the notebook to save this content
		permanently. See L{Notebook.store_page()}.
		'''
		if self.readonly:
			raise PageReadOnlyError(self)
		self._set_parsetree(tree)

	def _set_parsetree(self, tree):
		'''Like L{set_parsetree()} but without the read-only check'''
		self._parsetree = tree
		if self._textbuffer:
			assert not self._textbuffer.get_modified(), 'BUG: changing parsetree while buffer was changed as well'
			try:
				if tree is None:
					self._textbuffer.clear()
				else:
					self._textbuffer.set_parsetree(tree)
			finally:
				# Reset the buffer state on success and on failure,
				# prevent auto-save to kick in at any cost
				self._textbuffer.set_modified(False)

		self.set_modified(True)

	def append_parsetree(self, tree):
		'''Append content

		@param tree: a L{zim.formats.ParseTree} object with content
		'''
		if self._textbuffer:
			self._textbuffer.append_parsetree(tree)
		else:
			ourtree = self.get_parsetree()
			if ourtree:
				self.set_parsetree(ourtree + tree)
			else:
				self.set_parsetree(tree)

	def get_textbuffer(self, constructor=None):
		'''Get a C{Gtk.TextBuffer} for the page

		Will either return an existing buffer or construct a new one and return
		it. A C{Gtk.TextBuffer} can be shared between multiple C{Gtk.TextView}s.
		The page object owns the textbuffer to allow multiple views on the same
		page.

		Once a buffer is set, also methods like L{get_parsetree()} and
		L{set_parsetree()} will interact with this buffer.

		@param constructor: if no buffer was set previously, this function
		is called with the keyword argument C{parsetree} to construct
		the buffer.

		@returns: a C{TextBuffer} object or C{None} if no buffer is set and
		no constructor is provided.
		'''
		if self._textbuffer is None:
			if constructor is None:
				return None

			tree = self.get_parsetree()
			self._textbuffer = constructor(parsetree=tree)
			self._textbuffer.connect('modified-changed', self.on_buffer_modified_changed)

		return self._textbuffer

	def reload_textbuffer(self):
		'''Reload page content from source file and update the textbuffer if set

		NOTE: this method overwrites any changes in the C{textbuffer} or
		C{parsetree} that have not been saved to file !
		'''
		buffer = self._textbuffer
		# Unset the buffer first to force get_parsetree() to read the file
		self._textbuffer = None
		self._parsetree = None
		if buffer is not None:
			tree = self.get_parsetree()
			self._textbuffer = buffer
			buffer.set_modified(False)
			self._set_parsetree(tree)
				# load new tree in buffer, undo-able in 1 step
				# private method circumvents readonly check !
			self.set_modified(False)
		# else do nothing - source will be read with next call to `get_parsetree()`

	def dump(self, format, linker=None):
		'''Get content in a specific format

		Convenience method that converts the current parse tree to a
		particular format first.

		@param format: either a format module or a string
		that is understood by L{zim.formats.get_format()}.

		@param linker: a linker object (see e.g. L{BaseLinker})

		@returns: text as a list of lines or an empty list
		'''
		if isinstance(format, str):
			format = zim.formats.get_format(format)

		if linker is not None:
			linker.set_path(self)

		tree = self.get_parsetree()
		if tree:
			return format.Dumper(linker=linker).dump(tree)
		else:
			return []

	def parse(self, format, text, append=False):
		'''Store formatted text in the page

		Convenience method that parses text and sets the parse tree
		accordingly.

		@param format: either a format module or a string
		that is understood by L{zim.formats.get_format()}.
		@param text: text as a string or as a list of lines
		@param append: if C{True} the text is appended instead of
		replacing current content.
		'''
		if isinstance(format, str):
			format = zim.formats.get_format(format)

		if append:
			self.append_parsetree(format.Parser().parse(text))
		else:
			self.set_parsetree(format.Parser().parse(text))

	def get_links(self):
		'''Generator for links in the page content

		This method gives the raw links from the content, if you want
		nice L{Link} objects use
		L{index.list_links()<zim.index.Index.list_links()>} instead.

		@returns: yields a list of 3-tuples C{(type, href, attrib)}
		where:
		  - C{type} is the link type (e.g. "page" or "file")
		  - C{href} is the link itself
		  - C{attrib} is a dict with link properties
		'''
		# FIXME optimize with a ParseTree.get_links that does not
		#       use Node
		# NOTE(review): pop() removes "href" from elt.attrib, this assumes
		# findall() gives objects that do not share attrib with the tree
		tree = self.get_parsetree()
		if tree:
			for elt in tree.findall(zim.formats.LINK):
				href = elt.attrib.pop('href')
				type = link_type(href)
				yield type, href, elt.attrib

			for elt in tree.findall(zim.formats.IMAGE):
				if 'href' not in elt.attrib:
					continue
				href = elt.attrib.pop('href')
				type = link_type(href)
				yield type, href, elt.attrib


	def get_tags(self):
		'''Generator for tags in the page content

		@returns: yields an unordered list of unique 2-tuples
		C{(name, attrib)} for tags in the parsetree. The name is given
		without the leading "@".
		'''
		# FIXME optimize with a ParseTree.get_tags that does not
		#       use Node
		tree = self.get_parsetree()
		if tree:
			seen = set()
			for elt in tree.findall(zim.formats.TAG):
				name = elt.gettext()
				if name not in seen:
					seen.add(name)
					yield name.lstrip('@'), elt.attrib

	def get_anchors(self):
		'''Generator returning all the (explicit) anchors in the page content

		@returns: yields unique 2-tuples C{(name, attrib)}
		'''
		tree = self.get_parsetree()
		if tree:
			seen = set()
			for elt in tree.findall(zim.formats.ANCHOR):
				name = elt.gettext()
				if name not in seen:
					seen.add(name)
					yield name, elt.attrib

	def get_title(self):
		'''Returns the heading text of the page content, or the basename
		of the page when there is no content or no heading text
		'''
		tree = self.get_parsetree()
		if tree:
			return tree.get_heading_text() or self.basename
		else:
			return self.basename

	def heading_matches_pagename(self):
		'''Returns whether the heading matches the page name.
		Used to determine whether the page should have its heading
		auto-changed on rename/move.
		@returns: C{True} when the heading can be auto-changed.
		'''
		tree = self.get_parsetree()
		if tree:
			return tree.get_heading_text() == self.basename
		else:
			return False
830