# Copyright 2008-2015 Jaap Karssenberg <jaap.karssenberg@gmail.com>


import re
import logging
import itertools

logger = logging.getLogger('zim.notebook')


from zim.parsing import link_type
from zim.errors import Error

import zim.formats
import zim.fs
import zim.newfs

from zim.signals import SignalEmitter, SIGNAL_NORMAL

import zim.datetimetz as datetime


_pagename_reduce_colon_re = re.compile('::+')
_pagename_invalid_char_re = re.compile(
    '(' +
        r'^[_\W]+|(?<=:)[_\W]+' +
        '|' +
        '[' + re.escape(''.join(
            ("?", "#", "/", "\\", "*", '"', "<", ">", "|", "%", "\t", "\n", "\r")
        )) + ']' +
    ')',
    re.UNICODE)
    # This pattern matches a non-alphanumeric at start or after the ':'
    # separator. It also matches any invalid character.
    # The UNICODE flag is used to make the alphanumeric check international.
    # Raw strings are used for the parts containing "\W" so that the
    # backslash reaches the regex engine without an invalid string escape.


def shortest_unique_names(paths):
    '''Returns the shortest unique name for each path in paths

    Paths with a unique basename are represented by that basename. For
    paths that share a basename, parent name parts are prepended until
    the name differs from the conflicting paths at the same depth.

    @param paths: list of L{Path} objects
    @returns: list of strings, in the same order as C{paths}
    '''
    by_basename = {}
    for path in paths:
        by_basename.setdefault(path.basename, []).append(path)

    result = []
    for path in paths:
        conflicts = by_basename[path.basename]
        if len(conflicts) == 1:
            result.append(path.basename)
            continue

        conflicts.remove(path)
        conflicts.insert(0, path)  # shuffle path of interest to front
        reverse_paths = [reversed(p.name.split(':')) for p in conflicts]
        names = []
        # Walk all conflicting names from the basename upwards in lockstep;
        # index 0 always holds the part for the path of interest.
        for parts in itertools.zip_longest(*reverse_paths):
            if parts[0] is None:
                break  # ran out of parts for our own path
            names.append(parts[0])
            if parts[0] not in parts[1:]:
                break  # this part distinguishes us from the others

        result.append(':'.join(reversed(names)))

    return result


class Path(object):
    r'''Class representing a page name in the notebook

    This is the parent class for the Page class. It contains the name
    of the page and is used instead of the actual page object by methods
    that only need to know the name of the page. Path objects have no
    internal state and are essentially normalized page names. It also
    has a number of methods to compare page names and determining what
    the parent pages are etc.

    @ivar name: the full name of the path
    @ivar parts: all the parts of the name (split on ":")
    @ivar basename: the basename of the path (last part of the name)
    @ivar namespace: the name for the parent page or empty string
    @ivar isroot: C{True} when this Path represents the top level namespace
    @ivar parent: the L{Path} object for the parent page


    Valid characters in page names
    ==============================

    A number of characters are not valid in page names as used in Zim
    notebooks.

    Reserved characters are:
      - The ':' is reserved as separator
      - The '?' is reserved to encode url style options
      - The '#' is reserved as anchor separator
      - The '/' and '\' are reserved to distinguish file links & urls
      - First character of each part MUST be alphanumeric
        (including utf8 letters / numbers)

    For file system filenames we can not use:
    '\', '/', ':', '*', '?', '"', '<', '>', '|'
    (checked both win32 & posix)

    Do not allow '\n' and '\t' for obvious reasons

    Allowing '%' will cause problems with sql wildcards sooner
    or later - also for url decoding ambiguity it is better to
    keep this one reserved.

    All other characters are allowed in page names

    Note that Zim version < 0.42 used different rules that are not
    fully compatible, this is important when upgrading old notebooks.
    See L{Notebook.cleanup_pathname_zim028()}
    '''

    __slots__ = ('name',)

    @staticmethod
    def assertValidPageName(name):
        '''Raises an C{AssertionError} if C{name} does not represent
        a valid page name.
        This is a strict check, most names that fail this test can still
        be cleaned up by the L{makeValidPageName()}.
        @param name: a string
        @raises AssertionError: if the name is not valid
        '''
        assert isinstance(name, str)
        if not name.strip(':') \
        or _pagename_reduce_colon_re.search(name) \
        or _pagename_invalid_char_re.search(name):
            raise AssertionError('Not a valid page name: %s' % name)

    @staticmethod
    def makeValidPageName(name):
        '''Remove any invalid characters from the string and return
        a valid page name. The only string that can not be turned into
        something valid is a string that reduces to an empty string
        after removing all invalid characters.
        @param name: a string
        @returns: a string
        @raises ValueError: when the result would be an empty string
        '''
        newname = _pagename_reduce_colon_re.sub(':', name.strip(':'))
        newname = _pagename_invalid_char_re.sub('', newname)
        newname = newname.replace('_', ' ')
        try:
            Path.assertValidPageName(newname)
        except AssertionError:
            raise ValueError('Not a valid page name: %s (was: %s)' % (newname, name))
        return newname

    def __init__(self, name):
        '''Constructor.

        @param name: the absolute page name in the right case as a
        string or as a tuple strings

        The name ":" is used as a special case to construct a path for
        the toplevel namespace in a notebook.

        @note: This constructor does not do any checks for the sanity of
        the path name. Never construct a path directly from user input,
        but use either L{index.lookup_from_user_input()} or first check the
        name with L{makeValidPageName()}
        '''
        if isinstance(name, (list, tuple)):
            self.name = ':'.join(name)
        else:
            self.name = name.strip(':')

        try:
            self.name = str(self.name)
        except UnicodeDecodeError:
            raise ValueError('BUG: invalid input, page names should be in ascii, or given as unicode')

    @classmethod
    def new_from_zim_config(klass, string):
        '''Returns a new object based on the string representation for
        that path.
        '''
        return klass(klass.makeValidPageName(string))

    def serialize_zim_config(self):
        '''Returns the name for serializing this path'''
        return self.name

    def __repr__(self):
        return '<%s: %s>' % (self.__class__.__name__, self.name)

    def __str__(self):
        return self.name

    def __hash__(self):
        return self.name.__hash__()

    def __eq__(self, other):
        '''Paths are equal when their names are the same'''
        if isinstance(other, Path):
            return self.name == other.name
        else:  # e.g. path == None
            return False

    def __ne__(self, other):
        '''Paths are not equal when their names are not the same'''
        return not self.__eq__(other)

    def __add__(self, name):
        '''C{path + name} is an alias for C{path.child(name)}'''
        return self.child(name)

    @property
    def parts(self):
        '''Get all the parts of the name (split on ":")'''
        return self.name.split(':')

    @property
    def basename(self):
        '''Get the basename of the path (last part of the name)'''
        i = self.name.rfind(':') + 1
        return self.name[i:]

    @property
    def namespace(self):
        '''Gives the name for the parent page.
        Returns an empty string for the top level namespace.
        '''
        i = self.name.rfind(':')
        if i > 0:
            return self.name[:i]
        else:
            return ''

    @property
    def isroot(self):
        '''C{True} when this Path represents the top level namespace'''
        return self.name == ''

    def relname(self, path):  # TODO make this use HRef !
        '''Get a part of this path relative to a parent path

        @param path: a parent L{Path}

        @returns: the part of the path that is relative to C{path}
        @raises ValueError: if C{path} is not a parent of this path
        '''
        if path.name == '':  # root path
            return self.name
        elif self.name.startswith(path.name + ':'):
            i = len(path.name) + 1
            return self.name[i:].strip(':')
        else:
            raise ValueError('"%s" is not below "%s"' % (self, path))

    @property
    def parent(self):
        '''Get the path for the parent page, or C{None} for the root path'''
        namespace = self.namespace
        if namespace:
            return Path(namespace)
        elif self.isroot:
            return None
        else:
            return Path(':')

    def parents(self):
        '''Generator function for parent Paths including root'''
        if ':' in self.name:
            path = self.name.split(':')
            path.pop()
            while len(path) > 0:
                namespace = ':'.join(path)
                yield Path(namespace)
                path.pop()
        yield Path(':')

    def child(self, basename):
        '''Get a child Path

        @param basename: the relative name for the child
        @returns: a new L{Path} object
        '''
        return Path(self.name + ':' + basename)

    def ischild(self, parent):
        '''Check whether this path is a child of a given path
        @param parent: a L{Path} object
        @returns: True when this path is a (grand-)child of C{parent}
        '''
        return parent.isroot or self.name.startswith(parent.name + ':')

    def match_namespace(self, namespace):
        '''Check whether this path is in a specific section of the notebook
        @param namespace: a L{Path} object
        @returns: True when this path is equal to C{namespace} or is a (grand-)child of C{namespace}
        '''
        return namespace.isroot or self.name == namespace.name or self.name.startswith(namespace.name + ':')

    def commonparent(self, other):
        '''Find a common parent for two Paths

        @param other: another L{Path} object

        @returns: a L{Path} object for the first common parent, this is
        the root path when the first name parts already differ
        '''
        common = []
        for mine, theirs in zip(self.parts, other.parts):
            if mine != theirs:
                break
            common.append(mine)
        # An empty list joins to '' which is the name of the root path
        return Path(':'.join(common))


HREF_REL_ABSOLUTE = 0
HREF_REL_FLOATING = 1
HREF_REL_RELATIVE = 2


class HRef(object):
    '''Parsed representation of a page link

    @ivar rel: one of C{HREF_REL_ABSOLUTE}, C{HREF_REL_FLOATING} or
    C{HREF_REL_RELATIVE}
    @ivar names: the page name part of the link as a string, can be empty
    @ivar anchor: the part after the "#" in the link or C{None}
    '''

    __slots__ = ('rel', 'names', 'anchor')

    @classmethod
    def new_from_wiki_link(klass, href):
        '''Constructor that constructs a L{HRef} object for a link as
        written in zim's wiki syntax.
        @param href: a string for the link
        @returns: a L{HRef} object
        @raises ValueError: when the string could not be parsed
        (see L{Path.makeValidPageName()})

        @note: This method HRef class assumes the logic of our wiki links
        for other formats, a separate constructor may be needed
        '''
        if href.startswith(':'):
            rel = HREF_REL_ABSOLUTE
        elif href.startswith('+'):
            rel = HREF_REL_RELATIVE
        else:
            rel = HREF_REL_FLOATING

        anchor = None
        if '#' in href:
            href, anchor = href.split('#', 1)

        # A link that only consists of an anchor has no page name part
        names = Path.makeValidPageName(href.lstrip('+')) if href else ""

        return klass(rel, names, anchor)

    def __init__(self, rel, names, anchor=None):
        self.rel = rel
        self.names = names
        self.anchor = anchor

    def __str__(self):
        rel = {HREF_REL_ABSOLUTE: 'abs', HREF_REL_FLOATING: 'float', HREF_REL_RELATIVE: 'rel'}[self.rel]
        return '<%s: %s %s %s>' % (self.__class__.__name__, rel, self.names, self.anchor)

    def parts(self):
        '''Returns the name parts (split on ":") or an empty list'''
        return self.names.split(':') if self.names else []

    def to_wiki_link(self):
        '''Returns href as text for wiki link'''
        if self.rel == HREF_REL_ABSOLUTE:
            link = ":" + self.names.strip(':')
        elif self.rel == HREF_REL_RELATIVE:
            link = "+" + self.names
        else:
            link = self.names
        if self.anchor:
            link += "#" + self.anchor
        return link


class SourceFile(zim.fs.File):
    '''File object that refuses all write access'''

    def iswritable(self):
        return False

    def write(self, *a):
        raise AssertionError('Not writeable')

    def writelines(self, *a):
        raise AssertionError('Not writeable')


class PageReadOnlyError(Error):
    _msg = _('Can not modify page: %s')  # T: error message for read-only pages


class Page(Path, SignalEmitter):
    '''Class to represent a single page in the notebook.

    Page objects inherit from L{Path} but have internal state reflecting
    content in the notebook. We try to keep Page objects unique
    by hashing them in L{Notebook.get_page()}, Path object on the other
    hand are cheap and can have multiple instances for the same logical path.
    We ask for a path object instead of a name in the constructor to
    encourage the use of Path objects over passing around page names as
    string.

    You can use a Page object instead of a Path anywhere in the APIs where
    a path is needed as argument etc.

    @ivar name: full page name (inherited from L{Path})
    @ivar hascontent: C{True} if the page has content
    @ivar haschildren: C{True} if the page has sub-pages
    @ivar modified: C{True} if the page was modified since the last
    store. Will be reset by L{Notebook.store_page()}
    @ivar readonly: C{True} when the page is read-only or belongs to a readonly notebook

    @signal: C{storage-changed (changed-on-disk)}: signal emitted on page
    change. The argument "changed-on-disk" is C{True} when an external
    edit was detected. For internal edits it is C{False}.
    @signal: C{modified-changed ()}: emitted when the page is edited
    '''

    __signals__ = {
        'storage-changed': (SIGNAL_NORMAL, None, (bool,)),
        'modified-changed': (SIGNAL_NORMAL, None, ()),
    }

    def __init__(self, path, haschildren, file, folder, format):
        '''Constructor

        @param path: a L{Path} object giving the page name
        @param haschildren: whether the page has sub-pages
        @param file: the source file object for this page
        @param folder: the attachments folder object for this page
        @param format: either a format module or a string
        that is understood by L{zim.formats.get_format()}
        '''
        assert isinstance(path, Path)
        self.name = path.name
        self.haschildren = haschildren
            # Note: this attribute is updated by the owning notebook
            # when a child page is stored
        self._modified = False
        self._change_counter = 0
        self._parsetree = None
        self._textbuffer = None
        self._meta = None

        self._readonly = None
        self._last_etag = None
        if isinstance(format, str):
            self.format = zim.formats.get_format(format)
        else:
            self.format = format
        self.source = SourceFile(file.path)  # XXX
        self.source_file = file
        self.attachments_folder = folder

    @property
    def readonly(self):
        # Determined lazily on first access and cached afterwards
        if self._readonly is None:
            self._readonly = not self.source_file.iswritable()
        return self._readonly

    @property
    def mtime(self):
        return self.source_file.mtime() if self.source_file.exists() else None

    @property
    def ctime(self):
        return self.source_file.ctime() if self.source_file.exists() else None

    @property
    def hascontent(self):
        '''Returns whether this page has content'''
        if self._textbuffer:
            return self._textbuffer.hascontent
        elif self._parsetree:
            return self._parsetree.hascontent
        else:
            return self.source_file.exists()

    @property
    def modified(self):
        return self._modified

    def set_modified(self, modified):
        '''Set the modified state and emit C{modified-changed}

        @param modified: C{True} to flag the page as modified
        '''
        if modified:
            # HACK: by setting page.modified to a number rather than a
            # bool we can use this number to check against race conditions
            # in notebook.store_page_async post handler
            self._change_counter = max(1, (self._change_counter + 1) % 1000)
            self._modified = self._change_counter
            assert bool(self._modified) is True, 'BUG in counter'
        else:
            self._modified = False
        self.emit('modified-changed')

    def on_buffer_modified_changed(self, buffer):
        # one-way traffic, set page modified after modifying the buffer
        # but do not set page.modified False again when buffer goes
        # back to un-modified. Reason is that we use the buffer modified
        # state to track if we already requested the parse tree (see
        # get_parsetree()) while page modified is used to track need
        # for saving and is reset after save was done
        if buffer.get_modified():
            if self.readonly:
                logger.warning('Buffer edited while page read-only - potential bug')
            self.set_modified(True)

    def _store(self):
        tree = self.get_parsetree()
        self._store_tree(tree)

    def _store_tree(self, tree):
        '''Write C{tree} to the source file, or remove the source file
        when C{tree} is C{None} or has no content. Emits
        C{storage-changed} afterwards.

        @param tree: a L{zim.formats.ParseTree} object or C{None}
        '''
        if tree and tree.hascontent:
            if self._meta is None and self.source_file.exists():
                # Headers were not loaded yet, try getting them from file.
                # The file is parsed into a separate tree: only the headers
                # are taken over, the content to be stored stays C{tree}.
                try:
                    text = self.source_file.read()
                except zim.newfs.FileNotFoundError:
                    pass  # file vanished in the mean time, handle as new page
                else:
                    parser = self.format.Parser()
                    # "file_input" matches how get_parsetree() reads this file
                    self._meta = parser.parse(text, file_input=True).meta

            if self._meta is not None:
                tree.meta.update(self._meta)  # Preserve headers
            else:  # no existing source file
                now = datetime.now()
                tree.meta['Creation-Date'] = now.isoformat()

            lines = self.format.Dumper().dump(tree, file_output=True)
            self._last_etag = self.source_file.writelines_with_etag(lines, self._last_etag)
            self._meta = tree.meta
        else:
            self.source_file.remove()
            self._last_etag = None
            self._meta = None
        self.emit('storage-changed', False)

    def check_source_changed(self):
        '''Checks for changes in the source file and load it if needed

        If the page has a C{textbuffer} and it contains unsaved changes, this
        method will not overwrite them and you'll get an error on next attempt
        to save. To force overwrite see L{reload_textbuffer()}

        @returns: C{True} when a change was detected, C{False} otherwise
        '''
        if (
            self._last_etag
            and not (self.source_file.exists() and self.source_file.verify_etag(self._last_etag))
        ) or (
            not self._last_etag
            and self.source_file.exists()
        ):
            logger.info('Page changed on disk: %s', self.name)
            self._last_etag = None
            self._meta = None
            if self._textbuffer and not self._textbuffer.get_modified():
                self.reload_textbuffer()
            else:
                self._parsetree = None

            self.emit('storage-changed', True)
            return True
        else:
            return False

    def exists(self):
        '''C{True} when the page has either content or children'''
        return self.haschildren or self.hascontent

    def isequal(self, other):
        '''Check equality of pages
        This method is intended to deal with case-insensitive storage
        backends (e.g. case insensitive file system) where the method
        is supposed to check equality of the resource.
        Note that this may be the case even when the page objects differ
        and can have a different name (so L{__eq__} will not show
        them to be equal). However default falls back to L{__eq__}.
        @returns: C{True} if both page objects point to the same resource
        @implementation: can be implemented by subclasses
        '''
        if self is other or self == other:
            return True
        elif self.source_file.exists():
            return self.source_file.isequal(other.source_file)
        else:
            return False

    def get_parsetree(self):
        '''Returns the contents of the page

        @returns: a L{zim.formats.ParseTree} object or C{None}
        '''
        if self._textbuffer:
            if self._textbuffer.get_modified() or self._parsetree is None:
                self._parsetree = self._textbuffer.get_parsetree()
                self._textbuffer.set_modified(False)
            return self._parsetree
        elif self._parsetree:
            return self._parsetree
        else:
            try:
                text, self._last_etag = self.source_file.read_with_etag()
            except zim.newfs.FileNotFoundError:
                return None
            else:
                parser = self.format.Parser()
                self._parsetree = parser.parse(text, file_input=True)
                self._meta = self._parsetree.meta
                assert self._meta is not None
                return self._parsetree

    def set_parsetree(self, tree):
        '''Set the parsetree with content for this page

        @param tree: a L{zim.formats.ParseTree} object with content
        or C{None} to remove all content from the page

        @raises PageReadOnlyError: when the page is read-only

        @note: after setting new content in the Page object it still
        needs to be stored in the notebook to save this content
        permanently. See L{Notebook.store_page()}.
        '''
        if self.readonly:
            raise PageReadOnlyError(self)
        self._set_parsetree(tree)

    def _set_parsetree(self, tree):
        self._parsetree = tree
        if self._textbuffer:
            assert not self._textbuffer.get_modified(), 'BUG: changing parsetree while buffer was changed as well'
            try:
                if tree is None:
                    self._textbuffer.clear()
                else:
                    self._textbuffer.set_parsetree(tree)
            except BaseException:
                # Prevent auto-save to kick in at any cost
                self._textbuffer.set_modified(False)
                raise
            else:
                self._textbuffer.set_modified(False)

        self.set_modified(True)

    def append_parsetree(self, tree):
        '''Append content

        @param tree: a L{zim.formats.ParseTree} object with content
        '''
        if self._textbuffer:
            self._textbuffer.append_parsetree(tree)
        else:
            ourtree = self.get_parsetree()
            if ourtree:
                self.set_parsetree(ourtree + tree)
            else:
                self.set_parsetree(tree)

    def get_textbuffer(self, constructor=None):
        '''Get a C{Gtk.TextBuffer} for the page

        Will either return an existing buffer or construct a new one and return
        it. A C{Gtk.TextBuffer} can be shared between multiple C{Gtk.TextView}s.
        The page object owns the textbuffer to allow multiple views on the same
        page.

        Once a buffer is set, also methods like L{get_parsetree()} and
        L{set_parsetree()} will interact with this buffer.

        @param constructor: if no buffer was set previously, this function
        is called to construct the buffer.

        @returns: a C{TextBuffer} object or C{None} if no buffer is set and
        no constructor is provided.
        '''
        if self._textbuffer is None:
            if constructor is None:
                return None

            tree = self.get_parsetree()
            self._textbuffer = constructor(parsetree=tree)
            self._textbuffer.connect('modified-changed', self.on_buffer_modified_changed)

        return self._textbuffer

    def reload_textbuffer(self):
        '''Reload page content from source file and update the textbuffer if set

        NOTE: this method overwrites any changes in the C{textbuffer} or
        C{parsetree} that have not been saved to file !
        '''
        buffer = self._textbuffer
        # Temporarily detach the buffer so get_parsetree() reads from file
        self._textbuffer = None
        self._parsetree = None
        if buffer is not None:
            tree = self.get_parsetree()
            self._textbuffer = buffer
            buffer.set_modified(False)
            self._set_parsetree(tree)
                # load new tree in buffer, undo-able in 1 step
                # private method circumvents readonly check !
            self.set_modified(False)
        # else do nothing - source will be read with next call to `get_parsetree()`

    def dump(self, format, linker=None):
        '''Get content in a specific format

        Convenience method that converts the current parse tree to a
        particular format first.

        @param format: either a format module or a string
        that is understood by L{zim.formats.get_format()}.

        @param linker: a linker object (see e.g. L{BaseLinker})

        @returns: text as a list of lines or an empty list
        '''
        if isinstance(format, str):
            format = zim.formats.get_format(format)

        if linker is not None:
            linker.set_path(self)

        tree = self.get_parsetree()
        if tree:
            return format.Dumper(linker=linker).dump(tree)
        else:
            return []

    def parse(self, format, text, append=False):
        '''Store formatted text in the page

        Convenience method that parses text and sets the parse tree
        accordingly.

        @param format: either a format module or a string
        that is understood by L{zim.formats.get_format()}.
        @param text: text as a string or as a list of lines
        @param append: if C{True} the text is appended instead of
        replacing current content.
        '''
        if isinstance(format, str):
            format = zim.formats.get_format(format)

        if append:
            self.append_parsetree(format.Parser().parse(text))
        else:
            self.set_parsetree(format.Parser().parse(text))

    def get_links(self):
        '''Generator for links in the page content

        This method gives the raw links from the content, if you want
        nice L{Link} objects use
        L{index.list_links()<zim.index.Index.list_links()>} instead.

        @returns: yields a list of 3-tuples C{(type, href, attrib)}
        where:
          - C{type} is the link type (e.g. "page" or "file")
          - C{href} is the link itself
          - C{attrib} is a dict with link properties
        '''
        # FIXME optimize with a ParseTree.get_links that does not
        #       use Node
        tree = self.get_parsetree()
        if tree:
            for elt in tree.findall(zim.formats.LINK):
                href = elt.attrib.pop('href')
                type = link_type(href)
                yield type, href, elt.attrib

            for elt in tree.findall(zim.formats.IMAGE):
                if not 'href' in elt.attrib:
                    continue
                href = elt.attrib.pop('href')
                type = link_type(href)
                yield type, href, elt.attrib

    def get_tags(self):
        '''Generator for tags in the page content

        @returns: yields an unordered list of unique 2-tuples
        C{(name, attrib)} for tags in the parsetree.
        '''
        # FIXME optimize with a ParseTree.get_links that does not
        #       use Node
        tree = self.get_parsetree()
        if tree:
            seen = set()
            for elt in tree.findall(zim.formats.TAG):
                name = elt.gettext()
                if not name in seen:
                    seen.add(name)
                    yield name.lstrip('@'), elt.attrib

    def get_anchors(self):
        '''Generator returning all the (explicit) anchors in the page content'''
        tree = self.get_parsetree()
        if tree:
            seen = set()
            for elt in tree.findall(zim.formats.ANCHOR):
                name = elt.gettext()
                if not name in seen:
                    seen.add(name)
                    yield name, elt.attrib

    def get_title(self):
        '''Returns the heading text of the page, or the basename of the
        page when there is no content or no heading text
        '''
        tree = self.get_parsetree()
        if tree:
            return tree.get_heading_text() or self.basename
        else:
            return self.basename

    def heading_matches_pagename(self):
        '''Returns whether the heading matches the page name.
        Used to determine whether the page should have its heading
        auto-changed on rename/move.
        @returns: C{True} when the heading can be auto-changed.
        '''
        tree = self.get_parsetree()
        if tree:
            return tree.get_heading_text() == self.basename
        else:
            return False