1 2# Copyright 2009-2018 Jaap Karssenberg <jaap.karssenberg@gmail.com> 3 4 5from datetime import datetime 6 7import sqlite3 8import logging 9 10logger = logging.getLogger('zim.notebook.index') 11 12from zim.utils import natural_sort_key 13from zim.notebook.page import Path, HRef, \ 14 HREF_REL_ABSOLUTE, HREF_REL_FLOATING, HREF_REL_RELATIVE 15from zim.tokenparser import TokenBuilder 16 17from zim.formats import ParseTreeBuilder 18 19from .base import * 20 21 22from zim.notebook.layout import \ 23FILE_TYPE_PAGE_SOURCE, \ 24FILE_TYPE_ATTACHMENT 25 26ROOT_PATH = Path(':') 27ROOT_ID = 1 # Constant for the ID of the root namespace in "pages" 28 # (Primary key starts count at 1 and first entry will be root) 29 30PAGE_EXISTS_UNCERTAIN = 0 # e.g. folder with unknown children - not shown to outside world 31PAGE_EXISTS_AS_LINK = 1 # placeholder for link target 32PAGE_EXISTS_HAS_CONTENT = 2 # either has content or children have content 33 34 35def emptyParseTree(): 36 b = ParseTreeBuilder() 37 b.start('zim-tree') 38 b.end('zim-tree') 39 return b.get_parsetree() 40 41 42class PagesIndexer(IndexerBase): 43 '''Indexer for the "pages" table. 44 45 @signal: C{page-row-inserted (row)}: new row inserted 46 @signal: C{page-row-changed (row, oldrow)}: row changed 47 @signal: C{page-row-delete (row)}: row to be deleted 48 @signal: C{page-row-deleted (row)}: row that has been deleted 49 50 @signal: C{page-changed (row, content)}: page contents changed 51 ''' 52 53 __signals__ = { 54 'page-row-inserted': (None, None, (object,)), 55 'page-row-changed': (None, None, (object, object)), 56 'page-row-delete': (None, None, (object,)), 57 'page-row-deleted': (None, None, (object,)), 58 'page-changed': (None, None, (object, object)) 59 } 60 61 def __init__(self, db, layout, filesindexer): 62 IndexerBase.__init__(self, db) 63 self.layout = layout 64 self.connectto_all(filesindexer, ( 65 'file-row-inserted', 'file-row-changed', 'file-row-deleted' 66 )) 67 68 self.db.executescript(''' 69 CREATE TABLE IF NOT EXISTS pages( 70 id INTEGER PRIMARY KEY, 71 parent INTEGER REFERENCES pages(id), 72 n_children INTEGER DEFAULT 0, 73 74 name TEXT UNIQUE NOT NULL, 75 lowerbasename TEXT NOT NULL, 76 sortkey TEXT NOT NULL, 77 mtime TIMESTAMP, 78 79 source_file INTEGER REFERENCES files(id), 80 is_link_placeholder BOOLEAN DEFAULT 0 81 82 CONSTRAINT no_self_ref CHECK (parent <> id) 83 ); 84 CREATE UNIQUE INDEX IF NOT EXISTS pages_name ON pages(name); 85 CREATE INDEX IF NOT EXISTS pages_sortkey ON pages(sortkey); 86 CREATE INDEX IF NOT EXISTS pages_parent ON pages(parent); 87 ''') 88 row = self.db.execute('SELECT * FROM pages WHERE id == 1').fetchone() 89 if row is None: 90 c = self.db.execute( 91 'INSERT INTO pages(parent, name, lowerbasename, sortkey, source_file) ' 92 'VALUES (?, ?, ?, ?, ?)', 93 (0, '', '', '', 1) 94 ) 95 assert c.lastrowid == 1 # ensure we start empty 96 97 def _select(self, pagename): 98 return self.db.execute( 99 'SELECT * FROM pages WHERE name=?', (pagename.name,) 100 ).fetchone() 101 102 # We should not read file contents on db-file-inserted because 103 # there can be many in one iterarion when the FileIndexer indexes 104 # a folder. Therefore we only send page-changed in response to 105 # db-file-updated and trust we get this signal for each file 106 # that is inserted in a separate iteration. 107 108 def on_file_row_inserted(self, o, filerow): 109 pagename, file_type = self.layout.map_filepath(filerow['path']) 110 if file_type != FILE_TYPE_PAGE_SOURCE: 111 return # nothing to do 112 113 row = self._select(pagename) 114 if row is None: 115 self.insert_page(pagename, filerow['id']) 116 elif row['source_file'] is None: 117 self.db.execute( 118 'UPDATE pages SET source_file=?, mtime=?, is_link_placeholder=? WHERE name=?', 119 (filerow['id'], None, False, pagename.name) 120 ) 121 self.update_parent(pagename.parent) 122 newrow = self._select(pagename) 123 self.emit('page-row-changed', newrow, row) 124 else: 125 # TODO: Flag conflict 126 raise NotImplementedError 127 128 def on_file_row_changed(self, o, filerow): 129 pagename, file_type = self.layout.map_filepath(filerow['path']) 130 if file_type != FILE_TYPE_PAGE_SOURCE: 131 return # nothing to do 132 133 row = self._select(pagename) 134 if row is None: 135 self.insert_page(pagename, filerow['id']) # May have been changed file type 136 row = self._select(pagename) 137 138 if row['source_file'] == filerow['id']: 139 file = self.layout.root.file(filerow['path']) 140 format = self.layout.get_format(file) 141 mtime = file.mtime() 142 tree = format.Parser().parse(file.read()) 143 self.update_page(pagename, mtime, tree) 144 else: 145 pass # some conflict file changed 146 147 def on_file_row_deleted(self, o, filerow): 148 pagename, file_type = self.layout.map_filepath(filerow['path']) 149 if file_type != FILE_TYPE_PAGE_SOURCE: 150 return # nothing to do 151 152 row = self._select(pagename) 153 if row is None: 154 return # not a source file after all (e.g. .txt attachment) 155 156 if row['source_file'] == filerow['id']: 157 if row['n_children'] > 0: 158 self.db.execute( 159 'UPDATE pages SET source_file=?, mtime=? WHERE name=?', 160 (None, None, pagename.name) 161 ) 162 self.update_parent(pagename, oldrow=row) 163 # checks if any children have sources - else will be removed 164 try: 165 row = self._select(pagename) 166 self.emit('page-changed', row, emptyParseTree()) 167 except IndexNotFoundError: 168 pass 169 else: 170 self.remove_page(pagename) 171 else: 172 pass # some conflict removed - or not a source file in the first place (.txt attachment) 173 174 def insert_page(self, pagename, file_id): 175 return self._insert_page(pagename, False, file_id) 176 177 def insert_link_placeholder(self, pagename): 178 return self._insert_page(pagename, True) 179 180 def delete_link_placeholder(self, pagename): 181 row = self._select(pagename) 182 assert row is not None 183 184 if not row['is_link_placeholder']: 185 raise AssertionError('Not a placeholder') 186 else: 187 self.remove_page(pagename) 188 189 def _insert_page(self, pagename, is_link_placeholder, file_id=None): 190 assert not (is_link_placeholder and file_id) 191 192 # insert parents 193 parent_row = self._select(pagename.parent) 194 if parent_row is None: 195 self._insert_page(pagename.parent, is_link_placeholder) # recurs 196 parent_row = self._select(pagename.parent) 197 assert parent_row is not None 198 199 # insert new page 200 lowerbasename = pagename.basename.lower() 201 sortkey = natural_sort_key(pagename.basename) 202 try: 203 self.db.execute( 204 'INSERT INTO pages(name, lowerbasename, sortkey, parent, is_link_placeholder, source_file)' 205 'VALUES (?, ?, ?, ?, ?, ?)', 206 (pagename.name, lowerbasename, sortkey, parent_row['id'], is_link_placeholder, file_id) 207 ) 208 except sqlite3.IntegrityError: 209 # This can occur in rare edge cases when resolve_page failed to 210 # see a page existed already - typically due to locale changes 211 # affecting sortkey 212 logger.exception('Error while inserting page - re-index needed?') 213 self.db.execute( 214 'UPDATE pages SET sortkey=? WHERE name=?', 215 (sortkey, pagename.name) 216 ) 217 row = self._select(pagename) 218 else: 219 row = self._select(pagename) 220 self._update_parent_nchildren(pagename.parent) 221 self.emit('page-row-inserted', row) 222 223 # update parent(s) 224 self.update_parent(pagename.parent) 225 226 return row['id'] 227 228 def update_parent(self, parentname, allow_cleanup=lambda r: True, oldrow=None): 229 row = self._select(parentname) 230 assert row is not None 231 232 # get new status 233 n_children, all_child_are_placeholder = self.db.execute( 234 'SELECT count(*), min(is_link_placeholder) FROM pages WHERE parent=?', 235 # "min()" works as "any(not is_link_placeholder)" 236 # because False is "0" in sqlite 237 (row['id'],) 238 ).fetchone() 239 if all_child_are_placeholder is None: 240 all_child_are_placeholder = True 241 242 if n_children == 0 and row['source_file'] is None and allow_cleanup(row): 243 # cleanup if no longer needed 244 self.db.execute( 245 'UPDATE pages SET n_children=? WHERE id=?', 246 (n_children, row['id']) 247 ) 248 self.remove_page(parentname, allow_cleanup) # indirect recurs 249 else: 250 # update table 251 is_placeholder = row['source_file'] is None and all_child_are_placeholder 252 self.db.execute( 253 'UPDATE pages SET n_children=?, is_link_placeholder=? WHERE id=?', 254 (n_children, is_placeholder, row['id']) 255 ) 256 if bool(row['is_link_placeholder']) is not is_placeholder: 257 self.update_parent(parentname.parent) # recurs 258 259 # notify others 260 if not parentname.isroot: 261 newrow = self._select(parentname) 262 self.emit('page-row-changed', newrow, oldrow or row) 263 264 def update_page(self, pagename, mtime, content): 265 self.db.execute( 266 'UPDATE pages SET mtime=? WHERE name=?', 267 (mtime, pagename.name), 268 ) 269 270 row = self._select(pagename) 271 self.emit('page-changed', row, content) 272 self.emit('page-row-changed', row, row) 273 274 def remove_page(self, pagename, allow_cleanup=lambda r: True): 275 # allow_cleanup is used by LinksIndexer when cleaning up placeholders 276 277 row = self._select(pagename) 278 assert row['id'] != 1, 'BUG: can\'t delete notebook root' 279 if row['n_children'] > 0: 280 raise AssertionError('Page has child pages') 281 282 self.emit('page-row-delete', row) 283 self.db.execute('DELETE FROM pages WHERE name=?', (pagename.name,)) 284 self._update_parent_nchildren(pagename.parent) 285 self.emit('page-row-deleted', row) 286 self.update_parent(pagename.parent, allow_cleanup) 287 288 def _update_parent_nchildren(self, parentname): 289 # parent n_children needs to be up-to-date when we emit the "deleted" 290 # signal, else Gtk.TreeView sees an inconsistency 291 # We still call update_parent() after the fact to do the rest of the 292 # house keeping 293 row = self._select(parentname) 294 assert row is not None 295 296 n_children, = self.db.execute( 297 'SELECT count(*) FROM pages WHERE parent=?', 298 (row['id'],) 299 ).fetchone() 300 self.db.execute( 301 'UPDATE pages SET n_children=? WHERE id=?', 302 (n_children, row['id']) 303 ) 304 305 306class PageIndexRecord(Path): 307 '''Object representing a page L{Path} in the index, with data 308 for the corresponding row in the C{pages} table. 309 ''' 310 311 __slots__ = ('_row') 312 313 def __init__(self, row): 314 '''Constructor 315 @param row: a C{sqlite3.Row} object for this page in the 316 "pages" table, specifies most other attributes for this object 317 The property C{hasdata} is C{True} when the row is set. 318 ''' 319 Path.__init__(self, row['name']) 320 self._row = row 321 322 @property 323 def id(self): return self._row['id'] 324 325 @property 326 def hascontent(self): return self._row['source_file'] is not None 327 328 @property 329 def haschildren(self): return self._row['n_children'] > 0 330 331 @property 332 def n_children(self): return self._row['n_children'] 333 334 @property 335 def mtime(self): return self._row['mtime'] 336 337 def exists(self): 338 return not self._row['is_link_placeholder'] 339 340 341class PagesViewInternal(object): 342 '''This class defines private methods used by L{PagesView}, 343 L{LinksView}, L{TagsView} and others. 344 ''' 345 346 def __init__(self, db): 347 self.db = db 348 349 def get_pagename(self, page_id): 350 row = self.db.execute( 351 'SELECT * FROM pages WHERE id=?', (page_id,) 352 ).fetchone() 353 if row is None: 354 raise IndexConsistencyError('No page for page_id "%r"' % page_id) 355 return PageIndexRecord(row) 356 357 def get_page_id(self, pagename): 358 row = self.db.execute( 359 'SELECT id FROM pages WHERE name=?', (pagename.name,) 360 ).fetchone() 361 if row is None: 362 raise IndexNotFoundError('Page not found in index: %s' % pagename.name) 363 return row['id'] 364 365 def resolve_link(self, source, href, ignore_link_placeholders=True, source_id=None): 366 parent, parent_id, names = self._resolve_link(source, href, ignore_link_placeholders, source_id) 367 return self.resolve_pagename(parent, names) 368 369 def _resolve_link(self, source, href, ignore_link_placeholders=True, source_id=None): 370 if href.rel == HREF_REL_ABSOLUTE or source.isroot: 371 return (ROOT_PATH, ROOT_ID, href.parts()) 372 373 start, start_id, relnames = source, source_id, [] 374 while start_id is None: 375 # Do not assume source exists, find start point that does 376 try: 377 start_id = self.get_page_id(start) 378 except IndexNotFoundError: 379 relnames.insert(0, start.basename) 380 start = start.parent 381 else: 382 break 383 384 if href.rel == HREF_REL_RELATIVE: 385 return (start, start_id, relnames + href.parts()) 386 else: 387 # HREF_REL_FLOATING 388 # Search upward namespaces for existing pages, 389 # By default ignore link placeholders to avoid circular 390 # dependencies between links and placeholders 391 assert href.rel == HREF_REL_FLOATING 392 393 # link to anchor on current page 394 if not href.parts(): 395 return (start, start_id, relnames + href.parts()) 396 397 anchor_key = natural_sort_key(href.parts()[0]) 398 399 if relnames: 400 # Check if we are anchored in non-existing part 401 keys = list(map(natural_sort_key, relnames)) 402 if anchor_key in keys: 403 i = [c for c, k in enumerate(keys) if k == anchor_key][-1] 404 return (start, start_id, relnames[:i] + href.parts()) 405 406 if ignore_link_placeholders: 407 c = self.db.execute( 408 'SELECT name, id FROM pages ' 409 'WHERE sortkey=? and is_link_placeholder=0 ' 410 'ORDER BY name DESC', 411 (anchor_key,) 412 ) # sort longest first 413 else: 414 c = self.db.execute( 415 'SELECT name, id FROM pages ' 416 'WHERE sortkey=? ' 417 'ORDER BY name DESC', 418 (anchor_key,) 419 ) # sort longest first 420 421 maxdepth = source.name.count(':') 422 depth = -1 # level where items were found 423 found = [] # candidates that match the link - these can only differ in case of the basename 424 for name, pid in c: 425 mydepth = name.count(':') 426 if mydepth > maxdepth: 427 continue 428 elif mydepth < depth: 429 break 430 431 if mydepth > 0: # check whether we have a common parent 432 parentname = name.rsplit(':', 1)[0] 433 if start.name.startswith(parentname): 434 depth = mydepth 435 found.append((name, pid)) 436 else: # resolve from root namespace 437 found.append((name, pid)) 438 439 if found: # try to match case first, else just use first match 440 parts = href.parts() 441 anchor = parts.pop(0) 442 for name, pid in found: 443 if name.endswith(anchor): 444 return (Path(name), pid, parts) 445 else: 446 name, pid = found[0] 447 return (Path(name), pid, parts) 448 449 else: 450 # Return "brother" of source 451 if relnames: 452 return (start, start_id, relnames[:-1] + href.parts()) 453 else: 454 return (start.parent, None, href.parts()) 455 456 def resolve_pagename(self, parent, names, parent_id=None): 457 '''Resolve a pagename in the right case''' 458 # We do not ignore placeholders here. This can lead to a dependencies 459 # in how links are resolved based on order of indexing. However, this 460 # is not really a problem. Ignoring them means you could see duplicates 461 # if the tree for multiple links with slightly different spelling. 462 # Also we would need another call to return the page_id if a resolved 463 # page happens to exist. 464 assert isinstance(parent, Path) 465 pagename = parent 466 page_id = parent_id or self.get_page_id(parent) 467 for i, basename in enumerate(names): 468 sortkey = natural_sort_key(basename) 469 candidates = self.db.execute( 470 'SELECT id, name FROM pages ' 471 'WHERE parent=? and sortkey=? ORDER BY name', 472 (page_id, sortkey) 473 ).fetchall() 474 475 exact = pagename.child(basename).name 476 for row in candidates: 477 if row['name'] == exact: 478 pagename = Path(row['name']) 479 page_id = row['id'] 480 break 481 else: 482 if candidates: # case insensitive match(es) 483 row = candidates[0] 484 pagename = Path(row['name']) 485 page_id = row['id'] 486 else: # no match 487 return None, pagename.child(':'.join(names[i:])) 488 else: 489 return page_id, pagename 490 491 def walk(self, parent_id): 492 # Need to do this recursive to preserve sorting 493 # else we could just do "name LIKE parent%" 494 for row in self.db.execute( 495 'SELECT * FROM pages WHERE parent=? ' 496 'ORDER BY sortkey, name', 497 (parent_id,) 498 ): 499 yield PageIndexRecord(row) 500 if row['n_children'] > 0: 501 for child in self.walk(row['id']): # recurs 502 yield child 503 504 def walk_bottomup(self, parent_id): 505 for row in self.db.execute( 506 'SELECT * FROM pages WHERE parent=? ' 507 'ORDER BY sortkey, name', 508 (parent_id,) 509 ): 510 if row['n_children'] > 0: 511 for child in self.walk_bottomup(row['id']): # recurs 512 yield child 513 yield PageIndexRecord(row) 514 515 516class PagesView(IndexView): 517 '''Index view that exposes the "pages" table in the index''' 518 519 def __init__(self, db): 520 IndexView.__init__(self, db) 521 self._pages = PagesViewInternal(db) 522 523 def lookup_by_pagename(self, pagename): 524 r = self.db.execute( 525 'SELECT * FROM pages WHERE name=?', (pagename.name,) 526 ).fetchone() 527 if r is None: 528 raise IndexNotFoundError 529 else: 530 return PageIndexRecord(r) 531 532 def list_pages(self, path=None): 533 '''Generator for child pages of C{path} 534 @param path: a L{Path} object 535 @returns: yields L{Path} objects for children of C{path} 536 @raises IndexNotFoundError: if C{path} is not found in the index 537 ''' 538 if path is None: 539 page_id = ROOT_ID 540 else: 541 page_id = self._pages.get_page_id(path) # can raise 542 return self._list_pages(page_id) 543 544 def _list_pages(self, page_id): 545 for row in self.db.execute( 546 'SELECT * FROM pages WHERE parent=? ORDER BY sortkey, name', 547 (page_id,) 548 ): 549 yield PageIndexRecord(row) 550 551 def n_list_pages(self, path=None): 552 page_id = self._pages.get_page_id(path or ROOT_PATH) 553 c, = self.db.execute( 554 'SELECT COUNT(*) FROM pages WHERE parent=?', (page_id,) 555 ).fetchone() 556 return c 557 558 def match_pages(self, path, text, limit=10): 559 '''Generator for child pages of C{path} that match C{text} in their name 560 @param path: a L{Path} object 561 @param text: a string 562 @param limit: max number of results 563 @returns: yields L{Path} objects for children of C{path} 564 @raises IndexNotFoundError: if C{path} is not found in the index 565 ''' 566 if path is None: 567 page_id = ROOT_ID 568 else: 569 page_id = self._pages.get_page_id(path) # can raise 570 return self._match_pages(page_id, text, limit) 571 572 def _match_pages(self, page_id, text, limit): 573 # The LIKE keyword does not handle unicode case-insensitivity 574 # therefore we need python lower() to do the job 575 for row in self.db.execute( 576 'SELECT * FROM pages WHERE parent=? and lowerbasename LIKE ? ORDER BY sortkey, name LIMIT ?', 577 (page_id, "%%%s%%" % text.lower(), limit) 578 ): 579 yield PageIndexRecord(row) 580 581 def match_all_pages(self, text, limit=10): 582 '''Like C{match_pages()} except not limited a specific namespace''' 583 for row in self.db.execute( 584 'SELECT * FROM pages WHERE lowerbasename LIKE ? ORDER BY length(name), sortkey, name LIMIT ?', 585 ("%%%s%%" % text.lower(), limit) 586 ): 587 yield PageIndexRecord(row) 588 589 def walk(self, path=None): 590 '''Generator function to yield all pages in the index, depth 591 first 592 593 @param path: a L{Path} object for the starting point, can be 594 used to only iterate a sub-tree. When this is C{None} the 595 whole notebook is iterated over 596 @returns: an iterator that yields L{Path} objects 597 @raises IndexNotFoundError: if C{path} does not exist in the index 598 ''' 599 # Need to do this recursive to preserve sorting 600 # else we could just do "name LIKE parent%" 601 page_id = self._pages.get_page_id(path) if path else ROOT_ID # can raise 602 return self._pages.walk(page_id) 603 604 def walk_bottomup(self, path=None): 605 page_id = self._pages.get_page_id(path) if path else ROOT_ID # can raise 606 return self._pages.walk_bottomup(page_id) 607 608 def n_all_pages(self): 609 '''Returns to total number of pages in the index''' 610 c, = self.db.execute('SELECT COUNT(*) FROM pages').fetchone() 611 return c - 1 # don't count ROOT 612 613 def get_has_previous_has_next(self, path): 614 if path.isroot: 615 raise ValueError('Can\'t use root') 616 617 r = self.db.execute( 618 'SELECT * FROM pages WHERE parent=? ' 619 'ORDER BY sortkey ASC, name ASC LIMIT 1', 620 (ROOT_ID,) 621 ).fetchone() 622 is_first = (r['name'] == path.name) if r else True 623 624 r = self.db.execute( 625 'SELECT * FROM pages WHERE parent=? ' 626 'ORDER BY sortkey DESC, name DESC LIMIT 1', 627 (ROOT_ID,) 628 ).fetchone() 629 is_last = (r['name'] == path.name) if r else True 630 631 return not is_first, not is_last 632 633 def get_previous(self, path): 634 '''Get the previous path in the index, in the same order that 635 L{walk()} will yield them 636 @param path: a L{Path} object 637 @returns: a L{Path} object or C{None} if {path} is the first page in 638 the index 639 ''' 640 # Find last (grand)child of previous item with same parent 641 # If no previous item, yield parent 642 if path.isroot: 643 raise ValueError('Can\'t use root') 644 645 r = self.db.execute( 646 'SELECT parent FROM pages WHERE name=?', (path.name,) 647 ).fetchone() 648 if r is None: 649 raise IndexNotFoundError('No such page: %s' % path) 650 else: 651 parent_id = r[0] 652 653 sortkey = natural_sort_key(path.basename) 654 r = self.db.execute(''' 655 SELECT * FROM pages WHERE parent=? and ( 656 sortkey<? or (sortkey=? and name<?) 657 ) ORDER BY sortkey DESC, name DESC LIMIT 1''', 658 (parent_id, sortkey, sortkey, path.name) 659 ).fetchone() 660 if not r: 661 parent = self._pages.get_pagename(parent_id) 662 return None if parent.isroot else parent 663 else: 664 while r['n_children'] > 0: 665 r = self.db.execute( 666 'SELECT * FROM pages WHERE parent=? ' 667 'ORDER BY sortkey DESC, name DESC LIMIT 1', 668 (r['id'],) 669 ).fetchone() 670 if r is None: 671 raise IndexConsistencyError('Missing children') 672 else: 673 return PageIndexRecord(r) 674 675 def get_next(self, path): 676 '''Get the next path in the index, in the same order that 677 L{walk()} will yield them 678 @param path: a L{Path} object 679 @returns: a L{Path} object or C{None} if C{path} is the last page in 680 the index 681 ''' 682 # If item has children, yield first child 683 # Else find next item with same parent 684 # If no next item, find next item for parent 685 if path.isroot: 686 raise ValueError('Can\'t use root') 687 688 r = self.db.execute( 689 'SELECT * FROM pages WHERE name=?', (path.name,) 690 ).fetchone() 691 if r is None: 692 raise IndexNotFoundError('No such page: %s' % path) 693 694 if r['n_children'] > 0: 695 r = self.db.execute( 696 'SELECT name FROM pages WHERE parent=? ' 697 'ORDER BY sortkey, name LIMIT 1', 698 (r['id'],) 699 ).fetchone() 700 if r is None: 701 raise IndexConsistencyError('Missing children') 702 else: 703 return PageIndexRecord(r) 704 else: 705 while True: 706 n = self.db.execute(''' 707 SELECT * FROM pages WHERE parent=? and ( 708 sortkey>? or (sortkey=? and name>?) 709 ) ORDER BY sortkey, name LIMIT 1''', 710 (r['parent'], r['sortkey'], r['sortkey'], r['name']) 711 ).fetchone() 712 if n is not None: 713 return PageIndexRecord(n) 714 elif r['parent'] == ROOT_ID: 715 return None 716 else: 717 r = self.db.execute( 718 'SELECT * FROM pages WHERE id=?', (r['parent'],) 719 ).fetchone() 720 if r is None: 721 raise IndexConsistencyError('Missing parent') 722 723 def lookup_from_user_input(self, name, reference=None): 724 '''Lookup a pagename based on user input 725 @param name: the user input as string 726 @param reference: a L{Path} in case relative links are supported as 727 customer input 728 @returns: a L{Path} object for C{name} 729 @raises ValueError: when C{name} would reduce to empty string 730 after removing all invalid characters, or if C{name} is a 731 relative link while no C{reference} page is given. 732 @raises IndexNotFoundError: when C{reference} is not indexed 733 ''' 734 # This method re-uses most of resolve_link() but is defined 735 # separate because it has a distinct different purpose. 736 # Only accidental that we treat user input as links ... ;) 737 href = HRef.new_from_wiki_link(name) 738 if reference is None and href.rel == HREF_REL_RELATIVE: 739 raise ValueError('Got relative page name without parent: %s' % name) 740 else: 741 source = reference or ROOT_PATH 742 id, pagename = self._pages.resolve_link( 743 source, href, ignore_link_placeholders=False) 744 return pagename 745 746 def resolve_link(self, source, href): 747 '''Find the end point of a link 748 Depending on the link type (absolute, relative, or floating), 749 this method first determines the starting point of the link 750 path. Then it goes downward doing a case insensitive match 751 against the index. 752 @param source: a L{Path} for the starting point of the link 753 @param href: a L{HRef} object for the link 754 @returns: a L{Path} object for the target of the link. 755 ''' 756 assert isinstance(source, Path) 757 assert isinstance(href, HRef) 758 id, pagename = self._pages.resolve_link(source, href) 759 return pagename 760 761 def create_link(self, source, target): 762 '''Determine best way to represent a link between two pages 763 @param source: a L{Path} object 764 @param target: a L{Path} object 765 @returns: a L{HRef} object 766 ''' 767 if target == source: # weird edge case .. 768 return HRef(HREF_REL_FLOATING, target.basename) 769 elif target.ischild(source): 770 return HRef(HREF_REL_RELATIVE, target.relname(source)) 771 else: 772 href = self._find_floating_link(source, target) 773 return href or HRef(HREF_REL_ABSOLUTE, target.name) 774 775 def _find_floating_link(self, source, target): 776 # Relative links only resolve for pages that have a common parent 777 # with the source page. So we start finding the common parents and 778 # if that does not resolve (e.g. because same name also occurs on a 779 # lower level) try one level up to "anchor" the link. 780 # It is absolute must to use resolve_link() here - this ensures the 781 # outcome is always consistent between these functions. 782 parentnames = [] 783 for n1, n2 in zip(source.parts, target.parts): 784 if n1 == n2: 785 parentnames.append(n1) 786 else: 787 break 788 789 def try_link(names): 790 assert names 791 href = HRef(HREF_REL_FLOATING, ':'.join(names)) 792 pid, pagename = self._pages.resolve_link(source, href) 793 return href if pagename == target else None 794 795 relnames = target.parts[len(parentnames):] 796 if not relnames: # Target is direct parent 797 relnames.insert(0, parentnames.pop()) 798 href = try_link(relnames) 799 if href is not None: 800 return href 801 else: 802 while parentnames: 803 relnames.insert(0, parentnames.pop()) 804 href = try_link(relnames) 805 if href: 806 return href 807 else: 808 return None # no floating link possible 809 810 def list_recent_changes(self, limit=None, offset=None): 811 assert not (offset and not limit), "Can't use offset without limit" 812 if limit: 813 selection = ' LIMIT %i OFFSET %i' % (limit, offset or 0) 814 else: 815 selection = '' 816 817 for row in self.db.execute( 818 'SELECT * FROM pages WHERE id<>1 ORDER BY mtime DESC' + selection, 819 ): 820 yield PageIndexRecord(row) 821 822 823try: 824 from gi.repository import Gtk 825except ImportError: 826 Gtk = None 827 828 829IS_PAGE = 1 #: Hint for MyTreeIter 830 831class PagesTreeModelMixin(TreeModelMixinBase): 832 833 # Optimize lookup for finding records in the same level 834 # - always cache parent, to retrieve other children more quickly 835 # - cache a range of 20 records at once 836 837 # Signals use "find_all" instead of "find" to allow for subclasses that 838 # have multiple entries, like models for tags 839 840 def __init__(self, index, root=None, reverse=False): 841 TreeModelMixinBase.__init__(self, index) 842 self._REVERSE = reverse 843 if root is None: 844 self._MY_ROOT_NAME = '' 845 self._MY_ROOT_NAME_C = '' 846 self._MY_ROOT_ID = ROOT_ID 847 else: 848 self._MY_ROOT_NAME = root.name 849 self._MY_ROOT_NAME_C = root.name + ':' 850 self._set_root_id() 851 852 self._deleted_paths = None 853 854 def _set_root_id(self): 855 myrow = self.db.execute( 856 'SELECT * FROM pages WHERE name=?', (self._MY_ROOT_NAME,) 857 ).fetchone() 858 self._MY_ROOT_ID = myrow['id'] if myrow else None 859 860 def connect_to_updateiter(self, index, update_iter): 861 self.connectto_all(update_iter.pages, 862 ('page-row-inserted', 'page-row-changed', 'page-row-delete', 'page-row-deleted') 863 ) 864 865 def on_page_row_inserted(self, o, row): 866 self.flush_cache() 867 if row['name'] == self._MY_ROOT_NAME: 868 self._set_root_id() 869 else: 870 for treepath in self._find_all_pages(row['name']): 871 treeiter = self.get_iter(treepath) # not mytreeiter ! 872 self.emit('row-inserted', treepath, treeiter) 873 if treepath[-1] == 0 and len(treepath) > 1: 874 self._check_parent_has_child_toggled(treepath, 1) 875 876 def _check_parent_has_child_toggled(self, treepath, count): 877 parent = self.get_mytreeiter(treepath[:-1]) 878 if parent.n_children == count: 879 treeiter = self.get_iter(parent.treepath) # not mytreeiter ! 880 self.emit('row-has-child-toggled', parent.treepath, treeiter) 881 882 def on_page_row_changed(self, o, row, oldrow): 883 # no clear cache here - just update row 884 for treepath in self._find_all_pages(row['name']): 885 treeiter = self.get_iter(treepath) # not mytreeiter ! 886 self.cache[tuple(treepath)].row = row # ensure uptodate info 887 self.emit('row-changed', treepath, treeiter) 888 889 def on_page_row_delete(self, o, row): 890 self._deleted_paths = list(self._find_all_pages(row['name'])) 891 892 def on_page_row_deleted(self, o, row): 893 # Technically "_deleted_paths" should always be a single path 894 # here, else two things changed at once, and Gtk.TreeView cannot 895 # always deal with that. 896 897 self.flush_cache() 898 if row['name'] == self._MY_ROOT_NAME: 899 self._MY_ROOT_ID = None 900 else: 901 for treepath in self._deleted_paths: 902 self.emit('row-deleted', treepath) 903 if treepath[-1] == 0 and len(treepath) > 1: 904 self._check_parent_has_child_toggled(treepath, 0) 905 906 self._deleted_paths = None 907 908 def n_children_top(self): 909 if self._MY_ROOT_ID is None: 910 return 0 911 else: 912 return self.db.execute( 913 'SELECT COUNT(*) FROM pages WHERE parent=?', (self._MY_ROOT_ID,) 914 ).fetchone()[0] 915 916 def get_mytreeiter(self, treepath): 917 if self._MY_ROOT_ID is None: 918 return None 919 920 treepath = tuple(treepath) # used to cache 921 if treepath in self.cache: 922 return self.cache[treepath] 923 924 # Find parent 925 parentpath = treepath[:-1] 926 if not parentpath: 927 parent_id = self._MY_ROOT_ID 928 else: 929 parent_iter = self.cache.get(parentpath, None) \ 930 or self.get_mytreeiter(parentpath) # recurs 931 if parent_iter: 932 parent_id = parent_iter.row['id'] 933 else: 934 return None 935 936 # Now cache a slice at the target level 937 offset = treepath[-1] 938 if self._REVERSE: 939 rows = self.db.execute(''' 940 SELECT * FROM pages WHERE parent=? 941 ORDER BY sortkey DESC, name DESC LIMIT 20 OFFSET ? 942 ''', 943 (parent_id, offset) 944 ) 945 else: 946 rows = self.db.execute(''' 947 SELECT * FROM pages WHERE parent=? 948 ORDER BY sortkey ASC, name ASC LIMIT 20 OFFSET ? 949 ''', 950 (parent_id, offset) 951 ) 952 for i, row in enumerate(rows): 953 mytreepath = tuple(parentpath) + (offset + i,) 954 if mytreepath not in self.cache: 955 self.cache[mytreepath] = MyTreeIter( 956 Gtk.TreePath(mytreepath), 957 row, 958 row['n_children'], 959 IS_PAGE 960 ) 961 else: 962 break # avoid overwriting cache because of ref count 963 964 return self.cache.get(treepath, None) 965 966 def find(self, path): 967 '''Returns the C{Gtk.TreePath} for a notebook page L{Path} 968 If the L{Path} appears multiple times returns the first occurrence 969 @raises IndexNotFoundError: if path not found 970 ''' 971 if path.isroot: 972 raise ValueError 973 treepaths = sorted(self._find_all_pages(path.name)) 974 try: 975 return treepaths[0] 976 except IndexError: 977 raise IndexNotFoundError(path) 978 979 def find_all(self, path): 980 '''Returns a list of C{Gtk.TreePath} for a notebook page L{Path} 981 Returns all occurrences in the treeview 982 @raises IndexNotFoundError: if path not found 983 ''' 984 if path.isroot: 985 raise ValueError 986 treepaths = self._find_all_pages(path.name) 987 if not treepaths: 988 raise IndexNotFoundError(path) 989 else: 990 return treepaths 991 992 def _find_all_pages(self, name, update_cache=True): 993 if self._MY_ROOT_ID is None or \ 994 not name.startswith(self._MY_ROOT_NAME_C): 995 return [] 996 997 parent_id = self._MY_ROOT_ID 998 names = name[len(self._MY_ROOT_NAME_C):].split(':') 999 treepath = [] 1000 for i, basename in enumerate(names): 1001 # Get treepath 1002 name = self._MY_ROOT_NAME_C + ':'.join(names[:i + 1]) 1003 myrow = self.db.execute( 1004 'SELECT * FROM pages WHERE name=?', (name,) 1005 ).fetchone() 1006 if myrow is None: 1007 raise IndexNotFoundError 1008 1009 sortkey = myrow['sortkey'] 1010 if self._REVERSE: 1011 row = self.db.execute(''' 1012 SELECT COUNT(*) FROM pages 1013 WHERE parent=? and ( 1014 sortkey>? or (sortkey=? and name>?) 1015 )''', 1016 (parent_id, sortkey, sortkey, name) 1017 ).fetchone() 1018 else: 1019 row = self.db.execute(''' 1020 SELECT COUNT(*) FROM pages 1021 WHERE parent=? and ( 1022 sortkey<? or (sortkey=? and name<?) 1023 )''', 1024 (parent_id, sortkey, sortkey, name) 1025 ).fetchone() 1026 treepath.append(row[0]) 1027 parent_id = myrow['id'] 1028 1029 if update_cache: 1030 # Update cache (avoid overwriting because of ref count) 1031 mytreepath = tuple(treepath) 1032 if mytreepath not in self.cache: 1033 myiter = MyTreeIter( 1034 Gtk.TreePath(mytreepath), 1035 myrow, 1036 myrow['n_children'], 1037 IS_PAGE 1038 ) 1039 self.cache[mytreepath] = myiter 1040 1041 return [Gtk.TreePath(treepath)] 1042 1043 1044######################################################################## 1045 1046class TestPagesDBTable(object): 1047 # Mixin for test cases, defined here to have all SQL in one place 1048 1049 def assertPagesDBConsistent(self, db): 1050 for row in db.execute('SELECT * FROM pages'): 1051 count, = db.execute( 1052 'SELECT count(*) FROM pages WHERE parent=?', 1053 (row['id'],) 1054 ).fetchone() 1055 self.assertEqual(row['n_children'], count, 1056 'Count for "%s" is %i while n_children=%i' % (row['name'], row['n_children'], count) 1057 ) 1058 1059 if row['source_file'] is not None: 1060 self.assertFalse(row['is_link_placeholder'], 1061 'Placeholder status for %s is wrong (has source itself)' % row['name'] 1062 ) 1063 elif not row['is_link_placeholder']: 1064 # Check downwards - at least one child that is not a placeholder either 1065 child = db.execute( 1066 'SELECT * FROM pages WHERE parent=? and is_link_placeholder=?', 1067 (row['id'], False), 1068 ).fetchone() 1069 self.assertIsNotNone(child, 1070 'Missing child with source for %s' % row['name']) 1071 1072 if row['id'] > 1: 1073 parent = db.execute( 1074 'SELECT * FROM pages WHERE id=?', 1075 (row['id'],) 1076 ).fetchone() 1077 self.assertIsNotNone(parent, 1078 'Missing parent for %s' % row['name']) 1079 1080 if not row['is_link_placeholder']: 1081 # Check upwards - parent(s) must not be placeholder either 1082 self.assertFalse(parent['is_link_placeholder'], 1083 'Placeholder status for parent of %s is inconsistent' % row['name'] 1084 ) 1085 1086 def assertPagesDBEquals(self, db, pages): 1087 rows = db.execute('SELECT * FROM pages WHERE id>1').fetchall() 1088 in_db = set(r['name'] for r in rows) 1089 self.assertEqual(in_db, set(pages)) 1090 1091 def assertPagesDBContains(self, db, pages): 1092 rows = db.execute('SELECT * FROM pages WHERE id>1').fetchall() 1093 in_db = set(r['name'] for r in rows) 1094 self.assertTrue(set(pages).issubset(in_db)) 1095