1#!/usr/bin/env python2 2 3import copy 4import operator 5import os 6import os.path 7import pickle 8import string 9import sys 10 11# Constant for C++ files. 12FILETYPE_CPP = 2 13# Constant for DDDOC files. 14FILETYPE_DDDOC = 1 15# Constant for none of the above. 16FILETYPE_OTHER = 0 17 18SOURCE_ENCODING = 'iso8859-1' 19 20# Extension of C++ files. 21CPP_EXTS = ['c', 'C', 'cpp', 'CPP', 'c++', 'C++', 'h', 'H', 'hpp', 'HPP', 22 'h++', 'H++'] 23# Extensions of DDDOC files. 24DDDOC_EXTS = ['dddoc', 'DDDOC'] 25 26# List of ignored directory names. 27IGNORED_DIRS = ['CSV', '.svn', 'seeds2', 'find2', 'cmake'] 28 29DATA = None 30ID = 0 31 32# Text attribute node keys. 33TEXT_ATTRIBUTE_KEYS = set(['text', 'table', 'tableheader', 'code', 'console', 'section', 34 'subsection', 'image', 'contents', 'note', 'file', 'snippet', 35 'output']) 36 37# Nodes having paths matching the following patterns are considered text 38# container nodes. Their children having only one more component which is in 39# TEXT_ATTRIBUTE_KEYS are processed in a special way. The last component is 40# replaced with 'text' and their content is prefixed by "type=$key:" where $key 41# is the original key. The content of the text container nodes is prefixed with 42# "type=$text:" and moved to a child with key 'text'. 43TEXT_CONTAINER_PATHS = [ 44 'Indexpage.*.description', 45 'Page.*.description', 46 'Page.*.summary', 47 'Page.*.glossary.*', 48 'Function.*.example', 49 'Function.*.summary', 50 'Function.*.description', 51 'Function.*.remarks', 52 'Function.*.status', 53 'Class.*.example', 54 'Class.*.summary', 55 'Class.*.description', 56 'Class.*.remarks', 57 'Class.*.status', 58 'Metafunction.*.example', 59 'Metafunction.*.summary', 60 'Metafunction.*.description', 61 'Metafunction.*.remarks', 62 'Metafunction.*.status', 63 'Memfunc.*.example', 64 'Memfunc.*.summary', 65 'Memfunc.*.description', 66 'Memfunc.*.remarks', 67 'Memfunc.*.status', 68 'Memvar.*.example', 69 'Memvar.*.summary', 70 'Memvar.*.description', 71 'Memvar.*.remarks', 72 'Memvar.*.status', 73 'Macro.*.example', 74 'Macro.*.summary', 75 'Macro.*.description', 76 'Macro.*.remarks', 77 'Macro.*.status', 78 'Enum.*.example', 79 'Enum.*.summary', 80 'Enum.*.description', 81 'Enum.*.remarks', 82 'Enum.*.status', 83 'Spec.*.example', 84 'Spec.*.summary', 85 'Spec.*.description', 86 'Spec.*.remarks', 87 'Spec.*.status', 88 'Shortcut.*.example', 89 'Shortcut.*.summary', 90 'Shortcut.*.description', 91 'Shortcut.*.remarks', 92 'Shortcut.*.status', 93 'Tag.*.example', 94 'Tag.*.summary', 95 'Tag.*.description', 96 'Tag.*.remarks', 97 'Tag.*.status', 98 'Typedef.*.example', 99 'Typedef.*.summary', 100 'Typedef.*.description', 101 'Typedef.*.remarks', 102 'Typedef.*.status', 103 'Demo.*.summary', 104 'Demo.*.description', 105 'Demo.*.remarks', 106 'Demo.*.output', 107 'Adaption.*.example', 108 'Adaption.*.summary', 109 'Adaption.*.description', 110 'Adaption.*.remarks', 111 'Adaption.*.status', 112 'Concept.*.example', 113 'Concept.*.summary', 114 'Concept.*.description', 115 'Concept.*.remarks', 116 'Concept.*.status', 117 ] 118 119def _pathsMatch(path1, path2): 120 """Compare two paths with wildcards.""" 121 if not type(path1) is list: 122 path1 = splitKeys(path1[int(path1[0] == '.'):], '.') # Strip leading '.', if any. 123 if not type(path2) is list: 124 path2 = splitKeys(path2[int(path2[0] == '.'):], '.') 125 if len(path1) != len(path2): 126 return False 127 for i, p1 in enumerate(path1): 128 p2 = path2[i] 129 if not (p1 == '*' or p2 == '*' or p1 == p2): 130 return False 131 return True 132 133 134def transformDddocEntry(entry): 135 """Performs the text container node transformations. 136 137 Returns list of entries to add if any. 138 """ 139 for path in TEXT_CONTAINER_PATHS: 140 if _pathsMatch(path, entry.path) and entry.content: # Is text container. 141 new_entry = copy.deepcopy(entry) 142 new_entry.content = 'type=text:' + entry.content 143 entry.content = '' 144 return [new_entry] # Done. 145 if not entry.path[-1] in TEXT_ATTRIBUTE_KEYS: 146 continue # Skip if last component does not match. 147 if not _pathsMatch(path, entry.path[:-1]): 148 continue # Skip non-matching path. 149 # If we reach here, it is a text node. 150 ## print 'TRANSFORMING ', entry 151 last = entry.path[-1] 152 entry.path = entry.path[:-1] 153 entry.content = 'type=' + last + ':' + entry.content 154 ## print ' to ', entry 155 return [] # Done 156 return [] # No updates. 157 158 159class FileCache(object): 160 """Simple file contents cache. 161 162 Maps paths to (mtime, file contents) pairs. 163 164 Attrs: 165 166 path Path to the cache file. 167 content Dict with cache content mapping file name to pair of mtime 168 and data associated with the cache. 169 """ 170 171 def __init__(self, path): 172 self.path = path 173 self.content = {} 174 self._tryLoad() 175 176 def _tryLoad(self): 177 try: 178 with open(self.path, 'rb') as f: 179 self.content = pickle.load(f) 180 except: 181 print >>sys.stderr, 'Could not load cache %s' % self.path 182 return False 183 print >>sys.stderr, 'Successfully loaded cache %s' % self.path 184 return True 185 186 def flush(self): 187 """Store the cache to its file.""" 188 try: 189 with open(self.path, 'wb') as f: 190 pickle.dump(self.content, f) 191 except: 192 print >>sys.stderr, 'Could not store cache %s' % self.path 193 return False 194 print >>sys.stderr, 'Successfully stored cache %s' % self.path 195 return True 196 197 def has_key(self, key): 198 """Returns True if the cache has data for this key.""" 199 return self.content.has_key(key) 200 201 def isFresh(self, filename): 202 """Returns True if the cache is fresh. 203 204 The cache is fresh if the file at the given path is not newer than the 205 data in the cache. 206 """ 207 if not self.has_key(filename): 208 return False 209 mtime = os.stat(filename).st_mtime 210 return mtime >= self.content[filename][0] 211 212 def get(self, key, defaultValue=None): 213 """Return content of the given entry.""" 214 return self.content.get(key, (None, defaultValue))[1] 215 216 def set(self, filename, value): 217 """Set cache content and mtime.""" 218 mtime = os.stat(filename).st_mtime 219 self.content[filename] = (mtime, value) 220 221 222class DddocEntry(object): 223 def __init__(self, path, content, filename, line_no_begin, line_no_end): 224 self.path = path 225 self.content = content 226 self.filename = filename 227 self.line_no_begin = line_no_begin 228 self.line_no_end = line_no_end 229 230 def __str__(self): 231 tpl = ('DddocEntry(path=%s, content=%s, filename=%s, line_no_begin=%s, ' 232 'line_no_end=%s)') 233 values = (self.path, self.content, self.filename, self.line_no_begin, 234 self.line_no_end) 235 return tpl % tuple(map(repr, values)) 236 237 def __repr__(self): 238 return self.__str__() 239 240 @classmethod 241 def cmpPathLocation(klass, lhs, rhs): 242 """Comparator, by entry path then filename and line number.""" 243 lhs_t = (lhs.path, lhs.filename, lhs.line_no_begin) 244 rhs_t = (rhs.path, rhs.filename, rhs.line_no_begin) 245 if lhs_t < rhs_t: 246 return -1 247 elif lhs_t > rhs_t: 248 return 1 249 else: 250 return 0 251 252 253def splitKeys(text, delimiters, limit=None, _cache={}): 254 """Splitting that considers escaping of keys using quotes. 255 256 >>> splitKeys('.Adaption.\'std::string\'.summary') 257 ['', 'Adaption', '\'std::string\'', 'summary'] 258 """ 259 if '\u0001' in text: 260 text = text.split('\u0001', 1)[0] # Remove optional label, used in inheritance. 261 if _cache.has_key((text, delimiters)): 262 return _cache[(text, delimiters)] 263 count = 0 264 current = [] 265 result = [] 266 str_delimiter = None 267 for i in range(0, len(text)): 268 # Handle text in strings. 269 if str_delimiter: 270 if text[i] == str_delimiter: 271 str_delimiter = None 272 current.append(text[i]) 273 continue 274 elif text[i] in '\'"': 275 str_delimiter = text[i] 276 current.append(text[i]) 277 continue 278 # Handle non-in-string text. 279 if text[i] in delimiters: 280 result.append(''.join(current)) 281 current = [] 282 count += 1 283 if limit and count >= limit: 284 result.append(text[i+1:]) 285 _cache[(text, delimiters)] = result 286 return result 287 else: 288 current.append(text[i]) 289 result.append(''.join(current)) 290 _cache[(text, delimiters)] = result 291 return result 292 293 294def cleanPath(path_arr): 295 """Takes a list with a path and cleans its element. 296 297 Cleaning its element currently only consists in removing singel and double 298 quotes. 299 """ 300 def _cleanPathElement(x): 301 return x.strip().replace('\'', '').replace('"', '') 302 return map(_cleanPathElement, path_arr) 303 304 305class FileLoader(object): 306 """File loader helper class. 307 308 Attrs: 309 310 cache FileCache to use for caching. 311 entries List of DddocEntries objects. 312 """ 313 314 def __init__(self, cache): 315 self.cache = cache 316 self.entries = [] 317 318 def _loadDDDOCFile(self, filename, cache): # TODO(holtgrew): Make Top-Level Function? 319 # Try to load from cache. 320 if cache.isFresh(filename): 321 return cache.get(filename) 322 323 # Load file. 324 with open(filename, 'rb') as f: 325 text = [x.decode(SOURCE_ENCODING).encode("ascii", "xmlcharrefreplace") for x in f.readlines()] 326 cache.set(filename, text) 327 return text 328 329 def _loadCPPFile(self, filename, cache): # TODO(holtgrew): Make Top-Level Function? 330 if cache.isFresh(filename): 331 return cache.get(filename) 332 333 # TODO(holtgrew): This looks overly complicated. 334 f = open(filename) 335 lines = [x.decode(SOURCE_ENCODING).encode("ascii", "xmlcharrefreplace") for x in f.readlines()] 336 f.close() 337 338 ret = [] 339 340 #test for SEQAN_NO_DDDOC 341 for line in lines: 342 if line.find("SEQAN_NO_DDDOC") >= 0: 343 cache.set(filename, ret) 344 return ret; 345 346 347 incomment = False 348 innextcomment = False 349 inextract = False 350 351 for line in lines: 352 line = line.rstrip() 353 str_line = "" 354 if len(line) == 0: 355 if not innextcomment and not incomment: 356 str_line = "." 357 else: 358 str_line = " " 359 360 while len(line) > 0 : 361 if innextcomment: 362 if line[len(line)-1] == "\\" : 363 if inextract: str_line += line[: len(line)-1] 364 else: 365 if inextract: str_line += line 366 innextcomment = False 367 break 368 369 elif incomment: 370 pos1 = line.find("*/") 371 if pos1 < 0: 372 if inextract: str_line += line; 373 break; 374 else: 375 if inextract: 376 str_line += line[:pos1]; 377 line = line[pos1 + 3:]; 378 else: 379 line = line[pos1 + 2:]; 380 incomment = False; 381 382 else: 383 pos1 = line.find("/*") 384 pos2 = line.find("//") 385 pos3 = line.find('"') 386 if (pos1 >= 0) and ((pos2 < 0) or (pos1 < pos2)) and ((pos3 < 0) or (pos1 < pos3)): 387 pos9 = line.find("*/", pos1 + 2) 388 if (len(line) > pos1 + 2): 389 inextract = (line[pos1 + 2] == "/") or (line[pos1 + 2] == "*") 390 else: 391 inextract = False 392 if pos9 < 0 : 393 if inextract: str_line += line[pos1 + 3:] 394 incomment = True 395 break 396 else: 397 if inextract: 398 str_line += line[pos1 + 3: pos3] 399 line = line[pos9 + 3:] 400 else: 401 line = line[pos9 + 2:] 402 403 elif (pos2 >= 0) and ((pos3 < 0) or (pos2 < pos3)): 404 pos2b = pos2 + 2; 405 while ((pos2b < len(line)) and ((line[pos2b] == "/") or (line[pos2b] == "*"))): 406 pos2b += 1 407 inextract = (pos2b > pos2 + 2) 408 if line[len(line)-1] == "\\" : 409 if inextract: str_line += line[pos2b: len(line)-1] 410 innextcomment = True 411 else: 412 if inextract: str_line += line[pos2b:] 413 break 414 415 elif pos3 >= 0: 416 pos9 = line.find('"', pos3 + 2) 417 if pos9 < 0: 418 line = line[pos9+1:] 419 break 420 else: 421 break 422 423 else: 424 break 425 426 ret = ret + [str_line] 427 428 cache.set(filename, ret) 429 return ret 430 431 def _getFileType(self, filename): # TODO(holtgrew): Make Top-Level Function? 432 """Determines file type from filename. 433 434 Determines the file type from the extension of the given filename. 435 436 >>> getFileType('test.cpp') == FILETYPE_CPP 437 True 438 >>> getFileType('path/file.h') == FILETYPE_CPP 439 True 440 >>> getFileType('test.dddoc') == FILETYPE_DDDOC 441 True 442 443 Args: 444 445 filename Filename to parse. 446 447 Returns: 448 449 One of {FILETYPE_CPP, FILETYPE_DDDOC, FILETYPE_OTHER}, depending 450 on the extension of filename. 451 """ 452 # Get file extension. 453 base, ext = os.path.splitext(filename) 454 if ext[1:] in CPP_EXTS: 455 return FILETYPE_CPP 456 elif ext[1:] in DDDOC_EXTS: 457 return FILETYPE_DDDOC 458 else: 459 return FILETYPE_OTHER 460 461 def _loadFile(self, filename): 462 """Load the file with the given filename. 463 464 The line is then split into DDDoc entries, unwrapping entries that span 465 more than one line. Finally, the keys are expanded, and surrounding 466 whitespace is stripped. 467 """ 468 ## print filename 469 # Load file contents, through a cache. 470 file_type = self._getFileType(filename) 471 if file_type == FILETYPE_CPP: 472 text = self._loadCPPFile(filename, self.cache) 473 elif file_type == FILETYPE_DDDOC: 474 text = self._loadDDDOCFile(filename, self.cache) 475 else: 476 raise Error("Unknown file type of file %s." % path) 477 text.append('.') 478 ## print 'LOADING', filename 479 ## print '\n'.join(text) 480 481 # Process all lines in the input, join lines that do not begin with a 482 # dot with the previous ones. This allows the wrapping of lines. 483 str = False 484 dddoc_entries = [] # [(path, filename, begin line no, end line no)] 485 line_no_begin, line_no_end = 1, 1 486 for line in text: 487 ## if line and line != '.': 488 ## print 'LINE', line 489 line_no_end += 1 490 if not line: 491 continue 492 if line[0] == '.': 493 if str is not False and str[0] == '.' and str != '.' and str.strip(): # Skip empty dummy lines. 494 dddoc_entries.append([str, filename, line_no_begin, line_no_end]) 495 ## print dddoc_entries[-1] 496 line_no_begin = line_no_end 497 str = line 498 if str == '.': 499 str = False 500 elif str: 501 if str[-1] != '\n': 502 str += '\n' 503 str += line 504 505 # Now, expand the keys of dddoc_entries, e.g. dddoc_entries[i][0]. 506 # TODO(holtgrew): Consider escaping of keys here. 507 stack = [] 508 stack_len_sum = 0 509 for entry in dddoc_entries: 510 ## print 'ENTRY', entry 511 ## print 'stack=%s' % (stack) 512 # Split out $key:$value of the entry and $the.$path.$elements from $key. 513 maybe_pair = splitKeys(entry[0].strip(), ':', 1) 514 if len(maybe_pair) == 2: 515 key, value = splitKeys(entry[0].strip(), ':', 1) 516 else: 517 key, value = entry[0].strip(), '' 518 path = splitKeys(key, '.')[1:] 519 # Count empty entries in the path. 520 ## print ' ', path 521 empty_count = reduce(operator.add, [1 for x in path if not x], 0) 522 ## print ' empty_count', empty_count 523 if empty_count <= len(stack): 524 stack = stack[:empty_count] 525 stack_len_sum = reduce(operator.add, map(len, stack), 0) 526 stack.append(path[empty_count:]) 527 stack_len_sum += len(stack[-1]) 528 path = reduce(operator.add, stack, []) 529 # Remove any leading and trailing whitespace from value and compute 530 # updated begin and end line no. 531 line_count = len(value.splitlines()) 532 value_no_leading = value.lstrip() 533 line_count2 = len(value_no_leading.splitlines()) 534 line_no_begin = entry[2] + line_count - line_count2 535 value_no_trailing = value_no_leading.rstrip() 536 line_count3 = len(value_no_trailing.splitlines()) 537 line_no_end = entry[3] - line_count2 + line_count3 538 539 # Store the DDDoc entry. 540 if path: 541 self.entries.append(DddocEntry(cleanPath(path), value_no_trailing, filename, line_no_begin, line_no_end)) 542 new_entries = transformDddocEntry(self.entries[-1]) 543 ## if new_entries: 544 ## print 'NEW ENTRIES', new_entries 545 self.entries += new_entries 546 ## print self.entries[-1] 547 548 def run(self, search_path): 549 """Call parseFile() on files. 550 551 All files below search_path will be searched that have file type 552 FILETYPE_CPP or FILETYPE_DOC as determined by getFileType(). 553 Directories with names of IGNORED_DIRS are skipped. 554 555 Args: 556 search_path String, path to search files under. 557 """ 558 for root, dirs, files in os.walk(search_path): 559 # Parse all files. 560 for file in files: 561 if os.path.basename(file).startswith('.'): 562 continue # Skipp hidden files. 563 path = os.path.join(root, file) 564 if self._getFileType(path) in [FILETYPE_CPP, FILETYPE_DDDOC]: 565 self._loadFile(path) 566 # Exclude ignored diretories. 567 for ignored in IGNORED_DIRS: 568 if ignored in dirs: 569 dirs.remove(ignored) 570 571 572class DddocTreeNode(object): 573 """Represents one entry in the DddocTree. 574 575 Attrs: 576 577 tree The DddocTree that the node belongs to. 578 key The key of this child, last element of path. 579 path The full path to the child. 580 entry Range [beg, end) of DddocEntry that this node represents. 581 children dict with the children as key/value pairs. 582 texts Array of strings with the texts. 583 """ 584 585 def __init__(self, tree, key, path, entry, children={}): 586 self.tree = tree 587 self.key = key 588 self.path = path 589 self.entry = entry 590 self.children = children 591 self.texts = [] 592 593 def text(self, spacer=' '): 594 return spacer.join(self.texts) 595 596 def __str__(self): 597 """Returns dump for the whole tree in a user-readable manner.""" 598 def _str(node, level=0, prefix=''): 599 space = ' ' * level 600 if prefix: 601 prefix = prefix + ' --> ' 602 res = '%s %sDddocTreeNode(key=%s, texts=%s)' % (space, prefix, repr(node.key), repr(node.texts)) 603 for k, child in node.children.iteritems(): 604 res += '\n' + _str(child, level + 1, k) 605 return res 606 return _str(self) 607 608 def dump(self, stream=sys.stdout): 609 """Debug recursive dumping of a tree node.""" 610 print >>stream, self 611 612 613class DddocTree(object): 614 """Tree with the information from the DDDoc contents. 615 616 Attrs: 617 618 entries The raw entries. 619 root The root DddocTreeNode. 620 glossary_nodes List of nodes that contain glossary entries. Built 621 in finalize(). 622 """ 623 624 def __init__(self, entries): 625 self.entries = entries 626 #for e in self.entries: 627 # print e 628 self.root = DddocTreeNode(self, 'ROOT', [], (0, 0), self._buildSubtree([], 0, len(entries), 0)) 629 self.cache = None 630 self.glossary_nodes = [] 631 ## self.root.dump() 632 ## for entry in self.entries: 633 ## print entry.path, entry.content 634 635 def _enableFindCache(self): 636 if self.cache is None: 637 self.cache = {} 638 639 def finalize(self): 640 """Called after tree will not be modified any more. 641 642 Enables caching and builds some indices. 643 """ 644 self._enableFindCache() 645 print >>sys.stderr, 'Indexing Glossary Pages' 646 if 'Page' in self.root.children: 647 for key, node in self.root.children['Page'].children.iteritems(): 648 if 'glossary' in node.children: 649 self.glossary_nodes.append(node.children['glossary']) 650 print >>sys.stderr, ' Found Page.%s' % node.key 651 652 def _buildSubtree(self, path, begin_index, end_index, level): 653 # First, identify the entries belonging to each node (entry.path[i] are 654 # equal for i = level, inductively, also i <= level). 655 prev_key = None 656 prev_beg = None 657 subseqs = [] 658 for i in range(begin_index, end_index): 659 if prev_key != self.entries[i].path[level]: 660 if prev_key != None: 661 subseqs.append((prev_beg, i)) 662 prev_key = self.entries[i].path[level] 663 prev_beg = i 664 if prev_key != None and prev_beg != end_index: # Handle last. 665 subseqs.append((prev_beg, end_index)) 666 # Now, subseqs contains a sequence of contiguous half-open intervals. 667 # Each contains the data for one tree node. There is a possibly empty 668 # sequence of leading entries with paths of length level + 1 containing 669 # the data for the current level node. The rest is for the level below. 670 result = {} 671 for (b, c) in subseqs: 672 assert b != c 673 # Split into entries for this and for next level: [a, b); [b, c). 674 a = b # [a, b) will be for this vertex. 675 while b < c and len(self.entries[b].path) == level + 1: 676 b += 1 677 # Compute node. 678 path = self.entries[a].path[:(level + 1)] 679 key = path[level] 680 node = DddocTreeNode(self, key, path, (a, b)) 681 ## print 'new node', key 682 for i in range(a, b): 683 if self.entries[i].content: 684 node.texts.append(self.entries[i].content) 685 # Compute subtree. 686 node.children = self._buildSubtree(path, b, c, level + 1) 687 result[key] = node 688 return result 689 690 def find(self, path): 691 """Query tree for a DddocTreeNode. 692 693 The argument path can either be a dot-separated string or a list with 694 this information. If path is a string then one optional leading dot is 695 optional. Returns None if nothing could be found. 696 697 tree.find(['path', 'to', 'node']) 698 tree.find('path.to.node') 699 tree.find('.path.to.node') 700 """ 701 ## print 'FIND(%s)' % repr(path) 702 # Try to retrieve from cache if there is a cache. 703 if not self.cache is None: 704 if not type(path) is str: 705 key = '.'.join(path) 706 else: 707 key = path 708 if self.cache.has_key(key): 709 return self.cache[key] 710 # Split path if is string, ignore leading dot if any. 711 if type(path) is str: 712 path = splitKeys(path, '.') 713 if path and path[0] == '': 714 path = path[1:] 715 # Now, query the tree. 716 def findRecurse(node, path): 717 """Helper function that searches for the node with given path.""" 718 if not path: 719 return node 720 if not node.children.has_key(path[0]): 721 return None 722 return findRecurse(node.children[path[0]], path[1:]) 723 res = findRecurse(self.root, path) 724 if not self.cache is None: 725 self.cache['.'.join(path)] = res 726 return res 727 728# Paths where the inline summary is moved into a .summary child. See 729# documentation of processInlineSummaries() for details. 730SUMMARY_PATHS = [ 731 '*.*.param.*', 732 '*.*.returns', 733 '*.*.tag.*', 734 '*.*.value.*', 735 '*.*.returns.param.*', # TODO(holtgrew): Used for metafunctions, could be improved. 736 'Adaption.*', 737 'Class.*', 738 'Concept.*', 739 'Demo.*', 740 'Enum.*', 741 'Function.*', 742 'Macro.*', 743 'Memfunc.*', 744 'Metafunction.*', 745 'Shortcut.*', 746 'Spec.*', 747 'Tag.*', 748 ] 749 750# TODO(holtgrew): Also use for generateAutomaticReferences() 751 752def _matchTreesInNode(tree, node, path, func, block_paths=[['globals']], level=0): 753 """Calls func on nodes matching path.""" 754 ## print ' ' * level, '_matchTreesInNode(tree', node.path, path, func, level, ')' 755 if path: 756 if path[0] == '*': 757 for child in node.children.itervalues(): 758 _matchTreesInNode(tree, child, path[1:], func, block_paths, level+1) 759 else: 760 if node.children.has_key(path[0]): 761 _matchTreesInNode(tree, node.children[path[0]], path[1:], func, block_paths, level+1) 762 else: 763 for block_path in block_paths: 764 ## print node.path[:len(block_path)], '==', block_path 765 if node.path[:len(block_path)] == block_path: 766 return # Path is blocked. 767 func(node) 768 769 770def processInlineSummaries(tree, paths): 771 """Move inline documentation to .summary subpaths. 772 773 The nodes matching the values in path are such that inline values are moved 774 to .summary subnodes for greater consistency and lower variance. 775 776 E.g. the following: 777 778 .Function.f.param.x:This is param x. 779 780 will be transformed into 781 782 .Function.f.param.x 783 ..summary:This is param x. 784 """ 785 # First, collect nodes for the summary transfer. 786 collected_nodes = set() 787 def f(node): 788 if node.texts: 789 collected_nodes.add(node) 790 for path in paths: 791 _matchTreesInNode(tree, tree.root, splitKeys(path, '.'), f) 792 # Then, move the inline summaries into a summary node. 793 for node in collected_nodes: 794 if not 'summary' in node.children: # Create node if necessary. 795 summaryNode = DddocTreeNode(tree, 'summary', node.path + ['summary'], (-2,-2)) 796 node.children['summary'] = summaryNode 797 node.children['summary'].texts += node.texts 798 node.texts = [] 799 800 801def generateAutomaticReferences(tree): 802 """Interpret the globals.relations entries.""" 803 print >>sys.stderr, 'Generating Automatic References' 804 relations_node = tree.find('globals.relations') 805 if not relations_node: 806 return # Empty, do nothing. 807 808 # We first collect all automatic links, scheduled to be added later. 809 additions = [] 810 def appendToAdditions(node): 811 for node_path in node.texts: 812 node_path = splitKeys(node_path, '.') 813 ## print ' ' * level, ' ', node_path 814 res = tree.find(node_path) 815 ## print ' ' * level, ' ', res is not None 816 if not res: 817 continue # Not found, Skip # TODO(holtgrew): Warning? 818 additions.append((res.path + [key], '.'.join(node.path[:2]))) 819 for key, node in relations_node.children.iteritems(): 820 ## print 'GENERATE', key, node 821 for txt in node.texts: 822 path = splitKeys(txt, '.') 823 _matchTreesInNode(tree, tree.root, splitKeys(txt, '.'), appendToAdditions) 824 825 # Now, add these additions. This circumvents problems leading to infinite 826 # recursions. 827 for path, text in additions: 828 res = tree.find(path) 829 if not res: 830 parent = tree.find(path[:-1]) 831 assert parent 832 res = DddocTreeNode(tree, path[-1], path, None) 833 parent.children[path[-1]] = res 834 if not text in res.texts: 835 res.texts.append(text) 836 837 838def generateInheritedElements(tree): 839 """Push through inheritances.""" 840 print >>sys.stderr, 'Linking Inherited Entities' 841 inherit_node = tree.find('globals.inherit') 842 # Contains children: $TARGET_FIELD:$THROUGH_FIELD.$SOURCE_FIELD 843 844 all_paths = set() 845 depends_on = {} 846 inheritance_rules = [] 847 848 # First build a dependency graph. 849 for target_field, child in inherit_node.children.items(): 850 for txt in child.texts: 851 arr = splitKeys(txt, '.') 852 through_field = arr[0] 853 if len(arr) > 1: 854 source_field = arr[1] 855 else: 856 source_field = target_field 857 inheritance_rules.append((target_field, through_field, source_field)) 858 def registerDependencies(node): 859 all_paths.add('.'.join(node.path)) 860 if not through_field in node.children: 861 return 862 for path in node.children[through_field].texts: 863 pth = '.'.join(node.path) 864 depends_on.setdefault(pth, set()).add(path) 865 _matchTreesInNode(tree, tree.root, ['*', '*'], registerDependencies) 866 ## print 'ALL PATHS', all_paths 867 868 # Now, push through references by inheritance for all paths that are not 869 # linked to and not completed yet. 870 done = set() 871 to_do = all_paths - done - set(depends_on.keys()) 872 while to_do: 873 # Process all that are not completed and have no dependencies. 874 if not to_do: 875 raise Exception('Could not process all dependencies. Cyclic dependencies?') 876 # Actually perform the preprocessing. 877 for target_path in to_do: 878 for target_field, through_field, source_field in inheritance_rules: 879 target_node = tree.find(target_path) 880 if not through_field in target_node.children: 881 continue # Skip if no source children. 882 ## print 'TRYING', target_path, through_field, source_field 883 for source_path in target_node.children[through_field].texts: 884 source_node = tree.find(source_path) 885 if not source_field in source_node.children: 886 continue # Skip if no source field. 887 for path in source_node.children[source_field].texts: 888 if not '\u0001' in path: # We use this ugly hack to add the inheritance source here. 889 path = path + '\u0001' + '.'.join(source_node.path) 890 # If necessary then create child in target node. 891 if not target_field in target_node.children: 892 target_node.children[target_field] = DddocTreeNode(tree, target_field, target_node.path + [target_field], source_node.children[source_field].entry) 893 # Copy over path. 894 target_node.children[target_field].texts.append(path) 895 ## print ' appending', path 896 897 # Clear out the stuff that we completed. 898 to_delete = [] 899 for key in depends_on: # Clear out all done. 900 depends_on[key] -= to_do 901 if not depends_on[key]: 902 to_delete.append(key) 903 for key in to_delete: 904 del depends_on[key] 905 done |= to_do # Add done. 906 to_do = all_paths - done - set(depends_on.keys()) 907 908 909def removeDuplicateTexts(tree): 910 """Remove duplicates from texts members. 911 912 Suffixes starting with '\u0001' are ignored for the comparisons 913 and strings with these suffixes are preferred. 914 """ 915 ##print 'remove duplicates' 916 def recurse(node): 917 in_cleaned = {} 918 cleaned = [] 919 for txt in node.texts: 920 clean = txt 921 pos = txt.find('\u0001') 922 if pos != -1: 923 clean = txt[:pos] 924 ##print cleaned, repr(clean) 925 if clean in in_cleaned: 926 if '\u0001' in clean and not '\u0001' in cleaned[in_cleaned[clean]]: 927 cleaned[in_cleaned[clean]] = txt 928 else: 929 in_cleaned[clean] = len(cleaned) 930 cleaned.append(txt) 931 node.texts = cleaned 932 for child in node.children.itervalues(): 933 recurse(child) 934 for child in tree.root.children.itervalues(): 935 recurse(child) 936 937 938# TODO(holtgrew): If needed, this could easily be generalized. 939def buildByTypeAndCatIndex(tree): 940 """Build an index into the given DddocTree. 941 942 The index will be a two-dimensional dict, mapping (first element of path, 943 value of cat field) to a list of nodes in the DddocTree. 944 """ 945 result = {} 946 def recurse(result, path, node): 947 ## print path, node.path 948 if len(path) == 2: 949 if node.children.has_key('cat'): 950 for cat in node.children['cat'].texts: 951 result.setdefault(path[0], {}).setdefault(cat, []).append(node) 952 else: 953 result.setdefault(path[0], {})[path[1]] = node 954 if len(path) < 2: 955 for key, child in node.children.iteritems(): 956 recurse(result, path + [key], child) 957 for key, child in tree.root.children.iteritems(): 958 recurse(result, [key], child) 959 ## for k1, v1 in result.iteritems(): 960 ## for k2, v2 in v1.iteritems(): 961 ## print 'k1=%s\tk2=%s\tv=%s' % (k1, k2, [x.path for x in v2]) 962 return result 963 964 965class ErrorLogger(object): 966 def __init__(self): 967 self.error_count = 0 968 def invalidReference(self, txt, locations): 969 self.error_count += 1 970 if not locations: 971 print >>sys.stderr, 'ERROR: Invalid Reference %s in unknown location (sorry).' % txt 972 else: 973 print >>sys.stderr, 'ERROR: Invalid Reference %s in one of the following locations:' % txt 974 for filename, line in locations: 975 print >>sys.stderr, ' %s:%s' % (filename, line) 976 977 978class App(object): 979 """Application object for DDDoc. 980 981 Provides a facade to the functionality of the core module. 982 983 Usage: 984 app = App() 985 app.loadFiles([<files>]) 986 app.loadFiles([<files>]) 987 app.loadingComplete() 988 989 Attrs: 990 data The global state Data object. 991 """ 992 993 def __init__(self): 994 """Initialize object members.""" 995 self.cache = FileCache('dddoc_cache.bin') 996 self.file_loader = FileLoader(self.cache) 997 self.dddoc_tree = None 998 self.error_logger = ErrorLogger() 999 1000 def loadFiles(self, path): 1001 """Load the files with the given file name.""" 1002 self.file_loader.run(path) 1003 1004 def loadingComplete(self): 1005 """Initialize data object. 1006 1007 This method is called after all calls to loadFiles(). 1008 """ 1009 # Save the cache to disk again. 1010 self.cache.flush() 1011 # Sort Dddoc Entries and build tree. 1012 self.file_loader.entries.sort(cmp=DddocEntry.cmpPathLocation) 1013 self.dddoc_tree = DddocTree(self.file_loader.entries) 1014 # Generate automatic references. 1015 generateAutomaticReferences(self.dddoc_tree) 1016 # Perform inheritance as configured in global configuration. 1017 generateInheritedElements(self.dddoc_tree) 1018 # Clean duplicates from 'texts' members 1019 removeDuplicateTexts(self.dddoc_tree) 1020 # Move inline summaries into .summary children. 1021 processInlineSummaries(self.dddoc_tree, SUMMARY_PATHS) 1022 # Finally, after all modifications, enable caching and build indices in 1023 # tree. 1024 self.dddoc_tree.finalize() 1025 1026 def getNextId(self): 1027 """Returns an identifier. 1028 1029 Each id is only returned once. 1030 """ 1031 assert False, "For future use." 1032 self.next_id += 1 1033 return self.next_id - 1 1034