#!/usr/bin/env python2

import copy
import operator
import os
import os.path
import pickle
import string
import sys

# Constant for C++ files.
FILETYPE_CPP = 2
# Constant for DDDOC files.
FILETYPE_DDDOC = 1
# Constant for none of the above.
FILETYPE_OTHER = 0

SOURCE_ENCODING = 'iso8859-1'

# Extensions of C++ files.
CPP_EXTS = ['c', 'C', 'cpp', 'CPP', 'c++', 'C++', 'h', 'H', 'hpp', 'HPP',
            'h++', 'H++']
# Extensions of DDDOC files.
DDDOC_EXTS = ['dddoc', 'DDDOC']

# List of ignored directory names.
IGNORED_DIRS = ['CSV', '.svn', 'seeds2', 'find2', 'cmake']

DATA = None
ID = 0

# Text attribute node keys.
TEXT_ATTRIBUTE_KEYS = set(['text', 'table', 'tableheader', 'code', 'console', 'section',
                           'subsection', 'image', 'contents', 'note', 'file', 'snippet',
                           'output'])

# Nodes whose paths match the following patterns are considered text container
# nodes.  Children of such a node whose path has exactly one more component
# that is in TEXT_ATTRIBUTE_KEYS are folded into the container: the last path
# component is dropped and the content is prefixed with "type=$key:", where
# $key is the original last component.  The content of the text container node
# itself is prefixed with "type=text:".
TEXT_CONTAINER_PATHS = [
    'Indexpage.*.description',
    'Page.*.description',
    'Page.*.summary',
    'Page.*.glossary.*',
    'Function.*.example',
    'Function.*.summary',
    'Function.*.description',
    'Function.*.remarks',
    'Function.*.status',
    'Class.*.example',
    'Class.*.summary',
    'Class.*.description',
    'Class.*.remarks',
    'Class.*.status',
    'Metafunction.*.example',
    'Metafunction.*.summary',
    'Metafunction.*.description',
    'Metafunction.*.remarks',
    'Metafunction.*.status',
    'Memfunc.*.example',
    'Memfunc.*.summary',
    'Memfunc.*.description',
    'Memfunc.*.remarks',
    'Memfunc.*.status',
    'Memvar.*.example',
    'Memvar.*.summary',
    'Memvar.*.description',
    'Memvar.*.remarks',
    'Memvar.*.status',
    'Macro.*.example',
    'Macro.*.summary',
    'Macro.*.description',
    'Macro.*.remarks',
    'Macro.*.status',
    'Enum.*.example',
    'Enum.*.summary',
    'Enum.*.description',
    'Enum.*.remarks',
    'Enum.*.status',
    'Spec.*.example',
    'Spec.*.summary',
    'Spec.*.description',
    'Spec.*.remarks',
    'Spec.*.status',
    'Shortcut.*.example',
    'Shortcut.*.summary',
    'Shortcut.*.description',
    'Shortcut.*.remarks',
    'Shortcut.*.status',
    'Tag.*.example',
    'Tag.*.summary',
    'Tag.*.description',
    'Tag.*.remarks',
    'Tag.*.status',
    'Typedef.*.example',
    'Typedef.*.summary',
    'Typedef.*.description',
    'Typedef.*.remarks',
    'Typedef.*.status',
    'Demo.*.summary',
    'Demo.*.description',
    'Demo.*.remarks',
    'Demo.*.output',
    'Adaption.*.example',
    'Adaption.*.summary',
    'Adaption.*.description',
    'Adaption.*.remarks',
    'Adaption.*.status',
    'Concept.*.example',
    'Concept.*.summary',
    'Concept.*.description',
    'Concept.*.remarks',
    'Concept.*.status',
    ]
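
# Illustrative example (hypothetical entries): an entry with path
# ['Function', 'length', 'example'] and non-empty content matches
# 'Function.*.example' above and is treated as a text container: its content
# is re-emitted as a copy prefixed with 'type=text:'.  An entry with path
# ['Function', 'length', 'example', 'code'] ends in the text attribute key
# 'code'; transformDddocEntry() drops the last path component and prefixes the
# content with 'type=code:', so it ends up at ['Function', 'length', 'example'].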

def _pathsMatch(path1, path2):
    """Compare two paths with wildcards."""
    if not type(path1) is list:
        path1 = splitKeys(path1[int(path1[0] == '.'):], '.')  # Strip leading '.', if any.
    if not type(path2) is list:
        path2 = splitKeys(path2[int(path2[0] == '.'):], '.')
    if len(path1) != len(path2):
        return False
    for i, p1 in enumerate(path1):
        p2 = path2[i]
        if not (p1 == '*' or p2 == '*' or p1 == p2):
            return False
    return True
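
# A few illustrative calls (hypothetical paths, not executed):
#
#     _pathsMatch('Function.*.example', ['Function', 'length', 'example'])  # True
#     _pathsMatch('.Page.*.summary', 'Page.Sequences.summary')              # True, leading dot is stripped
#     _pathsMatch('Class.*.summary', ['Class', 'String'])                   # False, lengths differ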


def transformDddocEntry(entry):
    """Performs the text container node transformation on the given entry.

    Returns a list of new entries to add (possibly empty).
    """
    for path in TEXT_CONTAINER_PATHS:
        if _pathsMatch(path, entry.path) and entry.content:  # Is text container.
            new_entry = copy.deepcopy(entry)
            new_entry.content = 'type=text:' + entry.content
            entry.content = ''
            return [new_entry]  # Done.
        if not entry.path[-1] in TEXT_ATTRIBUTE_KEYS:
            continue  # Skip if last component does not match.
        if not _pathsMatch(path, entry.path[:-1]):
            continue  # Skip non-matching path.
        # If we reach here, it is a text node.
        ## print 'TRANSFORMING ', entry
        last = entry.path[-1]
        entry.path = entry.path[:-1]
        entry.content = 'type=' + last + ':' + entry.content
        ## print '  to ', entry
        return []  # Done.
    return []  # No updates.


class FileCache(object):
    """Simple file contents cache.

    Maps paths to (mtime, file contents) pairs.

    Attrs:

        path     Path to the cache file.
        content  Dict with cache content mapping file name to pair of mtime
                 and data associated with the cache.
    """

    def __init__(self, path):
        self.path = path
        self.content = {}
        self._tryLoad()

    def _tryLoad(self):
        try:
            with open(self.path, 'rb') as f:
                self.content = pickle.load(f)
        except Exception:
            print >>sys.stderr, 'Could not load cache %s' % self.path
            return False
        print >>sys.stderr, 'Successfully loaded cache %s' % self.path
        return True

    def flush(self):
        """Store the cache to its file."""
        try:
            with open(self.path, 'wb') as f:
                pickle.dump(self.content, f)
        except Exception:
            print >>sys.stderr, 'Could not store cache %s' % self.path
            return False
        print >>sys.stderr, 'Successfully stored cache %s' % self.path
        return True

    def has_key(self, key):
        """Returns True if the cache has data for this key."""
        return self.content.has_key(key)

    def isFresh(self, filename):
        """Returns True if the cache is fresh.

        The cache is fresh if the file at the given path is not newer than the
        data in the cache.
        """
        if not self.has_key(filename):
            return False
        mtime = os.stat(filename).st_mtime
        return mtime <= self.content[filename][0]

    def get(self, key, defaultValue=None):
        """Return content of the given entry."""
        return self.content.get(key, (None, defaultValue))[1]

    def set(self, filename, value):
        """Set cache content and mtime."""
        mtime = os.stat(filename).st_mtime
        self.content[filename] = (mtime, value)
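
    # Minimal usage sketch (hypothetical file and loader, not executed):
    #
    #     cache = FileCache('dddoc_cache.bin')
    #     if cache.isFresh('foo.h'):
    #         lines = cache.get('foo.h')
    #     else:
    #         lines = loadAndParse('foo.h')  # hypothetical loader
    #         cache.set('foo.h', lines)
    #     cache.flush()  # Persist for the next run.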


class DddocEntry(object):
    def __init__(self, path, content, filename, line_no_begin, line_no_end):
        self.path = path
        self.content = content
        self.filename = filename
        self.line_no_begin = line_no_begin
        self.line_no_end = line_no_end

    def __str__(self):
        tpl = ('DddocEntry(path=%s, content=%s, filename=%s, line_no_begin=%s, '
               'line_no_end=%s)')
        values = (self.path, self.content, self.filename, self.line_no_begin,
                  self.line_no_end)
        return tpl % tuple(map(repr, values))

    def __repr__(self):
        return self.__str__()

    @classmethod
    def cmpPathLocation(klass, lhs, rhs):
        """Comparator, by entry path then filename and line number."""
        lhs_t = (lhs.path, lhs.filename, lhs.line_no_begin)
        rhs_t = (rhs.path, rhs.filename, rhs.line_no_begin)
        if lhs_t < rhs_t:
            return -1
        elif lhs_t > rhs_t:
            return 1
        else:
            return 0


def splitKeys(text, delimiters, limit=None, _cache={}):
    """Split text at delimiter characters, keeping quoted substrings intact.

        >>> splitKeys(".Adaption.'std::string'.summary", '.')
        ['', 'Adaption', "'std::string'", 'summary']
    """
    if '\u0001' in text:
        text = text.split('\u0001', 1)[0]  # Remove optional label, used in inheritance.
    cache_key = (text, delimiters, limit)  # The limit influences the result, too.
    if cache_key in _cache:
        return _cache[cache_key]
    count = 0
    current = []
    result = []
    str_delimiter = None
    for i in range(0, len(text)):
        # Handle text in strings.
        if str_delimiter:
            if text[i] == str_delimiter:
                str_delimiter = None
            current.append(text[i])
            continue
        elif text[i] in '\'"':
            str_delimiter = text[i]
            current.append(text[i])
            continue
        # Handle non-in-string text.
        if text[i] in delimiters:
            result.append(''.join(current))
            current = []
            count += 1
            if limit and count >= limit:
                result.append(text[i+1:])
                _cache[cache_key] = result
                return result
        else:
            current.append(text[i])
    result.append(''.join(current))
    _cache[cache_key] = result
    return result
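
# A few more illustrative calls (not executed):
#
#     splitKeys('.Function.length.summary:Computes the length.', ':', 1)
#     # -> ['.Function.length.summary', 'Computes the length.']
#     splitKeys('Function.length.summary', '.')
#     # -> ['Function', 'length', 'summary']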


def cleanPath(path_arr):
    """Takes a list with path components and cleans its elements.

    Cleaning currently consists of stripping surrounding whitespace and
    removing single and double quotes.
    """
    def _cleanPathElement(x):
        return x.strip().replace('\'', '').replace('"', '')
    return map(_cleanPathElement, path_arr)
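
# For example (not executed):
#
#     cleanPath(['', 'Adaption', "'std::string'", 'summary'])
#     # -> ['', 'Adaption', 'std::string', 'summary']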


class FileLoader(object):
    """File loader helper class.

    Attrs:

        cache    FileCache to use for caching.
        entries  List of DddocEntry objects.
    """

    def __init__(self, cache):
        self.cache = cache
        self.entries = []

    def _loadDDDOCFile(self, filename, cache):  # TODO(holtgrew): Make Top-Level Function?
        # Try to load from cache.
        if cache.isFresh(filename):
            return cache.get(filename)

        # Load file.
        with open(filename, 'rb') as f:
            text = [x.decode(SOURCE_ENCODING).encode("ascii", "xmlcharrefreplace") for x in f.readlines()]
        cache.set(filename, text)
        return text

    def _loadCPPFile(self, filename, cache):  # TODO(holtgrew): Make Top-Level Function?
        if cache.isFresh(filename):
            return cache.get(filename)

        # TODO(holtgrew): This looks overly complicated.
        with open(filename, 'rb') as f:
            lines = [x.decode(SOURCE_ENCODING).encode("ascii", "xmlcharrefreplace") for x in f.readlines()]

        ret = []

        # Skip the whole file if it opts out of extraction via SEQAN_NO_DDDOC.
        for line in lines:
            if line.find("SEQAN_NO_DDDOC") >= 0:
                cache.set(filename, ret)
                return ret

        incomment = False
        innextcomment = False
        inextract = False

        for line in lines:
            line = line.rstrip()
            str_line = ""
            if len(line) == 0:
                if not innextcomment and not incomment:
                    str_line = "."
                else:
                    str_line = " "

            while len(line) > 0:
                if innextcomment:
                    if line[len(line)-1] == "\\":
                        if inextract: str_line += line[: len(line)-1]
                    else:
                        if inextract: str_line += line
                        innextcomment = False
                    break

                elif incomment:
                    pos1 = line.find("*/")
                    if pos1 < 0:
                        if inextract: str_line += line
                        break
                    else:
                        if inextract:
                            str_line += line[:pos1]
                            line = line[pos1 + 3:]
                        else:
                            line = line[pos1 + 2:]
                        incomment = False

                else:
                    pos1 = line.find("/*")
                    pos2 = line.find("//")
                    pos3 = line.find('"')
                    if (pos1 >= 0) and ((pos2 < 0) or (pos1 < pos2)) and ((pos3 < 0) or (pos1 < pos3)):
                        pos9 = line.find("*/", pos1 + 2)
                        if (len(line) > pos1 + 2):
                            inextract = (line[pos1 + 2] == "/") or (line[pos1 + 2] == "*")
                        else:
                            inextract = False
                        if pos9 < 0:
                            if inextract: str_line += line[pos1 + 3:]
                            incomment = True
                            break
                        else:
                            if inextract:
                                str_line += line[pos1 + 3: pos9]  # Text up to the closing "*/".
                                line = line[pos9 + 3:]
                            else:
                                line = line[pos9 + 2:]

                    elif (pos2 >= 0) and ((pos3 < 0) or (pos2 < pos3)):
                        pos2b = pos2 + 2
                        while ((pos2b < len(line)) and ((line[pos2b] == "/") or (line[pos2b] == "*"))):
                            pos2b += 1
                        inextract = (pos2b > pos2 + 2)
                        if line[len(line)-1] == "\\":
                            if inextract: str_line += line[pos2b: len(line)-1]
                            innextcomment = True
                        else:
                            if inextract: str_line += line[pos2b:]
                        break

                    elif pos3 >= 0:
                        # Skip over the string literal so that comment markers
                        # inside it are not misinterpreted.
                        pos9 = line.find('"', pos3 + 1)
                        if pos9 < 0:
                            break  # Unterminated string literal; give up on this line.
                        else:
                            line = line[pos9 + 1:]

                    else:
                        break

            ret = ret + [str_line]

        cache.set(filename, ret)
        return ret

    def _getFileType(self, filename):  # TODO(holtgrew): Make Top-Level Function?
        """Determines file type from filename.

        Determines the file type from the extension of the given filename.

        >>> loader = FileLoader(None)
        >>> loader._getFileType('test.cpp') == FILETYPE_CPP
        True
        >>> loader._getFileType('path/file.h') == FILETYPE_CPP
        True
        >>> loader._getFileType('test.dddoc') == FILETYPE_DDDOC
        True

        Args:

            filename  Filename to parse.

        Returns:

            One of {FILETYPE_CPP, FILETYPE_DDDOC, FILETYPE_OTHER}, depending
            on the extension of filename.
        """
        # Get file extension.
        base, ext = os.path.splitext(filename)
        if ext[1:] in CPP_EXTS:
            return FILETYPE_CPP
        elif ext[1:] in DDDOC_EXTS:
            return FILETYPE_DDDOC
        else:
            return FILETYPE_OTHER

    def _loadFile(self, filename):
        """Load the file with the given filename.

        The file's lines are split into DDDoc entries, unwrapping entries that
        span more than one line.  Finally, the keys are expanded, and
        surrounding whitespace is stripped.
        """
        ## print filename
        # Load file contents, through a cache.
        file_type = self._getFileType(filename)
        if file_type == FILETYPE_CPP:
            text = self._loadCPPFile(filename, self.cache)
        elif file_type == FILETYPE_DDDOC:
            text = self._loadDDDOCFile(filename, self.cache)
        else:
            raise Exception("Unknown file type of file %s." % filename)
        text.append('.')
        ## print 'LOADING', filename
        ## print '\n'.join(text)

        # Process all lines in the input, join lines that do not begin with a
        # dot with the previous ones.  This allows the wrapping of lines.
        curr = False  # Text of the entry that is currently being collected.
        dddoc_entries = []  # [(path, filename, begin line no, end line no)]
        line_no_begin, line_no_end = 1, 1
        for line in text:
            ## if line and line != '.':
            ##     print 'LINE', line
            line_no_end += 1
            if not line:
                continue
            if line[0] == '.':
                if curr is not False and curr[0] == '.' and curr != '.' and curr.strip():  # Skip empty dummy lines.
                    dddoc_entries.append([curr, filename, line_no_begin, line_no_end])
                    ## print dddoc_entries[-1]
                line_no_begin = line_no_end
                curr = line
                if curr == '.':
                    curr = False
            elif curr:
                if curr[-1] != '\n':
                    curr += '\n'
                curr += line

        # Now, expand the keys of dddoc_entries, e.g. dddoc_entries[i][0].
        # TODO(holtgrew): Consider escaping of keys here.
        stack = []
        stack_len_sum = 0
        for entry in dddoc_entries:
            ## print 'ENTRY', entry
            ## print 'stack=%s' % (stack)
            # Split out $key:$value of the entry and $the.$path.$elements from $key.
            maybe_pair = splitKeys(entry[0].strip(), ':', 1)
            if len(maybe_pair) == 2:
                key, value = maybe_pair
            else:
                key, value = entry[0].strip(), ''
            path = splitKeys(key, '.')[1:]
            # Count empty entries in the path.
            ## print ' ', path
            empty_count = reduce(operator.add, [1 for x in path if not x], 0)
            ## print '  empty_count', empty_count
            if empty_count <= len(stack):
                stack = stack[:empty_count]
                stack_len_sum = reduce(operator.add, map(len, stack), 0)
            stack.append(path[empty_count:])
            stack_len_sum += len(stack[-1])
            path = reduce(operator.add, stack, [])
            # Remove any leading and trailing whitespace from value and compute
            # updated begin and end line no.
            line_count = len(value.splitlines())
            value_no_leading = value.lstrip()
            line_count2 = len(value_no_leading.splitlines())
            line_no_begin = entry[2] + line_count - line_count2
            value_no_trailing = value_no_leading.rstrip()
            line_count3 = len(value_no_trailing.splitlines())
            line_no_end = entry[3] - line_count2 + line_count3

            # Store the DDDoc entry.
            if path:
                self.entries.append(DddocEntry(cleanPath(path), value_no_trailing, filename, line_no_begin, line_no_end))
                new_entries = transformDddocEntry(self.entries[-1])
                ## if new_entries:
                ##     print 'NEW ENTRIES', new_entries
                self.entries += new_entries
                ## print self.entries[-1]
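
    # Illustrative sketch for _loadFile() (hypothetical input): the lines
    #
    #     .Function.length
    #     ..summary:Computes the
    #     length of a sequence.
    #
    # are first joined into the entries '.Function.length' and
    # '..summary:Computes the\nlength of a sequence.'.  Key expansion then
    # replaces the leading empty path components by the components of the
    # previous entry, yielding ['Function', 'length'] and
    # ['Function', 'length', 'summary'].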

    def run(self, search_path):
        """Call _loadFile() on files.

        All files below search_path that have file type FILETYPE_CPP or
        FILETYPE_DDDOC, as determined by _getFileType(), will be processed.
        Directories with names in IGNORED_DIRS are skipped.

        Args:
            search_path  String, path to search files under.
        """
        for root, dirs, files in os.walk(search_path):
            # Parse all files.
            for filename in files:
                if os.path.basename(filename).startswith('.'):
                    continue  # Skip hidden files.
                path = os.path.join(root, filename)
                if self._getFileType(path) in [FILETYPE_CPP, FILETYPE_DDDOC]:
                    self._loadFile(path)
            # Exclude ignored directories.
            for ignored in IGNORED_DIRS:
                if ignored in dirs:
                    dirs.remove(ignored)


class DddocTreeNode(object):
    """Represents one entry in the DddocTree.

    Attrs:

        tree      The DddocTree that the node belongs to.
        key       The key of this child, last element of path.
        path      The full path to the child.
        entry     Range [beg, end) of DddocEntry that this node represents.
        children  dict with the children as key/value pairs.
        texts     Array of strings with the texts.
    """

    def __init__(self, tree, key, path, entry, children=None):
        self.tree = tree
        self.key = key
        self.path = path
        self.entry = entry
        self.children = children if children is not None else {}
        self.texts = []

    def text(self, spacer=' '):
        return spacer.join(self.texts)

    def __str__(self):
        """Returns a user-readable dump of the subtree rooted at this node."""
        def _str(node, level=0, prefix=''):
            space = '  ' * level
            if prefix:
                prefix = prefix + ' --> '
            res = '%s %sDddocTreeNode(key=%s, texts=%s)' % (space, prefix, repr(node.key), repr(node.texts))
            for k, child in node.children.iteritems():
                res += '\n' + _str(child, level + 1, k)
            return res
        return _str(self)

    def dump(self, stream=sys.stdout):
        """Debug recursive dumping of a tree node."""
        print >>stream, self


class DddocTree(object):
    """Tree with the information from the DDDoc contents.

    Attrs:

        entries  The raw entries.
        root     The root DddocTreeNode.
        cache    Optional dict used by find() for caching; enabled in
                 finalize().
        glossary_nodes  List of nodes that contain glossary entries.  Built
                        in finalize().
    """

    def __init__(self, entries):
        self.entries = entries
        #for e in self.entries:
        #    print e
        self.root = DddocTreeNode(self, 'ROOT', [], (0, 0), self._buildSubtree([], 0, len(entries), 0))
        self.cache = None
        self.glossary_nodes = []
        ## self.root.dump()
        ## for entry in self.entries:
        ##     print entry.path, entry.content

    def _enableFindCache(self):
        if self.cache is None:
            self.cache = {}

    def finalize(self):
        """Called once the tree will not be modified any more.

        Enables caching and builds some indices.
        """
        self._enableFindCache()
        print >>sys.stderr, 'Indexing Glossary Pages'
        if 'Page' in self.root.children:
            for key, node in self.root.children['Page'].children.iteritems():
                if 'glossary' in node.children:
                    self.glossary_nodes.append(node.children['glossary'])
                    print >>sys.stderr, '  Found Page.%s' % node.key

    def _buildSubtree(self, path, begin_index, end_index, level):
        # First, identify the entries belonging to each child node: since the
        # entries are sorted, entries with equal path[level] (and, inductively,
        # equal path[i] for all i <= level) form contiguous runs.
        prev_key = None
        prev_beg = None
        subseqs = []
        for i in range(begin_index, end_index):
            if prev_key != self.entries[i].path[level]:
                if prev_key != None:
                    subseqs.append((prev_beg, i))
                prev_key = self.entries[i].path[level]
                prev_beg = i
        if prev_key != None and prev_beg != end_index:  # Handle last.
            subseqs.append((prev_beg, end_index))
        # Now, subseqs contains a sequence of contiguous half-open intervals.
        # Each contains the data for one tree node.  There is a possibly empty
        # sequence of leading entries with paths of length level + 1 containing
        # the data for the current level node.  The rest is for the level below.
        result = {}
        for (b, c) in subseqs:
            assert b != c
            # Split into entries for this and for next level: [a, b); [b, c).
            a = b  # [a, b) will be for this vertex.
            while b < c and len(self.entries[b].path) == level + 1:
                b += 1
            # Compute node.
            path = self.entries[a].path[:(level + 1)]
            key = path[level]
            node = DddocTreeNode(self, key, path, (a, b))
            ## print 'new node', key
            for i in range(a, b):
                if self.entries[i].content:
                    node.texts.append(self.entries[i].content)
            # Compute subtree.
            node.children = self._buildSubtree(path, b, c, level + 1)
            result[key] = node
        return result
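
    # Illustrative example (hypothetical, sorted entries): given the entry paths
    #
    #     ['Function', 'length']
    #     ['Function', 'length', 'param', 'seq']
    #     ['Function', 'length', 'summary']
    #     ['Function', 'resize']
    #
    # the call for the children of 'Function' (level 1) finds the runs for the
    # keys 'length' and 'resize'.  The leading entry of 'length' (path length
    # equal to level + 1) contributes its content to that node's texts; the
    # remaining entries of the run are processed recursively at level 2.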

    def find(self, path):
        """Query tree for a DddocTreeNode.

        The argument path can either be a dot-separated string or a list of
        path components.  If path is a string, an optional leading dot is
        ignored.  Returns None if nothing could be found.

            tree.find(['path', 'to', 'node'])
            tree.find('path.to.node')
            tree.find('.path.to.node')
        """
        ## print 'FIND(%s)' % repr(path)
        # Try to retrieve from cache if there is a cache.
        if not self.cache is None:
            if not type(path) is str:
                key = '.'.join(path)
            else:
                key = path
            if self.cache.has_key(key):
                return self.cache[key]
        # Split path if it is a string, ignoring a leading dot if any.
        if type(path) is str:
            path = splitKeys(path, '.')
            if path and path[0] == '':
                path = path[1:]
        # Now, query the tree.
        def findRecurse(node, path):
            """Helper function that searches for the node with given path."""
            if not path:
                return node
            if not node.children.has_key(path[0]):
                return None
            return findRecurse(node.children[path[0]], path[1:])
        res = findRecurse(self.root, path)
        if not self.cache is None:
            self.cache['.'.join(path)] = res
        return res

# Paths where the inline summary is moved into a .summary child.  See
# documentation of processInlineSummaries() for details.
SUMMARY_PATHS = [
    '*.*.param.*',
    '*.*.returns',
    '*.*.tag.*',
    '*.*.value.*',
    '*.*.returns.param.*',  # TODO(holtgrew): Used for metafunctions, could be improved.
    'Adaption.*',
    'Class.*',
    'Concept.*',
    'Demo.*',
    'Enum.*',
    'Function.*',
    'Macro.*',
    'Memfunc.*',
    'Metafunction.*',
    'Shortcut.*',
    'Spec.*',
    'Tag.*',
    ]

# TODO(holtgrew): Also use for generateAutomaticReferences()

def _matchTreesInNode(tree, node, path, func, block_paths=[['globals']], level=0):
    """Calls func on all nodes whose path matches the given wildcard path.

    Nodes whose path starts with one of the block_paths are skipped.
    """
    ## print '  ' * level, '_matchTreesInNode(tree', node.path, path, func, level, ')'
    if path:
        if path[0] == '*':
            for child in node.children.itervalues():
                _matchTreesInNode(tree, child, path[1:], func, block_paths, level+1)
        else:
            if node.children.has_key(path[0]):
                _matchTreesInNode(tree, node.children[path[0]], path[1:], func, block_paths, level+1)
    else:
        for block_path in block_paths:
            ## print node.path[:len(block_path)], '==', block_path
            if node.path[:len(block_path)] == block_path:
                return  # Path is blocked.
        func(node)
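
# For example (hypothetical call), the following collects all nodes whose path
# matches 'Function.*.param.*', skipping anything below the blocked 'globals'
# subtree:
#
#     matches = []
#     _matchTreesInNode(tree, tree.root, ['Function', '*', 'param', '*'], matches.append)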


def processInlineSummaries(tree, paths):
    """Move inline documentation to .summary subpaths.

    For nodes matching the given paths, inline values are moved into .summary
    subnodes for greater consistency and lower variance.

    E.g. the following:

        .Function.f.param.x:This is param x.

    will be transformed into

        .Function.f.param.x
        ..summary:This is param x.
    """
    # First, collect nodes for the summary transfer.
    collected_nodes = set()
    def f(node):
        if node.texts:
            collected_nodes.add(node)
    for path in paths:
        _matchTreesInNode(tree, tree.root, splitKeys(path, '.'), f)
    # Then, move the inline summaries into a summary node.
    for node in collected_nodes:
        if not 'summary' in node.children:  # Create node if necessary.
            summaryNode = DddocTreeNode(tree, 'summary', node.path + ['summary'], (-2,-2))
            node.children['summary'] = summaryNode
        node.children['summary'].texts += node.texts
        node.texts = []


def generateAutomaticReferences(tree):
    """Interpret the globals.relations entries."""
    print >>sys.stderr, 'Generating Automatic References'
    relations_node = tree.find('globals.relations')
    if not relations_node:
        return  # Empty, do nothing.

    # We first collect all automatic links, scheduled to be added later.
    additions = []
    def appendToAdditions(node):
        for node_path in node.texts:
            node_path = splitKeys(node_path, '.')
            ## print '  ' * level, ' ', node_path
            res = tree.find(node_path)
            ## print '  ' * level, ' ', res is not None
            if not res:
                continue  # Not found, skip.  # TODO(holtgrew): Warning?
            additions.append((res.path + [key], '.'.join(node.path[:2])))
    for key, node in relations_node.children.iteritems():
        ## print 'GENERATE', key, node
        for txt in node.texts:
            _matchTreesInNode(tree, tree.root, splitKeys(txt, '.'), appendToAdditions)

    # Now, add these additions.  This circumvents problems leading to infinite
    # recursions.
    for path, text in additions:
        res = tree.find(path)
        if not res:
            parent = tree.find(path[:-1])
            assert parent
            res = DddocTreeNode(tree, path[-1], path, None)
            parent.children[path[-1]] = res
        if not text in res.texts:
            res.texts.append(text)
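
# Illustrative sketch of the mechanism (hypothetical configuration): an entry
# such as
#
#     .globals.relations.childSpec:Spec.*.general
#
# makes every tree node matching 'Spec.*.general' contribute a back-reference:
# if Spec.AllocString.general lists 'Class.String', then 'Spec.AllocString' is
# appended to the texts of Class.String.childSpec (the child node is created
# if it does not exist yet).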


def generateInheritedElements(tree):
    """Push through inheritances."""
    print >>sys.stderr, 'Linking Inherited Entities'
    inherit_node = tree.find('globals.inherit')
    if not inherit_node:
        return  # No inheritance configured, do nothing.
    # Contains children: $TARGET_FIELD:$THROUGH_FIELD.$SOURCE_FIELD

    all_paths = set()
    depends_on = {}
    inheritance_rules = []

    # First build a dependency graph.
    for target_field, child in inherit_node.children.items():
        for txt in child.texts:
            arr = splitKeys(txt, '.')
            through_field = arr[0]
            if len(arr) > 1:
                source_field = arr[1]
            else:
                source_field = target_field
            inheritance_rules.append((target_field, through_field, source_field))
            def registerDependencies(node):
                all_paths.add('.'.join(node.path))
                if not through_field in node.children:
                    return
                for path in node.children[through_field].texts:
                    pth = '.'.join(node.path)
                    depends_on.setdefault(pth, set()).add(path)
            _matchTreesInNode(tree, tree.root, ['*', '*'], registerDependencies)
    ## print 'ALL PATHS', all_paths

    # Now, push through references by inheritance for all paths that are not
    # linked to and not completed yet.
    done = set()
    while done != all_paths:
        # Process all paths that are not completed yet and have no pending
        # dependencies; if there are none, the configuration must be cyclic.
        to_do = all_paths - done - set(depends_on.keys())
        if not to_do:
            raise Exception('Could not process all dependencies. Cyclic dependencies?')
        # Actually perform the processing.
        for target_path in to_do:
            for target_field, through_field, source_field in inheritance_rules:
                target_node = tree.find(target_path)
                if not through_field in target_node.children:
                    continue  # Skip if no source children.
                ## print 'TRYING', target_path, through_field, source_field
                for source_path in target_node.children[through_field].texts:
                    source_node = tree.find(source_path)
                    if not source_field in source_node.children:
                        continue  # Skip if no source field.
                    for path in source_node.children[source_field].texts:
                        if not '\u0001' in path:  # We use this ugly hack to add the inheritance source here.
                            path = path + '\u0001' + '.'.join(source_node.path)
                        # If necessary then create child in target node.
                        if not target_field in target_node.children:
                            target_node.children[target_field] = DddocTreeNode(tree, target_field, target_node.path + [target_field], source_node.children[source_field].entry)
                        # Copy over path.
                        target_node.children[target_field].texts.append(path)
                        ## print '  appending', path

        # Clear out the stuff that we completed.
        to_delete = []
        for key in depends_on:  # Clear out all done.
            depends_on[key] -= to_do
            if not depends_on[key]:
                to_delete.append(key)
        for key in to_delete:
            del depends_on[key]
        done |= to_do  # Mark the processed paths as done.
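
# Illustrative sketch of the inheritance configuration (hypothetical entry):
#
#     .globals.inherit.summary:general
#
# is read as the rule (target_field='summary', through_field='general',
# source_field='summary'): for every second-level node that has a 'general'
# child referencing another entity, the texts of that entity's 'summary' child
# are copied into the node's own 'summary' child, with the source path
# attached after the '\u0001' marker so that the origin remains identifiable.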


def removeDuplicateTexts(tree):
    """Remove duplicates from texts members.

    Suffixes starting with '\u0001' are ignored for the comparisons
    and strings with these suffixes are preferred.
    """
    ##print 'remove duplicates'
    def recurse(node):
        in_cleaned = {}
        cleaned = []
        for txt in node.texts:
            clean = txt
            pos = txt.find('\u0001')
            if pos != -1:
                clean = txt[:pos]
            ##print cleaned, repr(clean)
            if clean in in_cleaned:
                # Prefer the variant that carries a '\u0001' source suffix.
                if '\u0001' in txt and not '\u0001' in cleaned[in_cleaned[clean]]:
                    cleaned[in_cleaned[clean]] = txt
            else:
                in_cleaned[clean] = len(cleaned)
                cleaned.append(txt)
        node.texts = cleaned
        for child in node.children.itervalues():
            recurse(child)
    for child in tree.root.children.itervalues():
        recurse(child)


# TODO(holtgrew): If needed, this could easily be generalized.
def buildByTypeAndCatIndex(tree):
    """Build an index into the given DddocTree.

    The index is a two-dimensional dict keyed by the first element of a node's
    path: nodes that have a 'cat' child are collected in lists keyed by each
    category text, while nodes without one are stored directly under their
    second path component.
    """
    result = {}
    def recurse(result, path, node):
        ## print path, node.path
        if len(path) == 2:
            if node.children.has_key('cat'):
                for cat in node.children['cat'].texts:
                    result.setdefault(path[0], {}).setdefault(cat, []).append(node)
            else:
                result.setdefault(path[0], {})[path[1]] = node
        if len(path) < 2:
            for key, child in node.children.iteritems():
                recurse(result, path + [key], child)
    for key, child in tree.root.children.iteritems():
        recurse(result, [key], child)
    ## for k1, v1 in result.iteritems():
    ##     for k2, v2 in v1.iteritems():
    ##         print 'k1=%s\tk2=%s\tv=%s' % (k1, k2, [x.path for x in v2])
    return result
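
# Illustrative result shape (hypothetical entries): if Function.length has a
# 'cat' child with texts ['Sequences'] and Page.Sequences has no 'cat' child,
# the index looks roughly like
#
#     {'Function': {'Sequences': [<node Function.length>]},
#      'Page': {'Sequences': <node Page.Sequences>}}
#
# i.e. nodes with a 'cat' field are grouped into lists per category, while
# nodes without one are keyed directly by their second path component.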


class ErrorLogger(object):
    def __init__(self):
        self.error_count = 0

    def invalidReference(self, txt, locations):
        self.error_count += 1
        if not locations:
            print >>sys.stderr, 'ERROR: Invalid Reference %s in unknown location (sorry).' % txt
        else:
            print >>sys.stderr, 'ERROR: Invalid Reference %s in one of the following locations:' % txt
            for filename, line in locations:
                print >>sys.stderr, '  %s:%s' % (filename, line)


class App(object):
    """Application object for DDDoc.

    Provides a facade to the functionality of the core module.

    Usage:
       app = App()
       app.loadFiles(path1)
       app.loadFiles(path2)
       app.loadingComplete()

    Attrs:
      cache         FileCache used when loading files.
      file_loader   FileLoader that collects the DddocEntry objects.
      dddoc_tree    DddocTree built in loadingComplete().
      error_logger  ErrorLogger used to collect errors.
    """

    def __init__(self):
        """Initialize object members."""
        self.cache = FileCache('dddoc_cache.bin')
        self.file_loader = FileLoader(self.cache)
        self.dddoc_tree = None
        self.error_logger = ErrorLogger()

    def loadFiles(self, path):
        """Load all documentation-relevant files below the given path."""
        self.file_loader.run(path)

    def loadingComplete(self):
        """Build and post-process the documentation tree.

        This method is called after all calls to loadFiles().
        """
        # Save the cache to disk again.
        self.cache.flush()
        # Sort DDDoc entries and build the tree.
        self.file_loader.entries.sort(cmp=DddocEntry.cmpPathLocation)
        self.dddoc_tree = DddocTree(self.file_loader.entries)
        # Generate automatic references.
        generateAutomaticReferences(self.dddoc_tree)
        # Perform inheritance as configured in the global configuration.
        generateInheritedElements(self.dddoc_tree)
        # Clean duplicates from 'texts' members.
        removeDuplicateTexts(self.dddoc_tree)
        # Move inline summaries into .summary children.
        processInlineSummaries(self.dddoc_tree, SUMMARY_PATHS)
        # Finally, after all modifications, enable caching and build indices in
        # the tree.
        self.dddoc_tree.finalize()

    def getNextId(self):
        """Returns an identifier.

        Each id is only returned once.
        """
        assert False, "For future use."
        self.next_id += 1
        return self.next_id - 1