1#!/usr/bin/env python2
2"""Writing for HTML pages."""
3
4import distutils.dir_util
5import json
6import os
7import os.path
8import shutil
9import sys
10import xml.sax.saxutils
11import urllib
12
13import jinja2
14import proc_doc
15import raw_doc
16
17
def escapeForXml(s):
    """Escape s for XML output via xml.sax.saxutils.escape and return it."""
    escaped = xml.sax.saxutils.escape(s)
    return escaped
21
22
def escapeName(name):
    """Return name rewritten so it can be used in file names and anchors.

    Alphanumeric characters and '-' are copied unchanged.  Every other
    character c is replaced by '_' followed by the decimal value of ord(c).
    """
    # TODO(rmaerker): Encode special chars using urllib.quote(c.encode('utf8'))
    pieces = [c if (c.isalnum() or c in "-") else '_' + str(ord(c))
              for c in name]
    return ''.join(pieces)
34
35
def escapeAnchor(name):
    """Return the anchor form of name; currently name is handed back unchanged."""
    anchor = name
    return anchor
39
40
class PathManager(object):
    """Builds the file system paths used when writing the documentation.

    out_dir is the directory the generated files are placed in; this_dir is
    the directory containing this module (templates live in its 'tpl'
    sub directory).
    """

    def __init__(self, out_dir):
        here = os.path.realpath(__file__)
        self.out_dir = out_dir
        self.this_dir = os.path.dirname(here)

    def getTplPath(self, path):
        """Return the location of template `path` below the 'tpl' directory."""
        tpl_dir = os.path.join(self.this_dir, 'tpl')
        return os.path.join(tpl_dir, path)

    def getEntryPath(self, entry):
        """Return the output file for the page of entry."""
        file_name = '%s_%s.html' % (entry.kind, escapeName(entry.name))
        return os.path.join(self.out_dir, file_name)

    def getTopFramesetPath(self):
        """Return the output file for the top frameset (index.html)."""
        return os.path.join(self.out_dir, 'index.html')

    def getListPath(self):
        """Return the output file for the list page (list.html)."""
        return os.path.join(self.out_dir, 'list.html')

    def translateDemoPath(self, path):
        """Translate a demo path; currently the path is returned as given."""
        return path
68
69
class TextNodeToHtml(object):
    """Converts a text node tree into an HTML string.

    The nodes are expected to offer the attributes `type`, `text`, `attrs`,
    `children` and `raw_html`.  Heading tags h0..h9 of non-raw nodes are
    shifted so that 'h1' becomes 'h<start_heading>'.

    Args:
        text_node: Root node to convert (may be None/falsy).
        skip_top_tag: If true, the root's own tag is not emitted.
        start_heading: Heading level that 'h1' is mapped to.
        path_mgr: Optional object with a translateDemoPath(path) method that
            is used to compute the link target of code demos/snippets.
    """

    # Label text shown below a code block, keyed by the node's 'source' attr.
    _PATH_LABELS = {'snippet': 'Snippet from:', 'include': 'Demo:'}

    def __init__(self, text_node, skip_top_tag=None, start_heading=2, path_mgr=None):
        self.skip_top_tag = skip_top_tag
        self.text_node = text_node
        self.res = []
        self.start_heading = start_heading
        self.path_mgr = path_mgr
        self.heading_table = {}
        for i in range(0, 10):
            self.heading_table['h%d' % i] = 'h%d' % (i + start_heading - 1)

    def _tagName(self, text_node):
        """Return the tag name to emit; raw HTML nodes keep their type as is."""
        if text_node.raw_html:
            return text_node.type
        return self.heading_table.get(text_node.type, text_node.type)

    def openTag(self, text_node, **kwargs):
        """Return the list of string pieces forming the opening tag.

        The node's attrs are written first, followed by any extra attributes
        given as keyword arguments.
        """
        res = ['<', self._tagName(text_node)]
        for key, value in text_node.attrs.items():
            # repr()[1:-1] drops the first and last character of the repr.
            # NOTE(review): under Python 2 a unicode value would keep part of
            # its u'' prefix here -- confirm attrs only hold byte strings.
            res += [' ', key, '=', '"', repr(value)[1:-1], '"']
        for key, value in kwargs.items():
            res += [' ', key, '=', '"', value, '"']
        res.append('>')
        return res

    def closeTag(self, text_node):
        """Return the list of string pieces forming the closing tag."""
        return ['</', self._tagName(text_node), '>']

    def convertCode(self, source_code):
        """Return HTML for source_code.

        Uses pygments with the C++ lexer when pygments can be imported and
        falls back to an escaped <pre class="code"> block otherwise.
        """
        # TODO(holtgrew): Interpret source type.
        try:
            import pygments, pygments.lexers, pygments.formatters
        except ImportError:
            return '<pre class="code">' + escapeForXml(source_code) + '</pre>'
        return pygments.highlight(
            source_code, pygments.lexers.CppLexer(),
            pygments.formatters.HtmlFormatter(style='friendly'))

    def _handleCode(self, text_node):
        """Append the HTML for a 'dox:code' node to self.res."""
        src_path = text_node.attrs.get('path')
        code_type = text_node.attrs.get('type')
        code = text_node.children[0].text
        if code_type in ['.cpp', '.h']:
            self.res.append('<div data-src-path="%s">%s' %
                            (src_path, self.convertCode(code)))
            # The link target is the translated path; the visible label
            # remains the original path.
            link_path = src_path
            if self.path_mgr:
                link_path = self.path_mgr.translateDemoPath(src_path)
            label = self._PATH_LABELS.get(text_node.attrs.get('source'))
            if label:
                self.res.append(
                    '<div class="path_label"><span class="label">%s'
                    '</span> <a href="%s" target="_top">%s</a></div>' %
                    (label, link_path, src_path))
            self.res.append('</div>')
        elif code_type in ['.console', '.stdout', '.stderr']:
            self.res.append('<pre class="console" data-src-path="%s">%s</pre>' %
                            (src_path, escapeForXml(code)))
        else:
            self.res.append('<pre class="code" data-src-path="%s">%s</pre>' %
                            (src_path, escapeForXml(code)))

    def handleTag(self, text_node):
        """Append the HTML for text_node (and its subtree) to self.res."""
        if text_node.type == '<text>':
            self.res.append(text_node.text)
        elif text_node.type == 'dox:code':
            self._handleCode(text_node)
        else:
            self.res += self.openTag(text_node)
            for c in text_node.children:
                self.handleTag(c)
            self.res += self.closeTag(text_node)

    def convert(self):
        """Return the HTML string for the root node, or None if it is falsy."""
        if not self.text_node:
            return None
        if not self.skip_top_tag:
            self.res += self.openTag(self.text_node)
        for c in self.text_node.children:
            self.handleTag(c)
        if not self.skip_top_tag:
            self.res += self.closeTag(self.text_node)
        return ''.join(self.res)
153
154
def toDox(proc_entry, line_length=110, in_comment=False):
    """Return proc_entry and its subentries formatted in the dox-like format.

    The raw entry of proc_entry is formatted first, followed by the raw
    entries of all its subentries.  With in_comment set, every line gets a
    ' * ' prefix, trailing empty comment lines are dropped and the whole text
    is wrapped in '/*!' ... ' */'.  line_length is accepted but not used here.
    """
    formatter = raw_doc.DoxFormatter()
    chunks = [proc_entry.raw_entry.getFormatted(formatter)]
    for _, sub_list in proc_entry.subentries.iteritems():
        for sub in sub_list:
            chunks.append(sub.raw_entry.getFormatted(formatter))
    if not in_comment:
        return '\n'.join(chunks)

    comment_lines = []
    for chunk in chunks:
        for text in chunk.splitlines(False):
            comment_lines.append(' * ' + text)
    # Strip empty comment lines from the end.
    while comment_lines and comment_lines[-1] == ' * ':
        del comment_lines[-1]
    return '\n'.join(['/*!'] + comment_lines + [' */'])
169
170
def transTextNode(text_node, top=True, start_heading=3, path_mgr=None, **kwargs):
    """Return text_node rendered as HTML, or '' if there is nothing to render.

    The top-level tag is emitted only when top is true.  Extra keyword
    arguments are accepted and ignored.
    """
    html = TextNodeToHtml(text_node,
                          skip_top_tag=not top,
                          start_heading=start_heading,
                          path_mgr=path_mgr).convert()
    if html:
        return html
    return ''
175
176
def createTransLink(doc, path_mgr):
    """Return a function that renders a link to a documentation entity.

    The returned function transLink(entity_name, text=None) builds an <a>
    node pointing at 'seqan:<entity_name>', lets a LinkConverter for doc
    resolve it, and returns the resulting HTML.  text defaults to the entity
    name.

    Args:
        doc: Documentation object handed to LinkConverter.
        path_mgr: Forwarded to transTextNode as its path_mgr argument.
    """
    link_converter = LinkConverter(doc)

    def transLink(entity_name, text=None):
        if not text:
            text = entity_name
        text_node = proc_doc.TextNode(type='a')
        text_node.attrs['href'] = 'seqan:%s' % entity_name
        text_node.children = [proc_doc.TextNode(text=text)]
        link_converter.visit(text_node)
        # Pass by keyword: the second positional parameter of transTextNode
        # is `top`, not `path_mgr`.
        return transTextNode(text_node, path_mgr=path_mgr)
    return transLink
188
189
def createNameToPath(doc):
    """Return a function mapping an entry name to its path within doc.

    The returned function yields the first element of the result of
    PathConverter.convert(), i.e. the path (None if the name is unknown).
    """
    path_converter = PathConverter(doc)

    def convertPath(entry_name):
        converted = path_converter.convert(entry_name)
        return converted[0]

    return convertPath
195
196
class TemplateManager(object):
    """Sets up the jinja2 environment and renders templates from 'tpl'.

    Templates are loaded from the 'tpl' directory below
    path_manager.this_dir and cached in self.tpls by their path.

    Args:
        path_manager: Object providing this_dir; also handed on to the
            'translink' filter as its path manager.
        doc: Documentation object used by the 'translink' and
            'name_to_path' filters.
    """

    def __init__(self, path_manager, doc):
        self.path_manager = path_manager
        tpl_dir = os.path.join(self.path_manager.this_dir, 'tpl')
        self.env = jinja2.Environment(loader=jinja2.FileSystemLoader(tpl_dir))
        self.env.filters['escape_name'] = escapeName
        self.env.filters['escape_anchor'] = escapeAnchor
        self.env.filters['url_encode'] = urllib.quote
        self.env.filters['transtext'] = transTextNode
        self.env.filters['to_dox'] = toDox
        # createTransLink expects a path manager, not this template manager.
        self.env.filters['translink'] = createTransLink(doc, self.path_manager)
        self.env.filters['name_to_path'] = createNameToPath(doc)
        self.env.filters['tojson'] = json.dumps
        self.tpls = {}
        # Pre-load the templates listed here; others are loaded on demand.
        for path in ['page.html', 'concept.html']:
            self.loadTemplate(path)

    def loadTemplate(self, path):
        """Load the template at path into the template cache."""
        self.tpls[path] = self.env.get_template(path)

    def render(self, path, **kwargs):
        """Render the template at path with kwargs as its variables.

        The template is loaded first if it is not cached yet.
        """
        if path not in self.tpls:
            self.loadTemplate(path)
        return self.tpls[path].render(**kwargs)
223
224
class PathConverter(object):
    """Maps entry names to URL fragments (file name plus optional anchor)."""

    def __init__(self, doc):
        self.doc = doc

    def convert(self, name):
        """Return the triple (path, title, entry) for the entry called name.

        For a top-level entry the path is its page file and the title is only
        set for entries of kind 'page'.  For any other known entry the path is
        the father's page file plus an anchor and the title is name itself.
        If name is unknown, (None, None, None) is returned.
        """
        top_entry = self.doc.top_level_entries.get(name)
        if top_entry:
            page = '%s_%s.html' % (top_entry.kind, escapeName(top_entry.name))
            page_title = top_entry.title if top_entry.kind == 'page' else None
            return page, page_title, top_entry

        if not self.doc.entries.get(name):
            return None, None, None

        father_name, _ = proc_doc.splitSecondLevelEntry(name)
        father = self.doc.top_level_entries.get(father_name)
        second_entry = self.doc.second_level_entries.get(name)
        anchored = '%s_%s.html#%s' % (father.kind, escapeName(father.name), escapeAnchor(name))
        return anchored, name, second_entry
248
249
250# TODO(holtgrew): Should be doable in a simpler way than recursing ourselves here.  Visitor pattern for TextNode?
class LinkConverter(proc_doc.TextNodeVisitor):
    """Rewrites raw links in a text node tree into HTML-like links.

    A raw link has the form <a href="seqan:$target">$label</a>.  Links that
    can be resolved get their final href; links that cannot be resolved lose
    their href and get the CSS class 'error'.
    """

    def __init__(self, doc):
        self.doc = doc
        self.path_converter = PathConverter(doc)

    def visit(self, text_node):
        """Translate text_node if it is a link, else the links below it."""
        if not text_node or text_node.type == '<text>':
            return
        if text_node.type == 'a':
            self._translateLink(text_node)
            return
        for idx, child in enumerate(text_node.children):
            text_node.children[idx] = self._replaceNode(child)

    def _translateLink(self, a_node):
        """Resolve the 'seqan:' href of a_node in place; other links are kept."""
        href = a_node.attrs.get('href', '')
        if not href.startswith('seqan:'):
            return
        target = a_node.attrs['href'][6:]  # text after the 'seqan:' prefix
        target_path, target_title, target_obj = self.path_converter.convert(target)

        # Shorten path title if not manually specified.
        label_is_title = a_node.children and a_node.plainText == target_title
        if label_is_title and self.doc.local_name_counter.get(target_title, 1) <= 1:
            short_title = proc_doc.splitSecondLevelEntry(target_title)[1]
            a_node.children = [proc_doc.TextNode(text=short_title)]

        # Fall back to the raw target when no title could be determined.
        title_node = proc_doc.TextNode(text=target_title if target_title else target)

        # TODO(holtgrew): Catch target_title being None, target_path not found!
        if target_path is None:
            # Unresolved link: flag it and drop the dangling href.
            classes = a_node.attrs.get('class', '')
            prefix = classes + ' ' if classes else classes
            a_node.attrs['class'] = prefix + 'error'
            if a_node.attrs.get('href'):
                del a_node.attrs['href']
            return

        if target_obj:
            a_node.attrs['data-lang-entity'] = target_obj.kind
        a_node.attrs['href'] = target_path
        if not a_node.children:
            a_node.addChild(title_node)

    def _replaceNode(self, text_node):
        """Translate links in the subtree rooted at text_node; return the node."""
        if text_node.type != '<text>':
            if text_node.type == 'a':
                self._translateLink(text_node)
            for idx, child in enumerate(text_node.children):
                text_node.children[idx] = self._replaceNode(child)
        return text_node
309
310
class ImagePathUpdater(proc_doc.TextNodeVisitor):
    """Prepends a prefix to the 'src' attribute of all <img> nodes."""

    def __init__(self, doc, prefix):
        self.doc = doc
        self.prefix = prefix

    def visit(self, text_node):
        """Update text_node if it is an image, else the images below it."""
        if not text_node or text_node.type == '<text>':
            return
        if text_node.type == 'img':
            self._updateImagePath(text_node)
            return
        for idx, child in enumerate(text_node.children):
            text_node.children[idx] = self._replaceNode(child)

    def _updateImagePath(self, img_node):
        """Join self.prefix with the node's src; nodes without src are kept."""
        src = img_node.attrs.get('src')
        if src:
            img_node.attrs['src'] = os.path.join(self.prefix, src)

    def _replaceNode(self, text_node):
        """Update images in the subtree rooted at text_node; return the node."""
        if text_node.type != '<text>':
            if text_node.type == 'img':
                self._updateImagePath(text_node)
            for idx, child in enumerate(text_node.children):
                text_node.children[idx] = self._replaceNode(child)
        return text_node
340
341
class HtmlWriter(object):
    """Writes the HTML documentation for a documentation object.

    Args:
        doc: The processed documentation (entries, top_level_entries, ...).
        args: Parsed command line arguments; out_dir, image_dirs and
            development are read from it.
        config: Configuration object handed through to the templates.
    """

    # Entry kind -> (template file, keyword name the entry is passed under).
    _PAGE_TEMPLATES = {
        'page': ('page.html', 'page'),
        'concept': ('concept.html', 'concept'),
        'class': ('class.html', 'klass'),
        'specialization': ('class.html', 'klass'),
        'enum': ('enum.html', 'enum'),
        'adaption': ('adaption.html', 'adaption'),
        'shortcut': ('shortcut.html', 'shortcut'),
        'global_function': ('function.html', 'function'),
        'member_function': ('function.html', 'function'),
        'interface_function': ('function.html', 'function'),
        'global_metafunction': ('metafunction.html', 'metafunction'),
        'interface_metafunction': ('metafunction.html', 'metafunction'),
        'group': ('group.html', 'group'),
        'tag': ('tag.html', 'tag'),
        'macro': ('macro.html', 'macro'),
        'global_typedef': ('typedef.html', 'typedef'),
        'global_variable': ('variable.html', 'variable'),
        'variable': ('variable.html', 'variable'),
    }

    def __init__(self, doc, args, config):
        self.doc = doc
        self.out_dirs = {}
        self.args = args
        self.config = config
        out_dir = args.out_dir
        # Generate path names.
        self.out_dirs['root'] = out_dir
        for sub_dir in ['css', 'img', 'js', 'lib', 'lists']:
            self.out_dirs[sub_dir] = os.path.join(out_dir, sub_dir)
        self.out_dirs['docs'] = os.path.join(out_dir, 'docs', 'seqan')
        # Create managers.
        self.path_manager = PathManager(out_dir)
        self.tpl_manager = TemplateManager(self.path_manager, doc)
        self.path_converter = PathConverter(doc)

    def generateFor(self):
        """Run all generation steps for self.doc."""
        self.log('Generating HTML documentation')
        self.log('Output Directory: %s', self.out_dirs['root'])
        self.makedirs()
        self.copyStaticFiles()
        self.copyDocImages()
        self.generateTopFrameSet()
        self.generateLists(self.doc)
        # Links and image paths are rewritten in place before the pages are
        # rendered from the text nodes.
        self.translateLinks(self.doc)
        self.updateImagePaths(self.doc)
        self.generatePages(self.doc, self.config)
        self.generateDemoPages(self.doc)
        self.generateSearchIndex(self.doc)
        self.generateLinkData(self.doc)
        self.generateLanguageEntities()

    def makedirs(self):
        """Create all output directories that do not exist yet."""
        for path in self.out_dirs.values():
            if not os.path.exists(path):
                os.makedirs(path)

    def copyStaticFiles(self):
        """Copy static files."""
        for kind in ['css', 'js', 'img', 'lib']:
            in_dir = os.path.join(self.path_manager.this_dir, 'tpl/%s' % kind)
            out_path = self.out_dirs[kind]
            self.log('  Copying %s => %s', in_dir, out_path)
            distutils.dir_util.copy_tree(in_dir, out_path, verbose=True)

    def copyDocImages(self):
        """Copy images from paths given in --image-dir parameter."""
        for image_dir in self.args.image_dirs:
            for name in os.listdir(image_dir):
                in_path = os.path.join(image_dir, name)
                # Only plain files directly inside image_dir are copied.
                if os.path.isfile(in_path):
                    shutil.copy(in_path, os.path.join(self.out_dirs['img'], name))

    def generateTopFrameSet(self):
        """Generate frameset."""
        html = self.tpl_manager.render('index.html',
                                       development=self.args.development)  # TODO(holtgrew): Add title.
        with open(self.path_manager.getTopFramesetPath(), 'w') as f:
            f.write(html)

    def generateLists(self, doc):
        """Generate top level/second level/page index."""
        html = self.tpl_manager.render('list.html', doc=doc, config=self.config,
                                       development=self.args.development)
        with open(self.path_manager.getListPath(), 'w') as f:
            f.write(html)

    def translateLinks(self, doc):
        """Let a LinkConverter visit the text nodes of all entries of doc."""
        link_converter = LinkConverter(doc)
        for proc_entry in doc.entries.values():
            proc_entry.visitTextNodes(link_converter)

    def updateImagePaths(self, doc):
        """Appends image output directory to src attributes."""
        updater = ImagePathUpdater(doc, 'img')
        for proc_entry in doc.entries.values():
            proc_entry.visitTextNodes(updater)

    def generatePages(self, doc, config):
        """Generate pages for proc_doc.Documentation entries."""
        try:
            import pygments, pygments.lexers, pygments.formatters
            pygments_style = pygments.formatters.HtmlFormatter().get_style_defs('.highlight')
        except ImportError:
            pygments_style = '<!-- pygments not available -->'

        for entry in doc.top_level_entries.values():
            path = self.path_manager.getEntryPath(entry)
            self.generatePage(entry, path, doc, config, pygments_style)

    def generatePage(self, entry, path, doc, config, pygments_style):
        """Generate page for entry to file at path.

        The template is chosen by entry.kind.

        Raises:
            AssertionError: If entry.kind has no template.
        """
        try:
            tpl_name, entry_key = self._PAGE_TEMPLATES[entry.kind]
        except KeyError:
            # Raised explicitly so the check also holds when Python runs
            # with -O, which strips assert statements.
            raise AssertionError(entry.kind)
        render_kwargs = {'doc': doc,
                         'config': config,
                         'development': self.args.development,
                         'pygments_style': pygments_style,
                         'entry_kind': entry.kind,
                         'entry_name': entry.name}
        render_kwargs[entry_key] = entry
        html = self.tpl_manager.render(tpl_name, **render_kwargs)
        with open(path, 'w') as f:
            f.write(html)

    def generateDemoPages(self, doc):
        """Copy over all demo pages."""
        include_mgr = doc.doc_processor.include_mgr
        file_names = set(include_mgr.file_cache.keys())
        # The first element of each snippet cache key is taken as the file name.
        file_names.update(key[0] for key in include_mgr.snippet_cache.keys())
        for path in sorted(file_names):
            self.generateDemoPage(path)

    def generateDemoPage(self, path):
        """Generate a demo page."""
        dirname = os.path.join(self.out_dirs['root'], os.path.dirname(path))
        if not os.path.exists(dirname):
            os.makedirs(dirname)
        in_path = self.doc.doc_processor.include_mgr.resolvePath(path)
        to_path = os.path.join(self.out_dirs['root'], path)
        shutil.copyfile(in_path, to_path)

    def generateSearchIndex(self, doc):
        """Generate the search index.

        Writes js/search.data.js and js/search.data.module.js.
        """
        # NOTE(review): values are emitted with Python's repr(), not as JSON;
        # confirm all values have a repr that is valid JavaScript.
        js = ['window.searchData = [']
        js_module = ['window.searchDataModule = [']
        filenames = self.doc.doc_processor.topLevelEntry_filenames
        for entry in doc.top_level_entries.values():
            akas, subentries, headerfile = '', '', ''
            if hasattr(entry, 'akas'):
                akas = ','.join(entry.akas)
            if hasattr(entry, 'subentries'):
                subentries = []
                for t in entry.subentries.values():
                    for s in t:
                        title = proc_doc.splitSecondLevelEntry(s.title)[1]
                        subentries.append({'type': s.kind, 'name': s.name,
                                           'title': title, 'id': s.title})
            if hasattr(entry, 'headerfiles') and len(entry.headerfiles) > 0:
                headerfile = entry.headerfiles[0]
                # Keep the part after the first '/' and cut the last 3 chars.
                headerfile = headerfile[headerfile.find("/")+1:-3]

            if entry in filenames:
                delimiter = "/include/seqan/"
                srcfile = filenames[entry]
                # NOTE(review): if delimiter is absent, find() yields -1 and
                # the slice start is len(delimiter) - 1 -- confirm intended.
                srcfile = srcfile[srcfile.find(delimiter)+len(delimiter):]
            else:
                srcfile = ""

            js.append('  {title:%s,name:%s,text:%s,akas:%s,subentries:%s,loc:%s,langEntity:%s},' %
                      (repr(entry.title), repr(entry.name), repr(""), repr(akas), repr(subentries),
                       repr(self.path_converter.convert(entry.name)[0]), repr(entry.kind)))
            js_module.append('  {definedIn:%s,srcfile:%s},' % (repr(headerfile), repr(srcfile)))
        js.append('];')
        js_module.append('];')

        with open(os.path.join(self.out_dirs['js'], 'search.data.js'), 'wb') as f:
            f.write('\n'.join(js))
        with open(os.path.join(self.out_dirs['js'], 'search.data.module.js'), 'wb') as f:
            f.write('\n'.join(js_module))

    def generateLinkData(self, doc):
        """Generate the Data for top level entry links."""
        js = ['window.lookup = {']
        for entry in doc.top_level_entries.values():
            js.append('    \'%(name)s\': \'%(kind)s_%(name)s\',' % {'name': entry.name,
                                                                     'kind': entry.kind})
        js.append('};')
        with open(os.path.join(self.out_dirs['js'], 'link.data.js'), 'wb') as f:
            f.write('\n'.join(js))

    def generateLanguageEntities(self):
        """Generate language entities JavaScript file."""
        js = self.tpl_manager.render('js/lang_entities.js', config=self.config,
                                     development=self.args.development)
        with open(os.path.join(self.out_dirs['js'], 'lang_entities.js'), 'wb') as f:
            f.write(js)

    def log(self, s, *args):
        """Write the message s % args as one line to stderr."""
        sys.stderr.write(s % args + '\n')
560
561
562