1#!/usr/local/bin/python3.8
2
3############################################################################
4#
5# MODULE:       Builds manual pages
6# AUTHOR(S):    Markus Neteler
7#               Glynn Clements
8#               Martin Landa <landa.martin gmail.com>
9# PURPOSE:      Create HTML manual page snippets
10# COPYRIGHT:    (C) 2007-2021 by Glynn Clements
11#                and the GRASS Development Team
12#
13#               This program is free software under the GNU General
14#               Public License (>=v2). Read the file COPYING that
15#               comes with GRASS for details.
16#
17#############################################################################
18
19import sys
20import os
21import string
22import re
23from datetime import datetime
24import locale
25import json
26
# Python 2/3 compatibility shims.
# Only ImportError is caught, so that anything else raised while importing
# (e.g. KeyboardInterrupt) is not silently swallowed.
try:
    # Python 2 import
    from HTMLParser import HTMLParser
except ImportError:
    # Python 3 import
    from html.parser import HTMLParser
try:
    # Python 2 import
    import urlparse
except ImportError:
    # Python 3 import
    import urllib.parse as urlparse

# True when running under a Python 2 interpreter
PY2 = sys.version_info[0] == 2

if not PY2:
    # Python 3 has no ``unicode`` builtin; alias it to ``str``
    unicode = str
46
47
48def _get_encoding():
49    encoding = locale.getdefaultlocale()[1]
50    if not encoding:
51        encoding = 'UTF-8'
52    return encoding
53
54
def decode(bytes_):
    """Return the parameter as a (unicode) string

    Bytes are decoded with the encoding given by _get_encoding(),
    a unicode string is returned as it is and any other object is
    converted with unicode().

    :param bytes bytes_: the value to convert
    """
    if not isinstance(bytes_, (unicode, bytes)):
        # neither text nor bytes: fall back to plain conversion
        return unicode(bytes_)
    if isinstance(bytes_, unicode):
        return bytes_
    return bytes_.decode(_get_encoding())
68
69
# Prefix put in front of the index page links of the footer and of the
# keyword links; empty when the environment variable is unset or empty
html_page_footer_pages_path = os.getenv('HTML_PAGE_FOOTER_PAGES_PATH') or ''

# name of the page to build, taken from the first command line argument
pgm = sys.argv[1]

# input files, both named after the page
src_file = "%s.html" % pgm
tmp_file = "%s.tmp.html" % pgm

# base URLs used to build the link to the source code
trunk_url = "https://github.com/OSGeo/grass/tree/master/"
addons_url = "https://github.com/OSGeo/grass-addons/tree/master/"
80
# Common start of every generated page (doctype, <head>, logo). It is a
# plain string: it is concatenated with one of the header_* snippets below
# and only then turned into a string.Template (placeholder: PGM).
# NOTE(review): the page declares charset=iso-8859-1 while read_file()
# decodes the input with the locale encoding (UTF-8 as fallback) - confirm
# that the declared charset matches what is actually written.
header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>GRASS GIS Manual: ${PGM}</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="grassdocs.css" type="text/css">
</head>
<body bgcolor="white">
<div id="container">

<a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
<hr class="header">
"""

# Heading used when the source has a "meta page description" comment
# (placeholder: PGM)
header_nopgm = """<h2>${PGM}</h2>
"""

# NAME section used when no name description is available
# (placeholder: PGM)
header_pgm = """<h2>NAME</h2>
<em><b>${PGM}</b></em>
"""

# NAME section used when a name description is available
# (placeholders: PGM, PGM_DESC)
header_pgm_desc = """<h2>NAME</h2>
<em><b>${PGM}</b></em> - ${PGM_DESC}
"""

# SOURCE CODE section with links to the source code and to its history
# (placeholders: URL_SOURCE, PGM, URL_LOG)
sourcecode = string.Template(
"""<h2>SOURCE CODE</h2>
<p>Available at: <a href="${URL_SOURCE}">${PGM} source code</a> (<a href="${URL_LOG}">history</a>)</p>
"""
)

# Page footer including a link to the index page of the module class
# (placeholders: HTML_PAGE_FOOTER_PAGES_PATH, INDEXNAME, INDEXNAMECAP,
# YEAR, GRASS_VERSION)
footer_index = string.Template(
"""<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>

</div>
</body>
</html>
""")

# Page footer without the module class index link
# (placeholders: HTML_PAGE_FOOTER_PAGES_PATH, YEAR, GRASS_VERSION)
footer_noindex = string.Template(
"""<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>

</div>
</body>
</html>
""")
152
def read_file(name):
    """Return the content of a file

    The file is read in binary mode. Under Python 2 the raw content is
    returned, otherwise the content is converted to a string with
    decode().

    :param str name: path of the file to read

    :return: content of the file, an empty string if the file cannot be
             opened or read
    """
    try:
        # the context manager closes the file even if read() fails
        with open(name, 'rb') as f:
            s = f.read()
    except IOError:
        return ""

    if PY2:
        return s
    return decode(s)
164
165
def create_toc(src_data):
    """Collect the headings of an HTML document for the table of contents

    :param str src_data: HTML source to scan

    :return list: one (tag, id, text) tuple per h1, h2 or h3 element in
                  document order; id is '<tag>_<n>' where n is a running
                  number starting at 1, text is the content of the heading
                  (text inside a nested element is wrapped in that
                  element's tag again, attributes are not kept)
    """
    class MyHTMLParser(HTMLParser):
        def __init__(self):
            HTMLParser.__init__(self)
            self.idx = 1
            self.tag_curr = ''
            self.tag_last = ''
            # True while inside one of the headings
            self.process_text = False
            self.data = []
            self.tags_allowed = ('h1', 'h2', 'h3')
            # one-element tuple: ('img') without the comma is the string
            # 'img', which turns the membership test into a substring
            # test (matching e.g. the <i> tag)
            self.tags_ignored = ('img',)
            self.text = ''

        def handle_starttag(self, tag, attrs):
            if tag in self.tags_allowed:
                self.process_text = True
            self.tag_last = self.tag_curr
            self.tag_curr = tag

        def handle_endtag(self, tag):
            if tag in self.tags_allowed:
                # heading finished, store it and start over
                self.data.append((tag, '%s_%d' % (tag, self.idx),
                                  self.text))
                self.idx += 1
                self.process_text = False
                self.text = ''

            self.tag_curr = self.tag_last

        def handle_data(self, data):
            if not self.process_text:
                return
            tag = self.tag_curr
            if not tag or tag in self.tags_allowed or \
                    tag in self.tags_ignored:
                # text of the heading itself, text following an ignored
                # tag or no current tag at all: keep the text as it is
                self.text += data
            else:
                # text of a nested element: restore the surrounding tag
                self.text += '<%s>%s</%s>' % (tag, data, tag)

    # instantiate the parser and feed it the HTML
    parser = MyHTMLParser()
    parser.feed(src_data)

    return parser.data
209
def escape_href(label):
    """Convert the text of a heading to a string used as anchor name

    HTML tags, '&nbsp;' entities and double quotes are removed, spaces
    are replaced by hyphens and the result is converted to lower case.

    :param str label: text of the heading

    :return str: anchor name
    """
    # strip html tags first, then the characters not wanted in the name
    without_tags = re.sub('<[^<]+?>', '', label)
    cleaned = without_tags.replace('&nbsp;', '').replace('"', '')
    return cleaned.replace(' ', '-').lower()
219
def write_toc(data):
    """Write the table of contents as HTML to standard output

    The h3 entries following an h2 entry are written as a nested list
    inside the item of that h2 entry. The nested list is closed by the
    next h2 entry or at the end of the data. Nothing is written if no
    data is given.

    :param list data: (tag, id, text) tuples; only tag and text are used,
                      the link target is created from the text with
                      escape_href()
    """
    if not data:
        return

    fd = sys.stdout
    fd.write('<div class="toc">\n')
    fd.write('<h4 class="toc">Table of contents</h4>\n')
    fd.write('<ul class="toc">\n')
    first = True
    has_h2 = False
    in_h3 = False
    indent = 4
    for tag, _href, text in data:
        if tag == 'h3' and not in_h3 and has_h2:
            # first h3 after an h2: open a nested list inside the
            # (still open) item of the h2
            fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
            indent += 4
            in_h3 = True
        elif not first:
            # close the previous item
            fd.write('</li>\n')

        if tag == 'h2':
            has_h2 = True
            if in_h3:
                # close the nested list and the h2 item containing it
                indent -= 4
                fd.write('%s</ul></li>\n' % (' ' * indent))
                in_h3 = False

        text = text.replace(u'\xa0', u' ')
        fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' %
                 (' ' * indent, escape_href(text), text))
        first = False

    # close the last item
    fd.write('</li>\n')
    if in_h3:
        # the last entry was an h3: the nested list and the h2 item
        # containing it are still open
        indent -= 4
        fd.write('%s</ul></li>\n' % (' ' * indent))
    fd.write('</ul>\n')
    fd.write('</div>\n')
254
def update_toc(data):
    """Add named anchors to the h2 and h3 headings of an HTML document

    The content of each <h2> or <h3> element (without attributes,
    opened and closed on the same line) is wrapped in an
    <a name="..."> element. The name is created from the content of the
    heading with escape_href(). Everything else, including the text
    before and after a heading on the same line, is kept.

    :param str data: HTML source

    :return str: HTML source with anchors, lines joined by a newline
    """
    # non-greedy content so that two headings on one line are handled
    # separately
    pat = re.compile(r'(<(h[23])>)(.+?)(</h[23]>)')

    def add_anchor(match):
        title = match.group(3)
        return '%s<a name="%s">%s</a>%s' % (
            match.group(1), escape_href(title), title, match.group(4))

    return '\n'.join(pat.sub(add_anchor, line)
                     for line in data.splitlines())
267
268
def get_addon_path(pgm):
    """Check if pgm is in addons list and get addon path

    The list is read from the 'addons_paths.json' file located in the
    directory given by the GRASS_ADDON_BASE environment variable. The
    file is created during install extension, check get_addons_paths()
    function in the g.extension.py file.

    :param pgm str: pgm

    :return tuple: (True, path) if pgm is addon else (None, None)
    """
    not_found = (None, None)

    addon_base = os.getenv('GRASS_ADDON_BASE')
    if not addon_base:
        return not_found

    json_file = os.path.join(addon_base, 'addons_paths.json')
    if not os.path.exists(json_file):
        return not_found

    with open(json_file, 'r') as f:
        addons = json.load(f)

    for addon in addons['tree']:
        parts = addon['path'].split('/')
        # first path component is the root dir, last one the module dir
        if 'grass7' == parts[0] and pgm == parts[-1]:
            return True, addon['path']
    return not_found
291
292
# process header
# Read the page source and look for the optional "meta page" comments
# which override the page name and description used in the header.
src_data = read_file(src_file)
name = re.search('(<!-- meta page name:)(.*)(-->)', src_data, re.IGNORECASE)
pgm_desc = None
if name:
    # only the part before the first '-' is used as page name
    pgm = name.group(2).strip().split('-', 1)[0].strip()
    name_desc = re.search('(<!-- meta page name description:)(.*)(-->)', src_data, re.IGNORECASE)
    if name_desc:
        pgm_desc = name_desc.group(2).strip()
desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
                 re.IGNORECASE)
if desc:
    # a page description replaces the page name and selects the header
    # without the NAME section
    pgm = desc.group(2).strip()
    header_tmpl = string.Template(header_base + header_nopgm)
else:
    if not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)

# The source contains no <html> tag: write the header and the content of
# the tmp file in front of it.
if not re.search('<html>', src_data, re.IGNORECASE):
    tmp_data = read_file(tmp_file)
    """
    Adjusting keywords html pages paths if add-on html man page
    stored on the server
    """
    if html_page_footer_pages_path:
        new_keywords_paths = []
        # text between the KEYWORDS heading and the next heading
        orig_keywords_paths = re.search(
            r'<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>',
            tmp_data, re.DOTALL,
        )
        if orig_keywords_paths:
            search_txt = 'href="'
            # the text is split at commas; in each part containing a link
            # the path prefix is inserted right after 'href="' (parts
            # without a link are not kept)
            for i in orig_keywords_paths.group(1).split(','):
                if search_txt in i:
                    index = i.index(search_txt) + len(search_txt)
                    new_keywords_paths.append(
                        i[:index] + html_page_footer_pages_path + i[index:],
                    )
        if new_keywords_paths:
            # str.replace() replaces every occurrence of the matched text
            tmp_data = tmp_data.replace(
                orig_keywords_paths.group(1),
                ','.join(new_keywords_paths),
            )
    # the header is written only if the tmp file has no <html> tag itself
    if not re.search('<html>', tmp_data, re.IGNORECASE):
        sys.stdout.write(header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
    if tmp_data:
        # copy the tmp file, leaving out the lines closing the document
        for line in tmp_data.splitlines(True):
            if not re.search('</body>|</html>', line, re.IGNORECASE):
                sys.stdout.write(line)

# create TOC
write_toc(create_toc(src_data))

# process body
sys.stdout.write(update_toc(src_data))

# if </html> is found, suppose a complete html is provided.
# otherwise, generate module class reference:
if re.search('</html>', src_data, re.IGNORECASE):
    sys.exit()
355
# Module class (the part of the module name before the first dot)
# mapped to the name of the index page of that class
index_names = {
    'd': 'display',
    'db': 'database',
    'g': 'general',
    'i': 'imagery',
    'm': 'miscellaneous',
    'ps': 'postscript',
    'p': 'paint',
    'r': 'raster',
    'r3': 'raster3d',
    's': 'sites',
    't': 'temporal',
    'v': 'vector',
}
370
371
def to_title(name):
    """Return the title used for the given command class/family name

    Names without a special spelling are capitalized.
    """
    special_titles = {
        'raster3d': '3D raster',
        'postscript': 'PostScript',
    }
    if name in special_titles:
        return special_titles[name]
    return name.capitalize()
380
381
# titles of the index pages, using the same keys as index_names
index_titles = {}
for key, name in index_names.items():
    index_titles[key] = to_title(name)

# process footer
# The index page is taken from the optional "meta page index" comment
# ("name" or "name|title"), otherwise from the module class, i.e. the part
# of the page name before the first dot.
index = re.search('(<!-- meta page index:)(.*)(-->)', src_data, re.IGNORECASE)
if index:
    index_name = index.group(2).strip()
    if '|' in index_name:
        index_name, index_name_cap = index_name.split('|', 1)
    else:
        index_name_cap = to_title(index_name)
else:
    mod_class = pgm.split('.', 1)[0]
    index_name = index_names.get(mod_class, '')
    index_name_cap = index_titles.get(mod_class, '')

grass_version = os.getenv("VERSION_NUMBER", "unknown")
# VERSION_DATE is used as the copyright year of the footer
# (presumably it holds just the year - verify), the current year otherwise
year = os.getenv("VERSION_DATE")
if not year:
    year = str(datetime.now().year)

# check the names of scripts to assign the right folder
# NOTE(review): os.getenv() returns None if MODULE_TOPDIR is not set, which
# os.path.abspath() does not accept - confirm the variable is always set.
topdir = os.path.abspath(os.getenv("MODULE_TOPDIR"))
curdir = os.path.abspath(os.path.curdir)
# NOTE(review): plain string prefix test; a directory next to topdir whose
# name only starts with the name of topdir matches as well - confirm this
# cannot happen.
if curdir.startswith(topdir):
    source_url = trunk_url
    pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
else:
    # addons
    source_url = addons_url
    # the last three components of the current directory
    pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
url_source = ''
if os.getenv('SOURCE_URL', ''):
    # addons
    # search the current directory for a component starting with
    # "<class prefix>."; url_source stays empty if there is none
    for prefix in index_names.keys():
        cwd = os.getcwd()
        idx = cwd.find('{0}{1}.'.format(os.path.sep, prefix))
        if idx > -1:
            pgmname = cwd[idx+1:]
            classname = index_names[prefix]
            url_source = urlparse.urljoin('{0}{1}/'.format(
                    os.environ['SOURCE_URL'], classname),
                    pgmname
            )
            break
else:
    url_source = urlparse.urljoin(source_url, pgmdir)
if sys.platform == 'win32':
    # the URL was built from a file system path, use forward slashes
    url_source = url_source.replace(os.path.sep, '/')

if index_name:
    # path parts exchanged to turn the source URL into the history URL
    tree = 'grass/tree'
    commits = 'grass/commits'
    is_addon, addon_path = get_addon_path(pgm=pgm)
    if is_addon:
        # Fix gui/wxpython addon url path
        # NOTE(review): os.environ['SOURCE_URL'] raises KeyError if the
        # variable is not set - confirm it is always set for addons.
        url_source = urlparse.urljoin(
            os.environ['SOURCE_URL'], addon_path.split('/', 1)[1],
        )
        tree = 'grass-addons/tree'
        commits = 'grass-addons/commits'

    sys.stdout.write(sourcecode.substitute(
        URL_SOURCE=url_source, PGM=pgm, URL_LOG=url_source.replace(
            tree,  commits)))
    sys.stdout.write(
        footer_index.substitute(
            INDEXNAME=index_name,
            INDEXNAMECAP=index_name_cap,
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
else:
    # no index page known: no SOURCE CODE section, footer without the
    # module class index link
    sys.stdout.write(
        footer_noindex.substitute(
            YEAR=year,
            GRASS_VERSION=grass_version,
            HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
        ),
    )
465