#!/usr/local/bin/python3.8

############################################################################
#
# MODULE:       Builds manual pages
# AUTHOR(S):    Markus Neteler
#               Glynn Clements
#               Martin Landa <landa.martin gmail.com>
# PURPOSE:      Create HTML manual page snippets
# COPYRIGHT:    (C) 2007-2021 by Glynn Clements
#                and the GRASS Development Team
#
#               This program is free software under the GNU General
#               Public License (>=v2). Read the file COPYING that
#               comes with GRASS for details.
#
#############################################################################

import sys
import os
import string
import re
from datetime import datetime
import locale
import json

try:
    # Python 2 import
    from HTMLParser import HTMLParser
except ImportError:
    # Python 3 import
    from html.parser import HTMLParser
try:
    import urlparse
except ImportError:
    import urllib.parse as urlparse

PY2 = sys.version_info[0] == 2

if not PY2:
    unicode = str


def _get_encoding():
    """Return the encoding of the default locale, falling back to UTF-8."""
    encoding = locale.getdefaultlocale()[1]
    if not encoding:
        encoding = 'UTF-8'
    return encoding


def decode(bytes_):
    """Decode bytes with default locale and return (unicode) string

    No-op if parameter is not bytes (assumed unicode string).

    :param bytes bytes_: the bytes to decode
    """
    if isinstance(bytes_, unicode):
        return bytes_
    if isinstance(bytes_, bytes):
        return bytes_.decode(_get_encoding())
    return unicode(bytes_)


# Optional prefix put in front of the index pages linked from the footer
# (and in front of keyword links); empty when the variable is unset.
html_page_footer_pages_path = os.getenv('HTML_PAGE_FOOTER_PAGES_PATH') or ''

trunk_url = "https://github.com/OSGeo/grass/tree/master/"
addons_url = "https://github.com/OSGeo/grass-addons/tree/master/"

header_base = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html>
<head>
<title>GRASS GIS Manual: ${PGM}</title>
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
<link rel="stylesheet" href="grassdocs.css" type="text/css">
</head>
<body bgcolor="white">
<div id="container">

<a href="index.html"><img src="grass_logo.png" alt="GRASS logo"></a>
<hr class="header">
"""

header_nopgm = """<h2>${PGM}</h2>
"""

header_pgm = """<h2>NAME</h2>
<em><b>${PGM}</b></em>
"""

header_pgm_desc = """<h2>NAME</h2>
<em><b>${PGM}</b></em> - ${PGM_DESC}
"""

sourcecode = string.Template(
"""<h2>SOURCE CODE</h2>
<p>Available at: <a href="${URL_SOURCE}">${PGM} source code</a> (<a href="${URL_LOG}">history</a>)</p>
"""
)

# The copyright sign is written as an HTML entity so the output does not
# depend on the encoding of stdout (the header declares iso-8859-1).
footer_index = string.Template(
"""<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}${INDEXNAME}.html">${INDEXNAMECAP} index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>

</div>
</body>
</html>
""")

footer_noindex = string.Template(
"""<hr class="header">
<p>
<a href="index.html">Main index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}topics.html">Topics index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}keywords.html">Keywords index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}graphical_index.html">Graphical index</a> |
<a href="${HTML_PAGE_FOOTER_PAGES_PATH}full_index.html">Full index</a>
</p>
<p>
&copy; 2003-${YEAR}
<a href="http://grass.osgeo.org">GRASS Development Team</a>,
GRASS GIS ${GRASS_VERSION} Reference Manual
</p>

</div>
</body>
</html>
""")

# Module family prefix -> name of its index page
index_names = {
    'd': 'display',
    'db': 'database',
    'g': 'general',
    'i': 'imagery',
    'm': 'miscellaneous',
    'ps': 'postscript',
    'p': 'paint',
    'r': 'raster',
    'r3': 'raster3d',
    's': 'sites',
    't': 'temporal',
    'v': 'vector',
}


def read_file(name):
    """Return the content of file *name*, or "" if it cannot be read.

    Under Python 3 the bytes are decoded with the default locale encoding;
    under Python 2 the raw byte string is returned.
    """
    try:
        # 'with' guarantees the handle is closed even if read() fails
        with open(name, 'rb') as f:
            content = f.read()
    except IOError:
        return ""
    if PY2:
        return content
    return decode(content)


class _HeadingCollector(HTMLParser):
    """Collect (tag, id, text) for every h1/h2/h3 element fed to it."""

    def __init__(self):
        HTMLParser.__init__(self)
        self.idx = 1
        self.tag_curr = ''
        self.tag_last = ''
        self.process_text = False
        self.data = []
        self.tags_allowed = ('h1', 'h2', 'h3')
        # Must be a tuple: with a plain string, ``in`` is a substring test
        # and tags such as <i> would be treated as ignored too.
        self.tags_ignored = ('img',)
        self.text = ''

    def handle_starttag(self, tag, attrs):
        if tag in self.tags_allowed:
            self.process_text = True
        self.tag_last = self.tag_curr
        self.tag_curr = tag

    def handle_endtag(self, tag):
        if tag in self.tags_allowed:
            self.data.append((tag, '%s_%d' % (tag, self.idx), self.text))
            self.idx += 1
            self.process_text = False
            self.text = ''

        self.tag_curr = self.tag_last

    def handle_data(self, data):
        if not self.process_text:
            return
        if self.tag_curr in self.tags_allowed or \
                self.tag_curr in self.tags_ignored:
            self.text += data
        else:
            # keep inline markup (e.g. <em>) of the heading in the TOC label
            self.text += '<%s>%s</%s>' % (self.tag_curr, data, self.tag_curr)


def create_toc(src_data):
    """Parse HTML *src_data* and return its headings.

    :return list: tuples ``(tag, id, text)`` for each h1/h2/h3 element, in
                  document order; ``id`` is ``<tag>_<running number>``
    """
    parser = _HeadingCollector()
    parser.feed(src_data)
    return parser.data


def escape_href(label):
    """Turn a heading label into the anchor name used for TOC links.

    HTML tags, non-breaking spaces and double quotes are removed, spaces
    become '-' and the result is lower-cased.
    """
    # remove html tags
    label = re.sub('<[^<]+?>', '', label)
    # drop non-breaking spaces, both as entity and as decoded character
    label = label.replace('&nbsp;', '')
    if isinstance(label, unicode):
        # guarded so a Python 2 byte string is never implicitly decoded
        label = label.replace(u'\xa0', u'')
    # drop double quotes (they would terminate the attribute value)
    label = label.replace('"', '')
    # replace space with dash + lower
    return label.replace(' ', '-').lower()


def write_toc(data):
    """Write the table of contents for *data* to standard output.

    :param list data: ``(tag, id, text)`` tuples as returned by create_toc();
                      nothing is written when it is empty
    """
    if not data:
        return

    fd = sys.stdout
    fd.write('<div class="toc">\n')
    fd.write('<h4 class="toc">Table of contents</h4>\n')
    fd.write('<ul class="toc">\n')
    first = True
    has_h2 = False
    in_h3 = False
    indent = 4
    for tag, href, text in data:
        if tag == 'h3' and not in_h3 and has_h2:
            # open a nested list inside the still-open <li> of the h2
            fd.write('\n%s<ul class="toc">\n' % (' ' * indent))
            indent += 4
            in_h3 = True
        elif not first:
            fd.write('</li>\n')

        if tag == 'h2':
            has_h2 = True
            if in_h3:
                indent -= 4
                fd.write('%s</ul></li>\n' % (' ' * indent))
                in_h3 = False

        # Compute the anchor from the unmodified text so it matches the
        # name update_toc() derives from the same heading.
        anchor = escape_href(text)
        text = text.replace(u'\xa0', u' ')
        fd.write('%s<li class="toc"><a href="#%s" class="toc">%s</a>' %
                 (' ' * indent, anchor, text))
        first = False

    fd.write('</li>\n')
    if in_h3:
        # the document ended inside a nested list: close it and its parent
        indent -= 4
        fd.write('%s</ul></li>\n' % (' ' * indent))
    fd.write('</ul>\n')
    fd.write('</div>\n')


_HEADING_RE = re.compile(r'(<h[23]>)(.+?)(</h[23]>)')


def _add_anchor(match):
    """Wrap the heading text of *match* in a named anchor."""
    label = match.group(2)
    return '%s<a name="%s">%s</a>%s' % (
        match.group(1), escape_href(label), label, match.group(3))


def update_toc(data):
    """Return *data* with every h2/h3 heading text wrapped in ``<a name=...>``.

    Headings are matched line by line; text surrounding a heading on the
    same line is kept. Lines are re-joined with '\\n'.
    """
    return '\n'.join(
        _HEADING_RE.sub(_add_anchor, line) for line in data.splitlines()
    )


def get_addon_path(pgm):
    """Check if pgm is in addons list and get addon path

    :param pgm str: pgm

    :return tuple: (True, path) if pgm is addon else (None, None)
    """
    addon_base = os.getenv('GRASS_ADDON_BASE')
    if addon_base:
        # 'addons_paths.json' is a file created during extension install;
        # see the get_addons_paths() function in the g.extension.py file
        json_file = os.path.join(addon_base, 'addons_paths.json')
        if os.path.exists(json_file):
            with open(json_file, 'r') as f:
                addons_paths = json.load(f)
            for addon in addons_paths['tree']:
                split_path = addon['path'].split('/')
                root_dir, module_dir = split_path[0], split_path[-1]
                if 'grass7' == root_dir and pgm == module_dir:
                    return True, addon['path']
    return None, None


def to_title(name):
    """Convert name of command class/family to form suitable for title"""
    if name == 'raster3d':
        return '3D raster'
    elif name == 'postscript':
        return 'PostScript'
    else:
        return name.capitalize()


index_titles = {key: to_title(value) for key, value in index_names.items()}


def _prefix_keyword_links(html, prefix):
    """Insert *prefix* into the href of every link of the KEYWORDS section.

    Used when an add-on manual page is stored on the server, where the
    keyword pages live under a different path. The section is the text
    between the KEYWORDS heading and the next heading; it is split on ','
    and only the items containing a link are kept. *html* is returned
    unchanged when there is no such section or it contains no link.
    """
    section = re.search(
        r'<h[1-9]>KEYWORDS</h[1-9]>(.*?)<h[1-9]>', html, re.DOTALL)
    if not section:
        return html
    marker = 'href="'
    rewritten = []
    for item in section.group(1).split(','):
        if marker in item:
            pos = item.index(marker) + len(marker)
            rewritten.append(item[:pos] + prefix + item[pos:])
    if not rewritten:
        return html
    return html.replace(section.group(1), ','.join(rewritten))


def main():
    """Write the manual page for the module named in ``sys.argv[1]``.

    Reads ``<pgm>.html`` (hand-written body) and ``<pgm>.tmp.html``
    (generated part) from the current directory and writes the assembled
    page to standard output.
    """
    pgm = sys.argv[1]

    src_file = "%s.html" % pgm
    tmp_file = "%s.tmp.html" % pgm

    # process header
    src_data = read_file(src_file)
    name = re.search('(<!-- meta page name:)(.*)(-->)', src_data,
                     re.IGNORECASE)
    pgm_desc = None
    if name:
        pgm = name.group(2).strip().split('-', 1)[0].strip()
        name_desc = re.search('(<!-- meta page name description:)(.*)(-->)',
                              src_data, re.IGNORECASE)
        if name_desc:
            pgm_desc = name_desc.group(2).strip()
    desc = re.search('(<!-- meta page description:)(.*)(-->)', src_data,
                     re.IGNORECASE)
    if desc:
        pgm = desc.group(2).strip()
        header_tmpl = string.Template(header_base + header_nopgm)
    elif not pgm_desc:
        header_tmpl = string.Template(header_base + header_pgm)
    else:
        header_tmpl = string.Template(header_base + header_pgm_desc)

    if not re.search('<html>', src_data, re.IGNORECASE):
        tmp_data = read_file(tmp_file)
        # Adjust keywords html pages paths if add-on html man page
        # is stored on the server
        if html_page_footer_pages_path:
            tmp_data = _prefix_keyword_links(
                tmp_data, html_page_footer_pages_path)
        if not re.search('<html>', tmp_data, re.IGNORECASE):
            sys.stdout.write(
                header_tmpl.substitute(PGM=pgm, PGM_DESC=pgm_desc))
        if tmp_data:
            for line in tmp_data.splitlines(True):
                if not re.search('</body>|</html>', line, re.IGNORECASE):
                    sys.stdout.write(line)

    # create TOC
    write_toc(create_toc(src_data))

    # process body
    sys.stdout.write(update_toc(src_data))

    # if </html> is found, suppose a complete html is provided.
    # otherwise, generate module class reference:
    if re.search('</html>', src_data, re.IGNORECASE):
        return

    # process footer
    index = re.search('(<!-- meta page index:)(.*)(-->)', src_data,
                      re.IGNORECASE)
    if index:
        index_name = index.group(2).strip()
        if '|' in index_name:
            index_name, index_name_cap = index_name.split('|', 1)
        else:
            index_name_cap = to_title(index_name)
    else:
        mod_class = pgm.split('.', 1)[0]
        index_name = index_names.get(mod_class, '')
        index_name_cap = index_titles.get(mod_class, '')

    grass_version = os.getenv("VERSION_NUMBER", "unknown")
    year = os.getenv("VERSION_DATE")
    if not year:
        year = str(datetime.now().year)

    # check the names of scripts to assign the right folder
    topdir_env = os.getenv("MODULE_TOPDIR")
    if topdir_env is None:
        # os.path.abspath(None) would fail with an opaque TypeError
        sys.exit("ERROR: MODULE_TOPDIR environment variable is not set")
    topdir = os.path.abspath(topdir_env)
    curdir = os.path.abspath(os.path.curdir)
    if curdir.startswith(topdir):
        source_url = trunk_url
        pgmdir = curdir.replace(topdir, '').lstrip(os.path.sep)
    else:
        # addons
        source_url = addons_url
        pgmdir = os.path.sep.join(curdir.split(os.path.sep)[-3:])
    url_source = ''
    if os.getenv('SOURCE_URL', ''):
        # addons
        cwd = os.getcwd()
        for prefix, classname in index_names.items():
            idx = cwd.find('{0}{1}.'.format(os.path.sep, prefix))
            if idx > -1:
                pgmname = cwd[idx + 1:]
                url_source = urlparse.urljoin(
                    '{0}{1}/'.format(os.environ['SOURCE_URL'], classname),
                    pgmname,
                )
                break
    else:
        url_source = urlparse.urljoin(source_url, pgmdir)
    if sys.platform == 'win32':
        url_source = url_source.replace(os.path.sep, '/')

    if index_name:
        tree = 'grass/tree'
        commits = 'grass/commits'
        is_addon, addon_path = get_addon_path(pgm=pgm)
        if is_addon:
            # Fix gui/wxpython addon url path
            url_source = urlparse.urljoin(
                os.environ['SOURCE_URL'], addon_path.split('/', 1)[1],
            )
            tree = 'grass-addons/tree'
            commits = 'grass-addons/commits'

        sys.stdout.write(sourcecode.substitute(
            URL_SOURCE=url_source, PGM=pgm,
            URL_LOG=url_source.replace(tree, commits)))
        sys.stdout.write(
            footer_index.substitute(
                INDEXNAME=index_name,
                INDEXNAMECAP=index_name_cap,
                YEAR=year,
                GRASS_VERSION=grass_version,
                HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
            ),
        )
    else:
        sys.stdout.write(
            footer_noindex.substitute(
                YEAR=year,
                GRASS_VERSION=grass_version,
                HTML_PAGE_FOOTER_PAGES_PATH=html_page_footer_pages_path,
            ),
        )


if __name__ == "__main__":
    main()