# $Id: __init__.py 8671 2021-04-07 12:09:51Z milde $
# Author: David Goodger <goodger@python.org>
# Copyright: This module has been placed in the public domain.

"""
This is the ``docutils.parsers.rst`` package. It exports a single class,
`Parser`, the reStructuredText parser.


Usage
=====

1. Create a parser::

       parser = docutils.parsers.rst.Parser()

   Several optional arguments may be passed to modify the parser's behavior.
   Please see `Customizing the Parser`_ below for details.

2. Gather input (a multi-line string), by reading a file or the standard
   input::

       input = sys.stdin.read()

3. Create a new empty `docutils.nodes.document` tree::

       document = docutils.utils.new_document(source, settings)

   See `docutils.utils.new_document()` for parameter details.

4. Run the parser, populating the document tree::

       parser.parse(input, document)


Parser Overview
===============

The reStructuredText parser is implemented as a state machine, examining its
input one line at a time. To understand how the parser works, please first
become familiar with the `docutils.statemachine` module, then see the
`states` module.


Customizing the Parser
----------------------

Anything that isn't already customizable is that way simply because that type
of customizability hasn't been implemented yet. Patches welcome!

When instantiating an object of the `Parser` class, two parameters may be
passed: ``rfc2822`` and ``inliner``. Pass ``rfc2822=True`` to enable an
initial RFC-2822 style header block, parsed as a "field_list" element (with
"class" attribute set to "rfc2822"). Currently this is the only body-level
element which is customizable without subclassing. (Tip: subclass `Parser`
and change its "state_classes" and "initial_state" attributes to refer to new
classes. Contact the author if you need more details.)

The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
It handles inline markup recognition. A common extension is the addition of
further implicit hyperlinks, like "RFC 2822". This can be done by subclassing
`states.Inliner`, adding a new method for the implicit markup, and adding a
``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
subclass. See `states.Inliner.implicit_inline()` for details. Explicit
inline markup can be customized in a `states.Inliner` subclass via the
``patterns.initial`` and ``dispatch`` attributes (and new methods as
appropriate).
"""

__docformat__ = 'reStructuredText'


import docutils.parsers
import docutils.statemachine
from docutils.parsers.rst import roles, states
from docutils import frontend, nodes, Component
from docutils.transforms import universal


class Parser(docutils.parsers.Parser):

    """The reStructuredText parser."""

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    settings_spec = docutils.parsers.Parser.settings_spec + (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/dev/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/dev/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://tools.ietf.org/html/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://tools.ietf.org/html/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'metavar': '<yes/no/alt>',
           'validator': frontend.validate_ternary}),
         ('Characters to use as "smart quotes" for <language>. ',
          ['--smartquotes-locales'],
          {'metavar': '<language:quotes[,language:quotes,...]>',
           'action': 'append',
           'validator': frontend.validate_smartquotes_locales}),
         ('Inline markup recognized at word boundaries only '
          '(adjacent to punctuation or whitespace). '
          'Force character-level inline markup recognition with '
          '"\\ " (backslash + space). Default.',
          ['--word-level-inline-markup'],
          {'action': 'store_false', 'dest': 'character_level_inline_markup'}),
         ('Inline markup recognized anywhere, regardless of surrounding '
          'characters. Backslash-escapes must be used to avoid unwanted '
          'markup recognition. Useful for East Asian languages. '
          'Experimental.',
          ['--character-level-inline-markup'],
          {'action': 'store_true', 'default': False,
           'dest': 'character_level_inline_markup'}),
         ))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None):
        """
        Set up the parser.

        - `rfc2822`: if true, parsing starts in the 'RFC2822Body' state
          (initial RFC-2822 style header block); otherwise in 'Body'.
        - `inliner`: object passed on to the state machine for inline
          markup recognition (``None`` is passed on unchanged).
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def get_transforms(self):
        """Return the component's transforms plus `universal.SmartQuotes`."""
        return Component.get_transforms(self) + [
            universal.SmartQuotes]

    def parse(self, inputstring, document):
        """Parse `inputstring` and populate `document`, a document tree."""
        self.setup_parse(inputstring, document)
        # provide fallbacks in case the document has only generic settings
        self.document.settings.setdefault('tab_width', 8)
        self.document.settings.setdefault('syntax_highlight', 'long')
        # `line_length_limit` is read unconditionally below, so it needs a
        # fallback too; 10000 is the documented Docutils default.
        self.document.settings.setdefault('line_length_limit', 10000)
        self.statemachine = states.RSTStateMachine(
            state_classes=self.state_classes,
            initial_state=self.initial_state,
            debug=document.reporter.debug_flag)
        inputlines = docutils.statemachine.string2lines(
            inputstring, tab_width=document.settings.tab_width,
            convert_whitespace=True)
        # Refuse to run the state machine at all if any input line is
        # over-long: report the first offender and skip parsing.
        for i, line in enumerate(inputlines):
            if len(line) > self.document.settings.line_length_limit:
                error = self.document.reporter.error(
                    'Line %d exceeds the line-length-limit.' % (i + 1))
                self.document.append(error)
                break
        else:
            self.statemachine.run(inputlines, document, inliner=self.inliner)
        # restore the "default" default role after parsing a document
        if '' in roles._roles:
            del roles._roles['']
        self.finish_parse()


class DirectiveError(Exception):

    """
    Store a message and a system message level.

    To be thrown from inside directive code.

    Do not instantiate directly -- use `Directive.directive_error()`
    instead!
    """

    def __init__(self, level, message):
        """Set error `message` and `level`"""
        # Hand the message to the base class so that ``str(error)`` and
        # tracebacks of uncaught directive errors are not empty.
        Exception.__init__(self, message)
        self.level = level
        self.msg = message


class Directive(object):

    """
    Base class for reStructuredText directives.

    The following attributes may be set by subclasses.  They are
    interpreted by the directive parser (which runs the directive
    class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: {}, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed.  Client
      code must handle the case where content is required but not
      supplied (an empty content list will be supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    When a directive implementation is being run, the directive class
    is instantiated, and the `run()` method is executed.  During
    instantiation, the following instance variables are set:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``content_offset`` is the line offset of the first line of the content from
      the beginning of the current input.  Used when initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state which called
      the directive function.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = state_machine.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <http://docutils.sf.net/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """Store the directive call context (see the class docstring)."""
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine

    def run(self):
        """Execute the directive; must be overridden by subclasses."""
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Call "raise self.directive_error(level, message)" from within
        a directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 `DirectiveError` for `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 `DirectiveError` for `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 `DirectiveError` for `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 (ERROR) `DirectiveError` for `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 `DirectiveError` for `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if not self.content:
            raise self.error('Content block expected for the "%s" directive; '
                             'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        Also normalize the name string and register it as explicit target.
        """
        if 'name' in self.options:
            name = nodes.fully_normalize_name(self.options.pop('name'))
            if 'name' in node:
                del node['name']
            node['names'].append(name)
            self.state.document.note_explicit_target(node, node)


def convert_directive_function(directive_fn):
    """
    Define & return a directive class generated from `directive_fn`.

    `directive_fn` uses the old-style, functional interface.  Its optional
    function attributes ``options``, ``content`` and ``arguments`` (a
    ``(required, optional, final_whitespace)`` triple) supply the class
    attributes of the generated directive; missing attributes fall back
    to ``None``, ``False`` and ``(0, 0, False)`` respectively.
    """

    class FunctionalDirective(Directive):

        option_spec = getattr(directive_fn, 'options', None)
        has_content = getattr(directive_fn, 'content', False)
        _argument_spec = getattr(directive_fn, 'arguments', (0, 0, False))
        required_arguments, optional_arguments, final_argument_whitespace \
            = _argument_spec

        def run(self):
            """Call the wrapped function with the stored call context."""
            return directive_fn(
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)

    # Return new-style directive.
    return FunctionalDirective