1# $Id: __init__.py 8671 2021-04-07 12:09:51Z milde $
2# Author: David Goodger <goodger@python.org>
3# Copyright: This module has been placed in the public domain.
4
5"""
This is the ``docutils.parsers.rst`` package. It exports a single class,
`Parser`, the reStructuredText parser.
8
9
10Usage
11=====
12
131. Create a parser::
14
15       parser = docutils.parsers.rst.Parser()
16
17   Several optional arguments may be passed to modify the parser's behavior.
18   Please see `Customizing the Parser`_ below for details.
19
202. Gather input (a multi-line string), by reading a file or the standard
21   input::
22
23       input = sys.stdin.read()
24
253. Create a new empty `docutils.nodes.document` tree::
26
27       document = docutils.utils.new_document(source, settings)
28
29   See `docutils.utils.new_document()` for parameter details.
30
314. Run the parser, populating the document tree::
32
33       parser.parse(input, document)
34
35
36Parser Overview
37===============
38
39The reStructuredText parser is implemented as a state machine, examining its
40input one line at a time. To understand how the parser works, please first
41become familiar with the `docutils.statemachine` module, then see the
42`states` module.
43
44
45Customizing the Parser
46----------------------
47
48Anything that isn't already customizable is that way simply because that type
49of customizability hasn't been implemented yet.  Patches welcome!
50
51When instantiating an object of the `Parser` class, two parameters may be
52passed: ``rfc2822`` and ``inliner``.  Pass ``rfc2822=True`` to enable an
53initial RFC-2822 style header block, parsed as a "field_list" element (with
54"class" attribute set to "rfc2822").  Currently this is the only body-level
55element which is customizable without subclassing.  (Tip: subclass `Parser`
56and change its "state_classes" and "initial_state" attributes to refer to new
57classes. Contact the author if you need more details.)
58
59The ``inliner`` parameter takes an instance of `states.Inliner` or a subclass.
60It handles inline markup recognition.  A common extension is the addition of
61further implicit hyperlinks, like "RFC 2822".  This can be done by subclassing
62`states.Inliner`, adding a new method for the implicit markup, and adding a
63``(pattern, method)`` pair to the "implicit_dispatch" attribute of the
64subclass.  See `states.Inliner.implicit_inline()` for details.  Explicit
65inline markup can be customized in a `states.Inliner` subclass via the
66``patterns.initial`` and ``dispatch`` attributes (and new methods as
67appropriate).
68"""
69
# Markup format used by the docstrings in this module (PEP 258 convention).
__docformat__ = 'reStructuredText'
71
72
73import docutils.parsers
74import docutils.statemachine
75from docutils.parsers.rst import roles, states
76from docutils import frontend, nodes, Component
77from docutils.transforms import universal
78
79
class Parser(docutils.parsers.Parser):

    """
    The reStructuredText parser.

    Instances hold the name of the initial state (`initial_state`), the
    state classes handed to the state machine (`state_classes`) and an
    optional inliner object (`inliner`).  `parse()` does the actual work.
    """

    supported = ('restructuredtext', 'rst', 'rest', 'restx', 'rtxt', 'rstx')
    """Aliases this parser supports."""

    settings_spec = docutils.parsers.Parser.settings_spec + (
        'reStructuredText Parser Options',
        None,
        (('Recognize and link to standalone PEP references (like "PEP 258").',
          ['--pep-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for PEP references '
          '(default "http://www.python.org/dev/peps/").',
          ['--pep-base-url'],
          {'metavar': '<URL>', 'default': 'http://www.python.org/dev/peps/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Template for PEP file part of URL. (default "pep-%04d")',
          ['--pep-file-url-template'],
          {'metavar': '<URL>', 'default': 'pep-%04d'}),
         ('Recognize and link to standalone RFC references (like "RFC 822").',
          ['--rfc-references'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Base URL for RFC references (default "http://tools.ietf.org/html/").',
          ['--rfc-base-url'],
          {'metavar': '<URL>', 'default': 'http://tools.ietf.org/html/',
           'validator': frontend.validate_url_trailing_slash}),
         ('Set number of spaces for tab expansion (default 8).',
          ['--tab-width'],
          {'metavar': '<width>', 'type': 'int', 'default': 8,
           'validator': frontend.validate_nonnegative_int}),
         ('Remove spaces before footnote references.',
          ['--trim-footnote-reference-space'],
          {'action': 'store_true', 'validator': frontend.validate_boolean}),
         ('Leave spaces before footnote references.',
          ['--leave-footnote-reference-space'],
          {'action': 'store_false', 'dest': 'trim_footnote_reference_space'}),
         ('Token name set for parsing code with Pygments: one of '
          '"long", "short", or "none" (no parsing). Default is "long".',
          ['--syntax-highlight'],
          {'choices': ['long', 'short', 'none'],
           'default': 'long', 'metavar': '<format>'}),
         ('Change straight quotation marks to typographic form: '
          'one of "yes", "no", "alt[ernative]" (default "no").',
          ['--smart-quotes'],
          {'default': False, 'metavar': '<yes/no/alt>',
           'validator': frontend.validate_ternary}),
         ('Characters to use as "smart quotes" for <language>. ',
          ['--smartquotes-locales'],
          {'metavar': '<language:quotes[,language:quotes,...]>',
           'action': 'append',
           'validator': frontend.validate_smartquotes_locales}),
         ('Inline markup recognized at word boundaries only '
          '(adjacent to punctuation or whitespace). '
          'Force character-level inline markup recognition with '
          '"\\ " (backslash + space). Default.',
          ['--word-level-inline-markup'],
          {'action': 'store_false', 'dest': 'character_level_inline_markup'}),
         ('Inline markup recognized anywhere, regardless of surrounding '
          'characters. Backslash-escapes must be used to avoid unwanted '
          'markup recognition. Useful for East Asian languages. '
          'Experimental.',
          ['--character-level-inline-markup'],
          {'action': 'store_true', 'default': False,
           'dest': 'character_level_inline_markup'}),
        ))

    config_section = 'restructuredtext parser'
    config_section_dependencies = ('parsers',)

    def __init__(self, rfc2822=False, inliner=None):
        """
        Set up the parser.

        `rfc2822`: if true, the initial state is 'RFC2822Body' instead of
        'Body'.
        `inliner`: stored as-is and passed to the state machine's `run()`
        call in `parse()`.
        """
        if rfc2822:
            self.initial_state = 'RFC2822Body'
        else:
            self.initial_state = 'Body'
        self.state_classes = states.state_classes
        self.inliner = inliner

    def get_transforms(self):
        """Return `Component`'s transforms plus `universal.SmartQuotes`."""
        return Component.get_transforms(self) + [
            universal.SmartQuotes]

    def parse(self, inputstring, document):
        """
        Parse `inputstring` and populate `document`, a document tree.

        If any input line is longer than the ``line_length_limit`` setting,
        an error system message naming the first offending line (1-based)
        is appended to the document and the state machine is not run.
        """
        self.setup_parse(inputstring, document)
        # provide fallbacks in case the document has only generic settings
        self.document.settings.setdefault('tab_width', 8)
        self.document.settings.setdefault('syntax_highlight', 'long')
        # Without this fallback the length check below raises AttributeError
        # for such documents; 10000 is the documented default of the setting.
        self.document.settings.setdefault('line_length_limit', 10000)
        self.statemachine = states.RSTStateMachine(
              state_classes=self.state_classes,
              initial_state=self.initial_state,
              debug=document.reporter.debug_flag)
        inputlines = docutils.statemachine.string2lines(
              inputstring, tab_width=document.settings.tab_width,
              convert_whitespace=True)
        # The limit does not change while scanning; look it up once.
        line_length_limit = self.document.settings.line_length_limit
        for i, line in enumerate(inputlines):
            if len(line) > line_length_limit:
                error = self.document.reporter.error(
                            'Line %d exceeds the line-length-limit.'%(i+1))
                self.document.append(error)
                break
        else:
            # Only reached when no line exceeded the limit (no `break`).
            self.statemachine.run(inputlines, document, inliner=self.inliner)
        # restore the "default" default role after parsing a document
        if '' in roles._roles:
            del roles._roles['']
        self.finish_parse()
188
189
class DirectiveError(Exception):

    """
    Exception carrying a system message level and the message text.

    Meant to be raised from within directive code.

    Do not create instances yourself -- call
    `Directive.directive_error()` (or one of its wrappers) instead!
    """

    def __init__(self, level, message):
        """Remember the system message `level` and the `message` text."""
        # The base class is deliberately initialised without arguments,
        # so ``args`` stays empty; the payload lives in `level` and `msg`.
        super(DirectiveError, self).__init__()
        self.msg = message
        self.level = level
206
207
class Directive(object):

    """
    Base class for reStructuredText directives.

    The following attributes may be set by subclasses.  They are
    interpreted by the directive parser (which runs the directive
    class):

    - `required_arguments`: The number of required arguments (default:
      0).

    - `optional_arguments`: The number of optional arguments (default:
      0).

    - `final_argument_whitespace`: A boolean, indicating if the final
      argument may contain whitespace (default: False).

    - `option_spec`: A dictionary, mapping known option names to
      conversion functions such as `int` or `float` (default: None, no
      options).  Several conversion functions are defined in the
      directives/__init__.py module.

      Option conversion functions take a single parameter, the option
      argument (a string or ``None``), validate it and/or convert it
      to the appropriate form.  Conversion functions may raise
      `ValueError` and `TypeError` exceptions.

    - `has_content`: A boolean; True if content is allowed (default:
      False).  Client code must handle the case where content is
      required but not supplied (an empty content list will be
      supplied).

    Arguments are normally single whitespace-separated words.  The
    final argument may contain whitespace and/or newlines if
    `final_argument_whitespace` is True.

    If the form of the arguments is more complex, specify only one
    argument (either required or optional) and set
    `final_argument_whitespace` to True; the client code must do any
    context-sensitive parsing.

    When a directive implementation is being run, the directive class
    is instantiated, and the `run()` method is executed.  During
    instantiation, the following instance variables are set:

    - ``name`` is the directive type or name (string).

    - ``arguments`` is the list of positional arguments (strings).

    - ``options`` is a dictionary mapping option names (strings) to
      values (type depends on option conversion functions; see
      `option_spec` above).

    - ``content`` is a list of strings, the directive content line by
      line.

    - ``lineno`` is the absolute line number of the first line
      of the directive.

    - ``content_offset`` is the line offset of the first line of the
      content from the beginning of the current input.  Used when
      initiating a nested parse.

    - ``block_text`` is a string containing the entire directive.

    - ``state`` is the state which called the directive function.

    - ``state_machine`` is the state machine which controls the state
      which called the directive function.

    Directive functions return a list of nodes which will be inserted
    into the document tree at the point where the directive was
    encountered.  This can be an empty list if there is nothing to
    insert.

    For ordinary directives, the list must contain body elements or
    structural elements.  Some directives are intended specifically
    for substitution definitions, and must return a list of `Text`
    nodes and/or inline elements (suitable for inline insertion, in
    place of the substitution reference).  Such directives must verify
    substitution definition context, typically using code like this::

        if not isinstance(state, states.SubstitutionDef):
            error = state_machine.reporter.error(
                'Invalid context: the "%s" directive can only be used '
                'within a substitution definition.' % (name),
                nodes.literal_block(block_text, block_text), line=lineno)
            return [error]
    """

    # There is a "Creating reStructuredText Directives" how-to at
    # <http://docutils.sf.net/docs/howto/rst-directives.html>.  If you
    # update this docstring, please update the how-to as well.

    required_arguments = 0
    """Number of required directive arguments."""

    optional_arguments = 0
    """Number of optional arguments after the required arguments."""

    final_argument_whitespace = False
    """May the final argument contain whitespace?"""

    option_spec = None
    """Mapping of option names to validator functions."""

    has_content = False
    """May the directive have content?"""

    def __init__(self, name, arguments, options, content, lineno,
                 content_offset, block_text, state, state_machine):
        """Store all parameters unchanged as same-named instance attributes."""
        self.name = name
        self.arguments = arguments
        self.options = options
        self.content = content
        self.lineno = lineno
        self.content_offset = content_offset
        self.block_text = block_text
        self.state = state
        self.state_machine = state_machine

    def run(self):
        """
        Execute the directive; must be overridden by subclasses.

        The base implementation always raises `NotImplementedError`.
        """
        raise NotImplementedError('Must override run() in subclass.')

    # Directive errors:

    def directive_error(self, level, message):
        """
        Return a DirectiveError suitable for being thrown as an exception.

        Call "raise self.directive_error(level, message)" from within
        a directive implementation to return one single system message
        at level `level`, which automatically gets the directive block
        and the line number added.

        Preferably use the `debug`, `info`, `warning`, `error`, or `severe`
        wrapper methods, e.g. ``self.error(message)`` to generate an
        ERROR-level directive error.
        """
        return DirectiveError(level, message)

    def debug(self, message):
        """Return a level-0 `DirectiveError` with `message`."""
        return self.directive_error(0, message)

    def info(self, message):
        """Return a level-1 `DirectiveError` with `message`."""
        return self.directive_error(1, message)

    def warning(self, message):
        """Return a level-2 `DirectiveError` with `message`."""
        return self.directive_error(2, message)

    def error(self, message):
        """Return a level-3 `DirectiveError` with `message`."""
        return self.directive_error(3, message)

    def severe(self, message):
        """Return a level-4 `DirectiveError` with `message`."""
        return self.directive_error(4, message)

    # Convenience methods:

    def assert_has_content(self):
        """
        Throw an ERROR-level DirectiveError if the directive doesn't
        have contents.
        """
        if not self.content:
            raise self.error('Content block expected for the "%s" directive; '
                             'none found.' % self.name)

    def add_name(self, node):
        """Append self.options['name'] to node['names'] if it exists.

        Also normalize the name string and register it as explicit target.
        The 'name' entry is removed from `self.options`, and a 'name'
        attribute already present on `node` is deleted.
        """
        if 'name' in self.options:
            name = nodes.fully_normalize_name(self.options.pop('name'))
            if 'name' in node:
                del node['name']
            node['names'].append(name)
            self.state.document.note_explicit_target(node, node)
384
385
def convert_directive_function(directive_fn):
    """
    Wrap an old-style directive function in a new `Directive` subclass.

    The optional function attributes ``options``, ``content`` and
    ``arguments`` of `directive_fn` supply the class attributes
    `option_spec`, `has_content` and the argument specification (a
    3-tuple: required count, optional count, final-argument-whitespace
    flag).  Missing attributes fall back to ``None``, ``False`` and
    ``(0, 0, False)`` respectively.

    Return the generated class; its `run()` method calls `directive_fn`
    with the nine instance attributes set by `Directive.__init__()`.
    """
    fn_options = getattr(directive_fn, 'options', None)
    fn_content = getattr(directive_fn, 'content', False)
    fn_arguments = getattr(directive_fn, 'arguments', (0, 0, False))
    n_required, n_optional, final_whitespace = fn_arguments

    class FunctionalDirective(Directive):

        option_spec = fn_options
        has_content = fn_content
        _argument_spec = fn_arguments
        required_arguments = n_required
        optional_arguments = n_optional
        final_argument_whitespace = final_whitespace

        def run(self):
            # Positional call in the order of the old functional interface.
            call_args = (
                self.name, self.arguments, self.options, self.content,
                self.lineno, self.content_offset, self.block_text,
                self.state, self.state_machine)
            return directive_fn(*call_args)

    # Hand back the new-style directive class.
    return FunctionalDirective
409