1# Human friendly input/output in Python.
2#
3# Author: Peter Odding <peter@peterodding.com>
4# Last Change: June 11, 2021
5# URL: https://humanfriendly.readthedocs.io
6
7"""
8Parsing and reformatting of usage messages.
9
10The :mod:`~humanfriendly.usage` module parses and reformats usage messages:
11
12- The :func:`format_usage()` function takes a usage message and inserts ANSI
13  escape sequences that highlight items of special significance like command
14  line options, meta variables, etc. The resulting usage message is (intended
15  to be) easier to read on a terminal.
16
17- The :func:`render_usage()` function takes a usage message and rewrites it to
18  reStructuredText_ suitable for inclusion in the documentation of a Python
  package. This provides a DRY_ solution to keeping a single authoritative
20  definition of the usage message while making it easily available in
21  documentation. As a cherry on the cake it's not just a pre-formatted dump of
22  the usage message but a nicely formatted reStructuredText_ fragment.
23
24- The remaining functions in this module support the two functions above.
25
26Usage messages in general are free format of course, however the functions in
27this module assume a certain structure from usage messages in order to
28successfully parse and reformat them, refer to :func:`parse_usage()` for
29details.
30
31.. _DRY: https://en.wikipedia.org/wiki/Don%27t_repeat_yourself
32.. _reStructuredText: https://en.wikipedia.org/wiki/ReStructuredText
33"""
34
35# Standard library modules.
36import csv
37import functools
38import logging
39import re
40
41# Standard library module or external dependency (see setup.py).
42from importlib import import_module
43
44# Modules included in our package.
45from humanfriendly.compat import StringIO
46from humanfriendly.text import dedent, split_paragraphs, trim_empty_lines
47
48# Public identifiers that require documentation.
__all__ = (
    'find_meta_variables',
    'format_usage',
    'import_module',  # previously exported (backwards compatibility)
    'inject_usage',
    'parse_usage',
    'render_usage',
    'USAGE_MARKER',
)

USAGE_MARKER = "Usage:"
"""The string that starts the first line of a usage message."""

START_OF_OPTIONS_MARKER = "Supported options:"
"""The string that marks the start of the documented command line options."""

# Compiled regular expression used to tokenize usage messages. It matches
# three kinds of tokens that are not preceded by a non-whitespace character:
# command line options (optionally followed by "=VALUE"), environment
# variables ("$NAME") and upper case words of two or more characters. The
# upper case words are only candidates: replace_tokens_callback() leaves them
# alone unless they are known meta variables.
USAGE_PATTERN = re.compile(r'''
    # Make sure whatever we're matching isn't preceded by a non-whitespace
    # character.
    (?<!\S)
    (
        # A short command line option or a long command line option
        # (possibly including a meta variable for a value).
        (-\w|--\w+(-\w+)*(=\S+)?)
        # Or ...
        |
        # An environment variable.
        \$[A-Za-z_][A-Za-z0-9_]*
        # Or ...
        |
        # Might be a meta variable (usage() will figure it out).
        [A-Z][A-Z0-9_]+
    )
''', re.VERBOSE)

# Compiled regular expression used to recognize options. This is the option
# alternative of USAGE_PATTERN wrapped in ^...$ anchors, so it only matches
# when the complete string is one option. parse_usage() applies it to each
# comma separated token of a paragraph to detect lines that list the variants
# of a command line option.
OPTION_PATTERN = re.compile(r'^(-\w|--\w+(-\w+)*(=\S+)?)$')

# Initialize a logger for this module.
logger = logging.getLogger(__name__)
90
91
def format_usage(usage_text):
    """
    Insert ANSI escape sequences that highlight parts of a usage message.

    :param usage_text: The usage message to process (a string).
    :returns: The usage message with special items highlighted (a string).

    Leading and trailing whitespace is stripped from the usage message before
    it is processed line by line. The following items are highlighted:

    - Every line that starts with :data:`USAGE_MARKER` (the whole line)
    - Short and long command line options
    - Environment variables
    - Meta variables (see :func:`find_meta_variables()`)

    All items are highlighted in the color defined by
    :data:`.HIGHLIGHT_COLOR`.
    """
    # The import is local to avoid circular import errors caused by the
    # interdependencies between humanfriendly.terminal and humanfriendly.usage.
    from humanfriendly.terminal import ansi_wrap, HIGHLIGHT_COLOR

    def highlight(fragment):
        # Wrap a piece of text in the highlight color.
        return ansi_wrap(fragment, color=HIGHLIGHT_COLOR)

    meta_variables = find_meta_variables(usage_text)
    # Line endings are preserved by splitlines(True) so that the pieces can
    # simply be concatenated again.
    return ''.join(
        # "Usage: ..." lines are highlighted as a whole, in other lines only
        # options, meta variables and environment variables are highlighted.
        highlight(line) if line.startswith(USAGE_MARKER)
        else replace_special_tokens(line, meta_variables, highlight)
        for line in usage_text.strip().splitlines(True)
    )
125
126
def find_meta_variables(usage_text):
    """
    Collect the meta variables that occur in a usage message.

    :param usage_text: The usage message to parse (a string).
    :returns: A list of strings with the meta variables found in the usage
              message. Each meta variable is listed once and the order of
              the list is unspecified (the values are collected in a set).

    The convention for a command line option that requires an argument is
    to write it as ``--option=ARG``. The text after the equals sign (``ARG``
    in this example) is the meta variable.
    """
    # For every token that looks like an option, take whatever follows the
    # first equals sign (an empty string when there is no equals sign).
    option_values = (
        match.group(0).partition('=')[2]
        for match in USAGE_PATTERN.finditer(usage_text)
        if match.group(0).startswith('-')
    )
    # Drop the options without a value and remove duplicates.
    return list({value for value in option_values if value})
147
148
def parse_usage(text):
    """
    Split a usage message into its introduction and its documented options.

    :param text: The usage message to parse (a string).
    :returns: A tuple of two lists:

              1. A list of strings with the paragraphs of the usage message's
                 "introduction": every paragraph up to and including the
                 ``Supported options:`` paragraph (or all paragraphs when
                 that marker is absent).

              2. A flat list of strings that alternates between command line
                 options and their descriptions: Item zero is a line listing
                 a supported command line option, item one is the description
                 of that option, item two is a line listing another supported
                 command line option, etc.

    Usage messages are free format, however :func:`parse_usage()` assumes a
    certain structure in order to successfully parse them:

    - The usage message starts with a line ``Usage: ...`` that shows a
      symbolic representation of the way the program is to be invoked.

    - After some free form text a line ``Supported options:`` (surrounded by
      empty lines) precedes the documentation of the supported command line
      options.

    - The command line options are documented as follows::

        -v, --verbose

          Make more noise.

      So all of the variants of the command line option are shown together on
      a separate line (separated by a comma and a space), followed by one or
      more paragraphs describing the option.

    - The first paragraph after ``Supported options:`` is always taken to be
      a line of option variants, whatever it contains.

    For a complete example refer to the usage message of the `humanfriendly`
    package (defined in the :mod:`humanfriendly.cli` module) and compare it
    with the output of ``humanfriendly --help``.
    """
    def lists_option_variants(paragraph):
        # Split on a comma followed by whitespace (instead of just a comma)
        # so that the label of an option's value can contain commas.
        candidates = [
            part.strip()
            for part in re.split(r',\s', paragraph)
            if part and not part.isspace()
        ]
        return all(OPTION_PATTERN.match(candidate) for candidate in candidates)

    # Split the raw usage message into paragraphs.
    paragraphs = split_paragraphs(text)
    # The introduction runs up to and including the start of options marker.
    introduction = []
    consumed = 0
    for consumed, paragraph in enumerate(paragraphs, start=1):
        introduction.append(paragraph)
        if paragraph == START_OF_OPTIONS_MARKER:
            break
    logger.debug("Parsed introduction: %s", introduction)
    # Everything after the introduction documents command line options.
    documented_options = []
    description = None
    for paragraph in paragraphs[consumed:]:
        if description is None or lists_option_variants(paragraph):
            # This paragraph starts the documentation of a new option, so
            # the description of the previous option (if any) is complete.
            if description is not None:
                documented_options.append(dedent('\n\n'.join(description)))
            documented_options.append(dedent(paragraph))
            description = []
        else:
            description.append(paragraph)
    if description is not None:
        # The description's paragraphs are joined back together so that
        # their common leading indentation can be removed.
        documented_options.append(dedent('\n\n'.join(description)))
    logger.debug("Parsed options: %s", documented_options)
    return introduction, documented_options
232
233
def render_usage(text):
    """
    Render a plain text usage message as reStructuredText_.

    :param text: The plain text usage message (a string).
    :returns: The usage message rendered to reStructuredText_ (a string).

    The paragraphs of the introduction are rendered one by one using
    :func:`render_paragraph()`. The documented command line options (if any)
    are rendered as a ``csv-table`` directive with the columns "Option" and
    "Description".
    """
    meta_variables = find_meta_variables(text)
    introduction, options = parse_usage(text)
    fragments = []
    for paragraph in introduction:
        fragments.append(render_paragraph(paragraph, meta_variables))
    if options:
        directive = [
            '.. csv-table::',
            '   :header: Option, Description',
            '   :widths: 30, 70',
            '',
        ]
        fragments.append('\n'.join(directive))
        buffer = StringIO()
        writer = csv.writer(buffer)
        # The list alternates between option variants and descriptions.
        for index in range(0, len(options), 2):
            variants = options[index]
            description = options[index + 1]
            option_cell = render_paragraph(variants, meta_variables)
            description_cell = '\n\n'.join(
                render_paragraph(p, meta_variables)
                for p in split_paragraphs(description)
            )
            writer.writerow([option_cell, description_cell.rstrip()])
        # The table rows are the content of the directive so every line
        # needs to be indented.
        table_rows = ['   %s' % line for line in buffer.getvalue().splitlines()]
        fragments.append('\n'.join(table_rows))
    logger.debug("Rendered output: %s", fragments)
    return '\n\n'.join(trim_empty_lines(fragment) for fragment in fragments)
264
265
def inject_usage(module_name):
    """
    Inject a rendered usage message into a reStructuredText_ file using cog_.

    :param module_name: The name of the module whose ``__doc__`` attribute is
                        the source of the usage message (a string).

    The module is imported, its docstring is passed to :func:`render_usage()`
    and the result is written using ``cog.out()`` (surrounded by empty
    lines). To use this function you add a fragment like the following to
    your ``*.rst`` file::

       .. [[[cog
       .. from humanfriendly.usage import inject_usage
       .. inject_usage('humanfriendly.cli')
       .. ]]]
       .. [[[end]]]

    The lines in the fragment above are single line reStructuredText_
    comments that are not copied to the output, they just tell cog_ where to
    inject the reformatted usage message. After adding these lines to your
    ``*.rst`` file the rendered usage message is updated by running cog_:

    .. code-block:: sh

       $ cog.py -r README.rst

    This will inject or replace the rendered usage message in your
    ``README.rst`` file with an up to date copy.

    .. _cog: http://nedbatchelder.com/code/cog/
    """
    # The import is local so that cog is only required when this function
    # is actually called.
    import cog
    module = import_module(module_name)
    rendered = render_usage(module.__doc__)
    cog.out("\n" + rendered + "\n\n")
301
302
def render_paragraph(paragraph, meta_variables):
    """
    Render a single paragraph of a usage message to reStructuredText.

    :param paragraph: The paragraph to render (a string).
    :param meta_variables: The meta variables of the usage message (a list of
                           strings, see :func:`find_meta_variables()`).
    :returns: The rendered paragraph (a string).

    The following rules are applied, the first one that matches wins:

    1. A paragraph that starts with :data:`USAGE_MARKER` is rendered with its
       first word in bold and the remaining words wrapped in backticks.
    2. A paragraph equal to :data:`START_OF_OPTIONS_MARKER` is rendered in
       bold.
    3. A paragraph that starts with a shell prompt (``$`` followed by
       whitespace and a command) is rendered as a ``code-block`` directive.
    4. Any other paragraph that isn't indented gets its UNIX style quoting
       replaced by double quotes, its asterisks escaped and its special
       tokens (see :func:`replace_special_tokens()`) marked up as literals.
    5. Indented paragraphs are returned unchanged.

    An empty paragraph is treated as a paragraph that isn't indented, so an
    empty string is returned for it.
    """
    # Reformat the "Usage:" line to highlight "Usage:" in bold and wrap the
    # remainder of the line in backticks.
    if paragraph.startswith(USAGE_MARKER):
        tokens = paragraph.split()
        return "**%s** `%s`" % (tokens[0], ' '.join(tokens[1:]))
    # Reformat the "Supported options:" line to highlight it in bold. The
    # module level constant is used here so that this check can't get out of
    # sync with the marker that parse_usage() looks for.
    if paragraph == START_OF_OPTIONS_MARKER:
        return "**%s**" % paragraph
    # Slicing (instead of indexing) makes this check safe for empty strings:
    # ''.isspace() is False whereas ''[0] raises IndexError.
    is_indented = paragraph[:1].isspace()
    # Reformat shell transcripts into code blocks.
    if re.match(r'^\s*\$\s+\S', paragraph):
        lines = paragraph.splitlines()
        if not is_indented:
            # The content of a directive needs to be indented, so if the
            # paragraph isn't already indented we'll indent it now.
            lines = ['  %s' % line for line in lines]
        return "\n".join(['.. code-block:: sh', ''] + lines)
    # The following reformatting applies only to paragraphs which are not
    # indented. Yes this is a hack - for now we assume that indented paragraphs
    # are code blocks, even though this assumption can be wrong.
    if not is_indented:
        # Change UNIX style `quoting' so it doesn't trip up DocUtils.
        paragraph = re.sub("`(.+?)'", r'"\1"', paragraph)
        # Escape asterisks (they would otherwise start emphasis markup).
        paragraph = paragraph.replace('*', r'\*')
        # Reformat inline tokens as literals.
        paragraph = replace_special_tokens(
            paragraph, meta_variables,
            lambda token: '``%s``' % token,
        )
    return paragraph
337
338
def replace_special_tokens(text, meta_variables, replace_fn):
    """
    Apply a replacement function to the special tokens in a piece of text.

    :param text: The text to process (a string).
    :param meta_variables: The known meta variables (a list of strings).
    :param replace_fn: A function that takes a token (a string) and returns
                       its replacement (a string).
    :returns: The text with its special tokens replaced (a string).

    Tokens are found using :data:`USAGE_PATTERN` and
    :func:`replace_tokens_callback()` decides for each token whether it is
    replaced or left alone.
    """
    def substitute(match):
        # Bind the extra arguments that re.sub() doesn't know about.
        return replace_tokens_callback(
            match,
            meta_variables=meta_variables,
            replace_fn=replace_fn,
        )

    return USAGE_PATTERN.sub(substitute, text)
345
346
def replace_tokens_callback(match, meta_variables, replace_fn):
    """
    Decide whether a matched token should be replaced.

    :param match: A regular expression match object whose complete match is
                  the token under consideration.
    :param meta_variables: The known meta variables (a list of strings).
    :param replace_fn: A function that takes a token (a string) and returns
                       its replacement.
    :returns: The token itself when it is an upper case word (two or more
              characters) that is not a known meta variable, otherwise the
              result of calling `replace_fn` with the token.
    """
    candidate = match.group(0)
    looks_like_meta_variable = re.match('^[A-Z][A-Z0-9_]+$', candidate)
    if looks_like_meta_variable and candidate not in meta_variables:
        # An upper case word that isn't a meta variable is just regular text
        # that happens to be written in capitals, so it is left alone.
        return candidate
    return replace_fn(candidate)
352