1# -*- coding: utf-8 -*- #
2# Copyright 2017 Google LLC. All Rights Reserved.
3#
4# Licensed under the Apache License, Version 2.0 (the "License");
5# you may not use this file except in compliance with the License.
6# You may obtain a copy of the License at
7#
8#    http://www.apache.org/licenses/LICENSE-2.0
9#
10# Unless required by applicable law or agreed to in writing, software
11# distributed under the License is distributed on an "AS IS" BASIS,
12# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13# See the License for the specific language governing permissions and
14# limitations under the License.
15
16"""Cloud SDK markdown document token renderer.
17
18This is different from the other renderers:
19
(1) The output is a list of lines, each line a list of (token, text) tuples,
    returned by TokenRenderer.Finish().
22(2) A token is an empty object that conveys font style and embellishment by
23    convention using the token name. Callers set up a style sheet indexed by
24    tokens to control how the embellishments are rendered, e.g. color.
25(3) The rendering is constrained by width and height.
26
27Tokens generated by this module:
28
29  Token.Markdown.Bold: bold text
30  Token.Markdown.BoldItalic: bold+italic text
31  Token.Markdown.Code: code text for command line examples
32  Token.Markdown.Definition: definition list item (flag or subcommand or choice)
33  Token.Markdown.Italic: italic text
34  Token.Markdown.Normal: normal text
35  Token.Markdown.Section: section header
36  Token.Markdown.Truncated: the last token => indicates truncation
37  Token.Markdown.Value: definition list item value (flag value)
38
39The Token objects self-define on first usage. Don't champion this pattern in the
40Cloud SDK.
41
42Usage:
43
44  from six.moves import StringIO
45
46  from googlecloudsdk.core.document_renderers import token_renderer
47  from googlecloudsdk.core.document_renderers import render_document
48
49  markdown = <markdown document string>
50  tokens = render_document.MarkdownRenderer(
51      token_renderer.TokenRenderer(width=W, height=H),
52      StringIO(markdown)).Run()
53"""
54
55from __future__ import absolute_import
56from __future__ import division
57from __future__ import unicode_literals
58
59import re
60
61from googlecloudsdk.core.console import console_attr
62from googlecloudsdk.core.document_renderers import renderer
63
64from prompt_toolkit.token import Token
65
66
class TokenRenderer(renderer.Renderer):
  """Renders markdown to a list of lines of (token_type, text) tuples.

  Each output line is a list of (token_type, text) tuples, where token_type is
  one of the Token.Markdown.* tokens.

  Attributes:
    _attr: console_attr.ConsoleAttr object.
    _bullet: List of bullet characters indexed by list level modulo #bullets.
    _compact: Compact representation if True. Saves rendering real estate.
    _csi: The control sequence indicator character. Token does not
      have control sequences. This renderer uses them internally to manage
      font styles and attributes (bold, code, italic).
    _current_token_type: current default Token.Markdown.* type
    _fill: The number of characters in the current output line.
    _height: The height of the output window, 0 to disable height checks.
    _ignore_paragraph: Ignore paragraph markdown until the next non-space
      _AddToken.
    _ignore_width: True if the next output word should ignore _width.
    _indent: List of left indentations in characters indexed by _level.
    _level: The section or list level counting from 0.
    _lines: The list of completed output lines. Each line is a list of
      (token_type, text) tuples.
    _tokens: The list of (token_type, text) tuples for the output line in
      progress.
    _truncated: True once the number of output lines reached the output
      height. Subsequent lines are dropped.
    _rows: current rows in table
  """
  # Internal inline embellishments are 2 character sequences
  # <CSI><EMBELLISHMENT>. The embellishment must be an alpha character
  # to make the display width helpers work properly.
  CSI = '\0'  # Won't clash with markdown text input.
  # Maps the embellishment character that follows CSI to its token type.
  EMBELLISHMENTS = {
      'B': Token.Markdown.Bold,
      'C': Token.Markdown.Code,
      'I': Token.Markdown.Italic,
      'N': Token.Markdown.Normal,
      'Z': Token.Markdown.BoldItalic,
  }
  # Base left indent in characters: the level 0 indent when not compact and
  # the extra indent applied to examples and synopsis continuation lines.
  INDENT = 4
  # Extra indent for the continuation lines of a split wide synopsis group.
  SPLIT_INDENT = 2
  # Positions of the fields in a (token_type, text) tuple.
  TOKEN_TYPE_INDEX = 0
  TOKEN_TEXT_INDEX = 1
104
105  class Indent(object):
106    """Second line indent stack."""
107
108    def __init__(self, compact=True):
109      self.indent = 0 if compact else TokenRenderer.INDENT
110      self.second_line_indent = self.indent
111
112  def __init__(self, height=0, encoding='utf-8', compact=True, **kwargs):
113    super(TokenRenderer, self).__init__(**kwargs)
114    self._attr = console_attr.GetConsoleAttr(encoding=encoding)
115    self._csi = self.CSI
116    self._attr._csi = self._csi  # pylint: disable=protected-access
117    self._bullet = self._attr.GetBullets()
118    self._compact = compact
119    self._fill = 0
120    self._current_token_type = Token.Markdown.Normal
121    self._height = height
122    self._ignore_paragraph = False
123    self._ignore_width = False
124    self._indent = [self.Indent(compact)]
125    self._level = 0
126    self._lines = []
127    self._tokens = []
128    self._truncated = False
129    self._rows = []
130
131  def _Truncate(self, tokens, overflow):
132    """Injects a truncation indicator token and rejects subsequent tokens.
133
134    Args:
135      tokens: The last line of tokens at the output height. The part of the
136        line within the output width will be visible, modulo the trailing
137        truncation marker token added here.
138      overflow: If not None then this is a (word, available) tuple from Fill()
139        where word caused the line width overflow and available is the number of
140        characters available in the current line before ' '+word would be
141        appended.
142
143    Returns:
144      A possibly altered list of tokens that form the last output line.
145    """
146    self._truncated = True
147    marker_string = '...'
148    marker_width = len(marker_string)
149    marker_token = (Token.Markdown.Truncated, marker_string)
150    if tokens and overflow:
151      word, available = overflow  # pylint: disable=unpacking-non-sequence
152      if marker_width == available:
153        # Exactly enough space for the marker.
154        pass
155      elif (marker_width + 1) <= available:
156        # The marker can replace the trailing characters in the overflow word.
157        word = ' ' + self._UnFormat(word)[:available-marker_width-1]
158        tokens.append((self._current_token_type, word))
159      else:
160        # Truncate the token list so the marker token can fit.
161        truncated_tokens = []
162        available = self._width
163        for token in tokens:
164          word = token[self.TOKEN_TEXT_INDEX]
165          width = self._attr.DisplayWidth(word)
166          available -= width
167          if available <= marker_width:
168            trim = marker_width - available
169            if trim:
170              word = word[:-trim]
171            truncated_tokens.append((token[self.TOKEN_TYPE_INDEX], word))
172            break
173          truncated_tokens.append(token)
174        tokens = truncated_tokens
175    tokens.append(marker_token)
176    return tokens
177
178  def _NewLine(self, overflow=None):
179    """Adds the current token list to the line list.
180
181    Args:
182      overflow: If not None then this is a (word, available) tuple from Fill()
183        where word caused the line width overflow and available is the number of
184        characters available in the current line before ' '+word would be
185        appended.
186    """
187    tokens = self._tokens
188    self._tokens = []
189    if self._truncated or not tokens and self._compact:
190      return
191    if self._lines:
192      # Delete trailing space.
193      while (self._lines[-1] and
194             self._lines[-1][-1][self.TOKEN_TEXT_INDEX].isspace()):
195        self._lines[-1] = self._lines[-1][:-1]
196    if self._height and (len(self._lines) + int(bool(tokens))) >= self._height:
197      tokens = self._Truncate(tokens, overflow)
198    self._lines.append(tokens)
199
200  def _MergeOrAddToken(self, text, token_type):
201    """Merges text if the previous token_type matches or appends a new token."""
202    if not text:
203      return
204    if (not self._tokens or
205        self._tokens[-1][self.TOKEN_TYPE_INDEX] != token_type):
206      self._tokens.append((token_type, text))
207    elif self._tokens[-1][self.TOKEN_TYPE_INDEX] == Token.Markdown.Section:
208      # A section header with no content.
209      prv_text = self._tokens[-1][self.TOKEN_TEXT_INDEX]
210      prv_indent = re.match('( *)', prv_text).group(1)
211      new_indent = re.match('( *)', text).group(1)
212      if prv_indent == new_indent:
213        # Same indentation => discard the previous empty section.
214        self._tokens[-1] = (token_type, text)
215      else:
216        # Insert newline to separate previous header from the new one.
217        self._NewLine()
218        self._tokens.append((token_type, text))
219    else:
220      self._tokens[-1] = (token_type,
221                          self._tokens[-1][self.TOKEN_TEXT_INDEX] + text)
222
223  def _AddToken(self, text, token_type=None):
224    """Appends a (token_type, text) tuple to the current line."""
225    if text and not text.isspace():
226      self._ignore_paragraph = False
227    if not token_type:
228      token_type = self._current_token_type
229    if self._csi not in text:
230      self._MergeOrAddToken(text, token_type)
231    else:
232      i = 0
233      while True:
234        j = text.find(self._csi, i)
235        if j < 0:
236          self._MergeOrAddToken(text[i:], token_type)
237          break
238        self._MergeOrAddToken(text[i:j], token_type)
239        token_type = self.EMBELLISHMENTS[text[j + 1]]
240        self._current_token_type = token_type
241        i = j + 2
242
243  def _UnFormat(self, text):
244    """Returns text with all inline formatting stripped."""
245    if self._csi not in text:
246      return text
247    stripped = []
248    i = 0
249    while i < len(text):
250      j = text.find(self._csi, i)
251      if j < 0:
252        stripped.append(text[i:])
253        break
254      stripped.append(text[i:j])
255      i = j + 2
256    return ''.join(stripped)
257
258  def _AddDefinition(self, text):
259    """Appends a definition list definition item to the current line."""
260    text = self._UnFormat(text)
261    parts = text.split('=', 1)
262    self._AddToken(parts[0], Token.Markdown.Definition)
263    if len(parts) > 1:
264      self._AddToken('=', Token.Markdown.Normal)
265      self._AddToken(parts[1], Token.Markdown.Value)
266    self._NewLine()
267
268  def _Flush(self):
269    """Flushes the current collection of Fill() lines."""
270    self._ignore_width = False
271    if self._fill:
272      self._NewLine()
273      self.Content()
274      self._fill = 0
275
276  def _SetIndent(self, level, indent=0, second_line_indent=None):
277    """Sets the markdown list level and indentations.
278
279    Args:
280      level: int, The desired markdown list level.
281      indent: int, The new indentation.
282      second_line_indent: int, The second line indentation. This is subtracted
283        from the prevailing indent to decrease the indentation of the next input
284        line for this effect:
285            SECOND LINE INDENT ON THE NEXT LINE
286               PREVAILING INDENT
287               ON SUBSEQUENT LINES
288    """
289    if self._level < level:
290      # The level can increase by 1 or more. Loop through each so that
291      # intervening levels are handled the same.
292      while self._level < level:
293        prev_level = self._level
294        self._level += 1
295        if self._level >= len(self._indent):
296          self._indent.append(self.Indent())
297        self._indent[self._level].indent = (
298            self._indent[prev_level].indent + indent)
299        if (self._level > 1 and
300            self._indent[prev_level].second_line_indent ==
301            self._indent[prev_level].indent):
302          # Bump the indent by 1 char for nested indentation. Top level looks
303          # fine (aesthetically) without it.
304          self._indent[self._level].indent += 1
305        self._indent[self._level].second_line_indent = (
306            self._indent[self._level].indent)
307        if second_line_indent is not None:
308          # Adjust the second line indent if specified.
309          self._indent[self._level].second_line_indent -= second_line_indent
310    else:
311      # Decreasing level just sets the indent stack level, no state to clean up.
312      self._level = level
313      if second_line_indent is not None:
314        # Change second line indent on existing level.
315        self._indent[self._level].indent = (
316            self._indent[self._level].second_line_indent + second_line_indent)
317
318  def Example(self, line):
319    """Displays line as an indented example.
320
321    Args:
322      line: The example line text.
323    """
324    self._fill = self._indent[self._level].indent + self.INDENT
325    self._AddToken(' ' * self._fill + line, Token.Markdown.Normal)
326    self._NewLine()
327    self.Content()
328    self._fill = 0
329
330  def Fill(self, line):
331    """Adds a line to the output, splitting to stay within the output width.
332
333    This is close to textwrap.wrap() except that control sequence characters
334    don't count in the width computation.
335
336    Args:
337      line: The text line.
338    """
339    self.Blank()
340    for word in line.split():
341      if not self._fill:
342        if self._level or not self._compact:
343          self._fill = self._indent[self._level].indent - 1
344        else:
345          self._level = 0
346        self._AddToken(' ' * self._fill)
347      width = self._attr.DisplayWidth(word)
348      available = self._width - self._fill
349      if (width + 1) >= available and not self._ignore_width:
350        self._NewLine(overflow=(word, available))
351        self._fill = self._indent[self._level].indent
352        self._AddToken(' ' * self._fill)
353      else:
354        self._ignore_width = False
355        if self._fill:
356          self._fill += 1
357          self._AddToken(' ')
358      self._fill += width
359      self._AddToken(word)
360
361  def Finish(self):
362    """Finishes all output document rendering."""
363    self._Flush()
364    self.Font()
365    return self._lines
366
367  def Font(self, attr=None):
368    """Returns the font embellishment control sequence for attr.
369
370    Args:
371      attr: None to reset to the default font, otherwise one of renderer.BOLD,
372        renderer.ITALIC, or renderer.CODE.
373
374    Returns:
375      The font embellishment control sequence.
376    """
377    if attr is None:
378      self._font = 0
379    else:
380      mask = 1 << attr
381      self._font ^= mask
382    font = self._font & ((1 << renderer.BOLD) |
383                         (1 << renderer.CODE) |
384                         (1 << renderer.ITALIC))
385    if font & (1 << renderer.CODE):
386      embellishment = 'C'
387    elif font == ((1 << renderer.BOLD) | (1 << renderer.ITALIC)):
388      embellishment = 'Z'
389    elif font == (1 << renderer.BOLD):
390      embellishment = 'B'
391    elif font == (1 << renderer.ITALIC):
392      embellishment = 'I'
393    else:
394      embellishment = 'N'
395    return self._csi + embellishment
396
397  def Heading(self, level, heading):
398    """Renders a heading.
399
400    Args:
401      level: The heading level counting from 1.
402      heading: The heading text.
403    """
404    if level == 1 and heading.endswith('(1)'):
405      # Ignore man page TH.
406      return
407    self._Flush()
408    self.Line()
409    self.Font()
410    if level > 2:
411      indent = '  ' * (level - 2)
412      self._AddToken(indent)
413      if self._compact:
414        self._ignore_paragraph = True
415        self._fill += len(indent)
416    self._AddToken(heading, Token.Markdown.Section)
417    if self._compact:
418      self._ignore_paragraph = True
419      self._fill += self._attr.DisplayWidth(heading)
420    else:
421      self._NewLine()
422    self.Blank()
423    self._level = 0
424    self._rows = []
425
426  def Line(self):
427    """Renders a paragraph separating line."""
428    if self._ignore_paragraph:
429      return
430    self._Flush()
431    if not self.HaveBlank():
432      self.Blank()
433      self._NewLine()
434
435  def List(self, level, definition=None, end=False):
436    """Renders a bullet or definition list item.
437
438    Args:
439      level: The list nesting level, 0 if not currently in a list.
440      definition: Bullet list if None, definition list item otherwise.
441      end: End of list if True.
442    """
443    self._Flush()
444    if not level:
445      self._level = level
446    elif end:
447      # End of list.
448      self._SetIndent(level)
449    elif definition is not None:
450      # Definition list item.
451      if definition:
452        self._SetIndent(level, indent=4, second_line_indent=3)
453        self._AddToken(' ' * self._indent[level].second_line_indent)
454        self._AddDefinition(definition)
455      else:
456        self._SetIndent(level, indent=1, second_line_indent=0)
457        self.Line()
458    else:
459      # Bullet list item.
460      indent = 2 if level > 1 else 4
461      self._SetIndent(level, indent=indent, second_line_indent=2)
462      self._AddToken(' ' * self._indent[level].second_line_indent +
463                     self._bullet[(level - 1) % len(self._bullet)])
464      self._fill = self._indent[level].indent + 1
465      self._ignore_width = True
466
467  def _SkipSpace(self, line, index):
468    """Skip space characters starting at line[index].
469
470    Args:
471      line: The string.
472      index: The starting index in string.
473
474    Returns:
475      The index in line after spaces or len(line) at end of string.
476    """
477    while index < len(line):
478      c = line[index]
479      if c != ' ':
480        break
481      index += 1
482    return index
483
484  def _SkipControlSequence(self, line, index):
485    """Skip the control sequence at line[index].
486
487    Args:
488      line: The string.
489      index: The starting index in string.
490
491    Returns:
492      The index in line after the control sequence or len(line) at end of
493      string.
494    """
495    n = self._attr.GetControlSequenceLen(line[index:])
496    if not n:
497      n = 1
498    return index + n
499
500  def _SkipNest(self, line, index, open_chars='[(', close_chars=')]'):
501    """Skip a [...] nested bracket group starting at line[index].
502
503    Args:
504      line: The string.
505      index: The starting index in string.
506      open_chars: The open nesting characters.
507      close_chars: The close nesting characters.
508
509    Returns:
510      The index in line after the nesting group or len(line) at end of string.
511    """
512    nest = 0
513    while index < len(line):
514      c = line[index]
515      index += 1
516      if c in open_chars:
517        nest += 1
518      elif c in close_chars:
519        nest -= 1
520        if nest <= 0:
521          break
522      elif c == self._csi:
523        index = self._SkipControlSequence(line, index)
524    return index
525
  def _SplitWideSynopsisGroup(self, group, indent, running_width):
    """Splits a wide SYNOPSIS section group string across output lines.

    The group is consumed one part at a time. Each part ends at the first
    occurrence of one of the delimiters ' | ', ' : ', ' ' or ',', which are
    tried in that order. Continuation lines are indented SPLIT_INDENT characters
    beyond indent.

    Args:
      group: The wide group string to split.
      indent: The prevailing left indent.
      running_width: The width of the line in progress.

    Returns:
      The running_width after the group has been split and written.
    """
    # The delimiter that preceded the part about to be written. It is written
    # in front of that part.
    prev_delimiter = ' '
    while group:
      # Check split delimiters in order for visual emphasis.
      # Each pass of the for loop ends in break: the ',' delimiter never takes
      # the continue below, so every while iteration consumes one part.
      for delimiter in (' | ', ' : ', ' ', ','):
        part, _, remainder = group.partition(delimiter)
        w = self._attr.DisplayWidth(part)
        # A line break is considered when the part does not fit on the current
        # line or when the ',' delimiter is reached after a non-',' delimiter.
        if ((running_width + len(prev_delimiter) + w) >= self._width or
            prev_delimiter != ',' and delimiter == ','):
          if delimiter != ',' and (indent +
                                   self.SPLIT_INDENT +
                                   len(prev_delimiter) +
                                   w) >= self._width:
            # The next delimiter may produce a smaller first part.
            continue
          if prev_delimiter == ',':
            # Keep the ',' at the end of the line being broken.
            self._AddToken(prev_delimiter)
            prev_delimiter = ' '
          if running_width != indent:
            # Something is already on this line: start a continuation line.
            running_width = indent + self.SPLIT_INDENT
            self._NewLine()
            self._AddToken(' ' * running_width)
        self._AddToken(prev_delimiter + part)
        running_width += len(prev_delimiter) + w
        prev_delimiter = delimiter
        group = remainder
        break
    return running_width
564
565  def Synopsis(self, line, is_synopsis=False):
566    """Renders NAME and SYNOPSIS lines as a second line indent.
567
568    Collapses adjacent spaces to one space, deletes trailing space, and doesn't
569    split top-level nested [...] or (...) groups. Also detects and does not
570    count terminal control sequences.
571
572    Args:
573      line: The NAME or SYNOPSIS text.
574      is_synopsis: if it is the synopsis section
575    """
576    # Split the line into token, token | token, and [...] groups.
577    groups = []
578    i = self._SkipSpace(line, 0)
579    beg = i
580    while i < len(line):
581      c = line[i]
582      if c == ' ':
583        end = i
584        i = self._SkipSpace(line, i)
585        if i <= (len(line) - 1) and line[i] == '|' and line[i + 1] == ' ':
586          i = self._SkipSpace(line, i + 1)
587        else:
588          groups.append(line[beg:end])
589          beg = i
590      elif c in '[(':
591        i = self._SkipNest(line, i)
592      elif c == self._csi:
593        i = self._SkipControlSequence(line, i)
594      else:
595        i += 1
596    if beg < len(line):
597      groups.append(line[beg:])
598
599    # Output the groups.
600    indent = self._indent[0].indent - 1
601    running_width = indent
602    self._AddToken(' ' * running_width)
603    indent += self.INDENT
604    for group in groups:
605      w = self._attr.DisplayWidth(group) + 1
606      if (running_width + w) >= self._width:
607        running_width = indent
608        self._NewLine()
609        self._AddToken(' ' * running_width)
610        if (running_width + w) >= self._width:
611          # The group is wider than the available width and must be split.
612          running_width = self._SplitWideSynopsisGroup(
613              group, indent, running_width)
614          continue
615      self._AddToken(' ' + group)
616      running_width += w
617    self._NewLine()
618    self._NewLine()
619
620  def TableLine(self, line, indent=0):
621    """Adds an indented table line to the output.
622
623    Args:
624      line: The line to add. A newline will be added.
625      indent: The number of characters to indent the table.
626    """
627    self._AddToken(indent * ' ' + line)
628    self._NewLine()
629